gegl r2184 - in trunk: . gegl gegl/operation operations/common
- From: ok svn gnome org
- To: svn-commits-list gnome org
- Subject: gegl r2184 - in trunk: . gegl gegl/operation operations/common
- Date: Fri, 18 Apr 2008 00:36:11 +0100 (BST)
Author: ok
Date: Thu Apr 17 23:36:11 2008
New Revision: 2184
URL: http://svn.gnome.org/viewvc/gegl?rev=2184&view=rev
Log:
* configure.ac: added mmx and cpu detection.
* gegl/Makefile.am:
* gegl/gegl-cpuaccel.[ch]: added from GIMP.
* gegl/gegl-plugin.h: include config.h when compiling in-tree (to
allow checking for USE_SSE in ops).
* gegl/gegl-utils.[ch]: (gegl_malloc), (gegl_free): added malloc and
free variants that align on 16-byte boundaries.
* gegl/operation/gegl-operation-point-composer.c:
* gegl/operation/gegl-operation-point-filter.c:
(process_inner): align allocated buffers on 16-byte boundaries to
make it possible to process RGBA buffers using SSE.
* gegl/operation/gegl-operation-processors.c:
(gegl_operation_class_add_processor): added category SSE that is
autoenabled if sse support is compiled in and detected.
* operations/common/invert.c: (process_sse),
(gegl_chant_class_init): accelerate the invert operation with vector
maths using SSE.
Added:
trunk/gegl/gegl-cpuaccel.c
trunk/gegl/gegl-cpuaccel.h
Modified:
trunk/ChangeLog
trunk/configure.ac
trunk/gegl/Makefile.am
trunk/gegl/gegl-plugin.h
trunk/gegl/gegl-utils.c
trunk/gegl/gegl-utils.h
trunk/gegl/operation/gegl-operation-point-composer.c
trunk/gegl/operation/gegl-operation-point-filter.c
trunk/gegl/operation/gegl-operation-processors.c
trunk/operations/common/invert.c
Modified: trunk/configure.ac
==============================================================================
--- trunk/configure.ac (original)
+++ trunk/configure.ac Thu Apr 17 23:36:11 2008
@@ -235,6 +235,29 @@
esac
AC_MSG_RESULT([$target_or_host])
+case "$target_or_host" in
+ i*86-*-*)
+ have_x86=yes
+ AC_DEFINE(ARCH_X86, 1, [Define to 1 if you are compiling for ix86.])
+ ;;
+ x86_64-*-*)
+ have_x86=yes
+ AC_DEFINE(ARCH_X86, 1, [Define to 1 if you are compiling for ix86.])
+ AC_DEFINE(ARCH_X86_64, 1, [Define to 1 if you are compiling for amd64.])
+ ;;
+ ppc-*-* | powerpc-*)
+ have_ppc=yes
+ AC_DEFINE(ARCH_PPC, 1, [Define to 1 if you are compiling for PowerPC.])
+ ;;
+ ppc64-*-* | powerpc64-*)
+ have_ppc=yes
+ AC_DEFINE(ARCH_PPC, 1, [Define to 1 if you are compiling for PowerPC.])
+ AC_DEFINE(ARCH_PPC64, 1, [Define to 1 if you are compiling for PowerPC64.])
+ ;;
+ *)
+ ;;
+esac
+
####################################################
@@ -304,6 +327,9 @@
AM_CONDITIONAL(OS_WIN32, test "$os_win32" = "yes")
AM_CONDITIONAL(OS_UNIX, test "$os_win32" != "yes")
+dnl Checks for programs.
+#AC_PROG_YACC
+#AM_PROG_LEX
#############################
# Threads and multi processor
@@ -319,9 +345,85 @@
fi
-dnl Checks for programs.
-#AC_PROG_YACC
-#AM_PROG_LEX
+
+
+########################
+# Check for MMX assembly
+########################
+
+dnl GEGL_DETECT_CFLAGS(RESULT, FLAGSET)
+dnl Detect if the compiler supports a set of flags
+
+AC_DEFUN([GEGL_DETECT_CFLAGS],
+[
+ $1=
+ for flag in $2; do
+ if test -z "[$]$1"; then
+ $1_save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS $flag"
+ AC_MSG_CHECKING([whether [$]CC understands [$]flag])
+ AC_TRY_COMPILE([], [], [$1_works=yes], [$1_works=no])
+ AC_MSG_RESULT([$]$1_works)
+ CFLAGS="[$]$1_save_CFLAGS"
+ if test "x[$]$1_works" = "xyes"; then
+ $1="$flag"
+ fi
+ fi
+ done
+])
+
+AC_ARG_ENABLE(mmx,
+ [ --enable-mmx enable MMX support (default=auto)],,
+ enable_mmx=$have_x86)
+
+AC_ARG_ENABLE(sse,
+ [ --enable-sse enable SSE support (default=auto)],,
+ enable_sse=$enable_mmx)
+
+if test "x$enable_mmx" = xyes; then
+ GEGL_DETECT_CFLAGS(MMX_EXTRA_CFLAGS, '-mmmx')
+ SSE_EXTRA_CFLAGS=
+
+ AC_MSG_CHECKING(whether we can compile MMX code)
+
+ mmx_save_CFLAGS="$CFLAGS"
+ CFLAGS="$mmx_save_CFLAGS $MMX_EXTRA_CFLAGS"
+
+ AC_COMPILE_IFELSE([asm ("movq 0, %mm0");],
+
+ AC_DEFINE(USE_MMX, 1, [Define to 1 if MMX assembly is available.])
+ AC_MSG_RESULT(yes)
+
+ if test "x$enable_sse" = xyes; then
+ GEGL_DETECT_CFLAGS(sse_flag, '-msse')
+ SSE_EXTRA_CFLAGS="$MMX_EXTRA_CFLAGS $sse_flag"
+
+ AC_MSG_CHECKING(whether we can compile SSE code)
+
+ CFLAGS="$CFLAGS $sse_flag"
+
+ AC_COMPILE_IFELSE([asm ("movntps %xmm0, 0");],
+ AC_DEFINE(USE_SSE, 1, [Define to 1 if SSE assembly is available.])
+ AC_MSG_RESULT(yes)
+ ,
+ enable_sse=no
+ AC_MSG_RESULT(no)
+ AC_MSG_WARN([The assembler does not support the SSE command set.])
+ )
+
+ fi
+ ,
+ enable_mmx=no
+ AC_MSG_RESULT(no)
+ AC_MSG_WARN([The assembler does not support the MMX command set.])
+ )
+
+ CFLAGS="$mmx_save_CFLAGS"
+
+ AC_SUBST(MMX_EXTRA_CFLAGS)
+ AC_SUBST(SSE_EXTRA_CFLAGS)
+fi
+
###############################
@@ -659,6 +761,8 @@
AC_SUBST(AVFORMAT_LIBS)
+
+
################
# Check for lcms
################
@@ -679,6 +783,8 @@
#AC_SUBST(LCMS_LIBS)
+
+
#######################
# Check for other items
#######################
Modified: trunk/gegl/Makefile.am
==============================================================================
--- trunk/gegl/Makefile.am (original)
+++ trunk/gegl/Makefile.am Thu Apr 17 23:36:11 2008
@@ -19,6 +19,8 @@
gegl-dot.c \
gegl-utils.c \
gegl-xml.c \
+ gegl-cpuaccel.c \
+ gegl-cpuaccel.h \
\
gegl-init.h \
gegl-types.h \
Added: trunk/gegl/gegl-cpuaccel.c
==============================================================================
--- (empty file)
+++ trunk/gegl/gegl-cpuaccel.c Thu Apr 17 23:36:11 2008
@@ -0,0 +1,498 @@
+/* GEGL - The GEGL Library
+ * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * x86 bits Copyright (C) Manish Singh <yosh gimp org>
+ */
+
+/*
+ * PPC CPU acceleration detection was taken from DirectFB but seems to be
+ * originating from mpeg2dec with the following copyright:
+ *
+ * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma ess engr uvic ca>
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <signal.h>
+#include <setjmp.h>
+
+#include <glib.h>
+
+#include "gegl-cpuaccel.h"
+
+
+static GeglCpuAccelFlags cpu_accel (void) G_GNUC_CONST;
+
+
+static gboolean use_cpu_accel = TRUE;
+
+
+/**
+ * gegl_cpu_accel_get_support:
+ *
+ * Query which CPU acceleration features may currently be used.
+ *
+ * When acceleration has been switched off with gegl_cpu_accel_set_use()
+ * this reports %GEGL_CPU_ACCEL_NONE; otherwise it reports the result of
+ * the internal cpu_accel() probe.
+ *
+ * Return value: #GeglCpuAccelFlags available for use.
+ */
+GeglCpuAccelFlags
+gegl_cpu_accel_get_support (void)
+{
+  if (! use_cpu_accel)
+    return GEGL_CPU_ACCEL_NONE;
+
+  return cpu_accel ();
+}
+
+/**
+ * gegl_cpu_accel_set_use:
+ * @use: %TRUE to let gegl_cpu_accel_get_support() report the detected
+ *       CPU features, %FALSE to make it report none
+ *
+ * Globally enables or disables the use of CPU acceleration features.
+ *
+ * This function is for internal use only.
+ */
+void
+gegl_cpu_accel_set_use (gboolean use)
+{
+  /* normalise any non-zero value to TRUE */
+  use_cpu_accel = (use != FALSE);
+}
+
+
+#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+
+typedef enum
+{
+ ARCH_X86_VENDOR_NONE,
+ ARCH_X86_VENDOR_INTEL,
+ ARCH_X86_VENDOR_AMD,
+ ARCH_X86_VENDOR_CENTAUR,
+ ARCH_X86_VENDOR_CYRIX,
+ ARCH_X86_VENDOR_NSC,
+ ARCH_X86_VENDOR_TRANSMETA,
+ ARCH_X86_VENDOR_NEXGEN,
+ ARCH_X86_VENDOR_RISE,
+ ARCH_X86_VENDOR_UMC,
+ ARCH_X86_VENDOR_SIS,
+ ARCH_X86_VENDOR_UNKNOWN = 0xff
+} X86Vendor;
+
+enum
+{
+ ARCH_X86_INTEL_FEATURE_MMX = 1 << 23,
+ ARCH_X86_INTEL_FEATURE_XMM = 1 << 25,
+ ARCH_X86_INTEL_FEATURE_XMM2 = 1 << 26,
+
+ ARCH_X86_AMD_FEATURE_MMXEXT = 1 << 22,
+ ARCH_X86_AMD_FEATURE_3DNOW = 1 << 31,
+
+ ARCH_X86_CENTAUR_FEATURE_MMX = 1 << 23,
+ ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
+ ARCH_X86_CENTAUR_FEATURE_3DNOW = 1 << 31,
+
+ ARCH_X86_CYRIX_FEATURE_MMX = 1 << 23,
+ ARCH_X86_CYRIX_FEATURE_MMXEXT = 1 << 24
+};
+
+enum
+{
+ ARCH_X86_INTEL_FEATURE_PNI = 1 << 0
+};
+
+/*
+ * cpuid(op, eax, ebx, ecx, edx):
+ * Executes the CPUID instruction with @op loaded into %eax and stores the
+ * resulting %eax/%ebx/%ecx/%edx values in the given lvalues.
+ *
+ * The first variant is used for 32-bit position independent code: it
+ * copies %ebx to %esi before CPUID and swaps the two back afterwards, so
+ * %ebx holds its original value again when the asm ends and the CPUID
+ * result for ebx is handed back through %esi (presumably because %ebx is
+ * reserved for the GOT pointer in PIC builds -- TODO confirm).
+ */
+#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("movl %%ebx, %%esi\n\t" \
+ "cpuid\n\t" \
+ "xchgl %%ebx,%%esi" \
+ : "=a" (eax), \
+ "=S" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "0" (op))
+#else
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("cpuid" \
+ : "=a" (eax), \
+ "=b" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "0" (op))
+#endif
+
+
+/*
+ * Identifies the CPU vendor from the 12 character CPUID vendor string.
+ *
+ * Returns ARCH_X86_VENDOR_NONE when the 32-bit flag test below fails or
+ * when CPUID leaf 0 returns 0 in EAX, and ARCH_X86_VENDOR_UNKNOWN when
+ * the vendor string is not one of the strings compared against here.
+ */
+static X86Vendor
+arch_get_vendor (void)
+{
+  guint32 eax, ebx, ecx, edx;
+  gchar   id[16];
+
+#ifndef ARCH_X86_64
+  /* Only need to check this on ia32: flip bit 0x200000 of the flags
+   * register and read the flags back.  If the value did not change,
+   * CPUID is treated as unavailable.
+   */
+  __asm__ ("pushfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "movl %0,%1\n\t"
+           "xorl $0x200000,%0\n\t"
+           "pushl %0\n\t"
+           "popfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "popfl"
+           : "=a" (eax),
+             "=c" (ecx)
+           :
+           : "cc");
+
+  if (eax == ecx)
+    return ARCH_X86_VENDOR_NONE;
+#endif
+
+  cpuid (0, eax, ebx, ecx, edx);
+
+  if (eax == 0)
+    return ARCH_X86_VENDOR_NONE;
+
+  /* The vendor string is assembled from EBX, EDX, ECX (in that order).
+   * Copy the registers with memcpy() rather than storing through a cast
+   * int pointer: that violated the strict aliasing rules and assumed the
+   * char array to be suitably aligned for int.
+   */
+  memcpy (&id[0], &ebx, sizeof (ebx));
+  memcpy (&id[4], &edx, sizeof (edx));
+  memcpy (&id[8], &ecx, sizeof (ecx));
+
+  id[12] = '\0';
+
+#ifdef ARCH_X86_64
+  if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+#else
+  if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+  else if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "CentaurHauls") == 0)
+    return ARCH_X86_VENDOR_CENTAUR;
+  else if (strcmp (id, "CyrixInstead") == 0)
+    return ARCH_X86_VENDOR_CYRIX;
+  else if (strcmp (id, "Geode by NSC") == 0)
+    return ARCH_X86_VENDOR_NSC;
+  else if (strcmp (id, "GenuineTMx86") == 0 ||
+           strcmp (id, "TransmetaCPU") == 0)
+    return ARCH_X86_VENDOR_TRANSMETA;
+  else if (strcmp (id, "NexGenDriven") == 0)
+    return ARCH_X86_VENDOR_NEXGEN;
+  else if (strcmp (id, "RiseRiseRise") == 0)
+    return ARCH_X86_VENDOR_RISE;
+  else if (strcmp (id, "UMC UMC UMC ") == 0)
+    return ARCH_X86_VENDOR_UMC;
+  else if (strcmp (id, "SiS SiS SiS ") == 0)
+    return ARCH_X86_VENDOR_SIS;
+#endif
+
+  return ARCH_X86_VENDOR_UNKNOWN;
+}
+
+/*
+ * Derives the acceleration flags from CPUID leaf 1.
+ *
+ * Returns 0 when built without USE_MMX or when the MMX bit is not set in
+ * EDX; otherwise GEGL_CPU_ACCEL_X86_MMX plus, when built with USE_SSE,
+ * the SSE/MMXEXT, SSE2 and SSE3 flags whose feature bits are set.
+ */
+static guint32
+arch_accel_intel (void)
+{
+#ifdef USE_MMX
+  guint32 eax, ebx, ecx, edx;
+  guint32 caps;
+
+  cpuid (1, eax, ebx, ecx, edx);
+
+  /* without MMX nothing at all is reported */
+  if (! (edx & ARCH_X86_INTEL_FEATURE_MMX))
+    return 0;
+
+  caps = GEGL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+  if (edx & ARCH_X86_INTEL_FEATURE_XMM)
+    caps |= GEGL_CPU_ACCEL_X86_SSE | GEGL_CPU_ACCEL_X86_MMXEXT;
+
+  if (edx & ARCH_X86_INTEL_FEATURE_XMM2)
+    caps |= GEGL_CPU_ACCEL_X86_SSE2;
+
+  if (ecx & ARCH_X86_INTEL_FEATURE_PNI)
+    caps |= GEGL_CPU_ACCEL_X86_SSE3;
+#endif /* USE_SSE */
+
+  return caps;
+#else /* !USE_MMX */
+  return 0;
+#endif /* USE_MMX */
+}
+
+/*
+ * AMD feature detection: starts from the flags found by
+ * arch_accel_intel() and, when CPUID leaf 0x80000001 is available and the
+ * build has USE_SSE, adds the 3DNow! and MMXEXT flags reported there.
+ */
+static guint32
+arch_accel_amd (void)
+{
+  guint32 caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  guint32 eax, ebx, ecx, edx;
+
+  /* EAX of leaf 0x80000000 is compared against the leaf we want to read */
+  cpuid (0x80000000, eax, ebx, ecx, edx);
+
+  if (eax >= 0x80000001)
+    {
+#ifdef USE_SSE
+      cpuid (0x80000001, eax, ebx, ecx, edx);
+
+      if (edx & ARCH_X86_AMD_FEATURE_3DNOW)
+        caps |= GEGL_CPU_ACCEL_X86_3DNOW;
+
+      if (edx & ARCH_X86_AMD_FEATURE_MMXEXT)
+        caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+    }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+/*
+ * Centaur feature detection: starts from the flags found by
+ * arch_accel_intel() and, when CPUID leaf 0x80000001 is available, adds
+ * MMX and (with USE_SSE) 3DNow! / MMXEXT as reported by that leaf.
+ */
+static guint32
+arch_accel_centaur (void)
+{
+  guint32 caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  guint32 eax, ebx, ecx, edx;
+
+  cpuid (0x80000000, eax, ebx, ecx, edx);
+
+  if (eax >= 0x80000001)
+    {
+      cpuid (0x80000001, eax, ebx, ecx, edx);
+
+      if (edx & ARCH_X86_CENTAUR_FEATURE_MMX)
+        caps |= GEGL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+      if (edx & ARCH_X86_CENTAUR_FEATURE_3DNOW)
+        caps |= GEGL_CPU_ACCEL_X86_3DNOW;
+
+      if (edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
+        caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+    }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+/*
+ * Cyrix / NSC feature detection: starts from the flags found by
+ * arch_accel_intel().  Only when CPUID leaf 0 returns exactly 2 in EAX is
+ * leaf 0x80000001 consulted for MMX and (with USE_SSE) MMXEXT.
+ */
+static guint32
+arch_accel_cyrix (void)
+{
+  guint32 caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  guint32 eax, ebx, ecx, edx;
+
+  cpuid (0, eax, ebx, ecx, edx);
+
+  if (eax == 2)
+    {
+      cpuid (0x80000001, eax, ebx, ecx, edx);
+
+      if (edx & ARCH_X86_CYRIX_FEATURE_MMX)
+        caps |= GEGL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+      if (edx & ARCH_X86_CYRIX_FEATURE_MMXEXT)
+        caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+    }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+#ifdef USE_SSE
+/* jump target used by sigill_handler() while the SSE probe is running */
+static jmp_buf sigill_return;
+
+/* SIGILL handler for the SSE probe: jumps back into
+ * arch_accel_sse_os_support(), making its setjmp() return 1.
+ */
+static void
+sigill_handler (gint n)
+{
+  longjmp (sigill_return, 1);
+}
+
+/*
+ * Checks whether an SSE instruction can actually be executed by running
+ * "xorps %xmm0, %xmm0" with a temporary SIGILL handler installed.
+ *
+ * Returns TRUE when the instruction executed, FALSE when it raised
+ * SIGILL.  In both cases the SIGILL disposition that was active on entry
+ * is restored before returning (SIG_DFL if it could not be queried), so
+ * that a later SIGILL can never jump into the stale sigill_return buffer.
+ */
+static gboolean
+arch_accel_sse_os_support (void)
+{
+  void     (*previous) (int);
+  gboolean   supported;
+
+  /* installed before setjmp() so that 'previous' is not modified between
+   * setjmp() and a possible longjmp()
+   */
+  previous = signal (SIGILL, sigill_handler);
+
+  if (setjmp (sigill_return))
+    {
+      /* we got here through sigill_handler() */
+      supported = FALSE;
+    }
+  else
+    {
+      __asm__ __volatile__ ("xorps %xmm0, %xmm0");
+      supported = TRUE;
+    }
+
+  signal (SIGILL, previous != SIG_ERR ? previous : SIG_DFL);
+
+  return supported;
+}
+#endif /* USE_SSE */
+
+/*
+ * Detects the x86 acceleration flags by dispatching to the vendor
+ * specific probe selected by arch_get_vendor().
+ *
+ * Returns a bit mask of GEGL_CPU_ACCEL_X86_* values, 0 when the vendor is
+ * ARCH_X86_VENDOR_NONE.
+ */
+static guint32
+arch_accel (void)
+{
+  guint32   caps;
+  X86Vendor vendor;
+
+  vendor = arch_get_vendor ();
+
+  switch (vendor)
+    {
+    case ARCH_X86_VENDOR_NONE:
+      caps = 0;
+      break;
+
+    case ARCH_X86_VENDOR_AMD:
+      caps = arch_accel_amd ();
+      break;
+
+    case ARCH_X86_VENDOR_CENTAUR:
+      caps = arch_accel_centaur ();
+      break;
+
+    case ARCH_X86_VENDOR_CYRIX:
+    case ARCH_X86_VENDOR_NSC:
+      caps = arch_accel_cyrix ();
+      break;
+
+      /* check for what Intel speced, even if UNKNOWN */
+    default:
+      caps = arch_accel_intel ();
+      break;
+    }
+
+#ifdef USE_SSE
+  /* If executing an SSE instruction fails, none of the SSE generations
+   * may be reported: drop SSE3 together with SSE and SSE2.
+   */
+  if ((caps & GEGL_CPU_ACCEL_X86_SSE) && !arch_accel_sse_os_support ())
+    caps &= ~(GEGL_CPU_ACCEL_X86_SSE  |
+              GEGL_CPU_ACCEL_X86_SSE2 |
+              GEGL_CPU_ACCEL_X86_SSE3);
+#endif
+
+  return caps;
+}
+
+#endif /* ARCH_X86 && USE_MMX && __GNUC__ */
+
+
+#if defined(ARCH_PPC) && defined (USE_ALTIVEC)
+
+#if defined(HAVE_ALTIVEC_SYSCTL)
+
+#include <sys/sysctl.h>
+
+#define HAVE_ACCEL 1
+
+/*
+ * Asks sysctl() for the { CTL_HW, HW_VECTORUNIT } value and reports
+ * GEGL_CPU_ACCEL_PPC_ALTIVEC when the call succeeds and the value is
+ * non-zero, 0 otherwise.
+ */
+static guint32
+arch_accel (void)
+{
+  gint     mib[2]      = { CTL_HW, HW_VECTORUNIT };
+  gboolean vector_unit = FALSE;
+  gsize    len         = sizeof(vector_unit);
+
+  if (sysctl (mib, 2, &vector_unit, &len, NULL, 0) != 0)
+    return 0;
+
+  return vector_unit ? GEGL_CPU_ACCEL_PPC_ALTIVEC : 0;
+}
+
+#elif defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+/* jump target for sigill_handler() and the flag telling the handler
+ * whether jumping to it is currently allowed
+ */
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+/* SIGILL handler used while probing: when no probe is in progress
+ * (canjump is 0) the default disposition is restored and the signal is
+ * raised again; afterwards canjump is cleared and control jumps back to
+ * the sigsetjmp() in arch_accel().
+ */
+static void
+sigill_handler (gint sig)
+{
+ if (!canjump)
+ {
+ signal (sig, SIG_DFL);
+ raise (sig);
+ }
+
+ canjump = 0;
+ siglongjmp (jmpbuf, 1);
+}
+
+/* Probes for the vector unit by executing the asm below with the SIGILL
+ * handler installed.  Returns GEGL_CPU_ACCEL_PPC_ALTIVEC when the
+ * instructions executed, 0 when they raised SIGILL.  SIGILL is reset to
+ * SIG_DFL on both paths.
+ */
+static guint32
+arch_accel (void)
+{
+ signal (SIGILL, sigill_handler);
+
+ if (sigsetjmp (jmpbuf, 1))
+ {
+ signal (SIGILL, SIG_DFL);
+ return 0;
+ }
+
+ canjump = 1;
+
+ asm volatile ("mtspr 256, %0\n\t"
+ "vand %%v0, %%v0, %%v0"
+ :
+ : "r" (-1));
+
+ signal (SIGILL, SIG_DFL);
+
+ return GEGL_CPU_ACCEL_PPC_ALTIVEC;
+}
+#endif /* __GNUC__ */
+
+#endif /* ARCH_PPC && USE_ALTIVEC */
+
+
+/*
+ * Returns the acceleration flags found by arch_accel().  The probe runs
+ * on the first call only; its result is kept in a static variable (with
+ * ~0U meaning "not probed yet") and returned on later calls.  Without
+ * HAVE_ACCEL this is always GEGL_CPU_ACCEL_NONE.
+ */
+static GeglCpuAccelFlags
+cpu_accel (void)
+{
+#ifdef HAVE_ACCEL
+  static guint32 cached = ~0U;
+
+  if (cached == ~0U)
+    cached = arch_accel ();
+
+  return (GeglCpuAccelFlags) cached;
+
+#else /* !HAVE_ACCEL */
+  return GEGL_CPU_ACCEL_NONE;
+#endif
+}
Added: trunk/gegl/gegl-cpuaccel.h
==============================================================================
--- (empty file)
+++ trunk/gegl/gegl-cpuaccel.h Thu Apr 17 23:36:11 2008
@@ -0,0 +1,52 @@
+/* LIBGEGL - The GEGL Library
+ * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __GEGL_CPU_ACCEL_H__
+#define __GEGL_CPU_ACCEL_H__
+
+G_BEGIN_DECLS
+
+
+typedef enum
+{
+ GEGL_CPU_ACCEL_NONE = 0x0,
+
+ /* x86 accelerations */
+ GEGL_CPU_ACCEL_X86_MMX = 0x80000000,
+ GEGL_CPU_ACCEL_X86_3DNOW = 0x40000000,
+ GEGL_CPU_ACCEL_X86_MMXEXT = 0x20000000,
+ GEGL_CPU_ACCEL_X86_SSE = 0x10000000,
+ GEGL_CPU_ACCEL_X86_SSE2 = 0x08000000,
+ GEGL_CPU_ACCEL_X86_SSE3 = 0x02000000,
+
+ /* powerpc accelerations */
+ GEGL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000
+} GeglCpuAccelFlags;
+
+
+GeglCpuAccelFlags gegl_cpu_accel_get_support (void);
+
+
+/* for internal use only */
+void gegl_cpu_accel_set_use (gboolean use);
+
+
+G_END_DECLS
+
+#endif /* __GEGL_CPU_ACCEL_H__ */
Modified: trunk/gegl/gegl-plugin.h
==============================================================================
--- trunk/gegl/gegl-plugin.h (original)
+++ trunk/gegl/gegl-plugin.h Thu Apr 17 23:36:11 2008
@@ -20,6 +20,10 @@
#ifndef __GEGL_PLUGIN_H__
#define __GEGL_PLUGIN_H__
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <string.h>
#include <glib-object.h>
#include <gegl.h>
@@ -53,7 +57,7 @@
* should be extended so a range of abi versions are accepted.
*/
-#define GEGL_MODULE_ABI_VERSION 0x0007
+#define GEGL_MODULE_ABI_VERSION 0x0008
struct _GeglModuleInfo
{
@@ -96,6 +100,17 @@
#include <operation/gegl-operation-sink.h>
#include <operation/gegl-operation-meta.h>
+#ifdef USE_SSE
+
+typedef float v4sf __attribute__ ((vector_size (4*sizeof(float))));
+typedef union
+{
+ v4sf v;
+ float a[4];
+} GeglV4;
+
+#endif
+
#else
/***** ***/
Modified: trunk/gegl/gegl-utils.c
==============================================================================
--- trunk/gegl/gegl-utils.c (original)
+++ trunk/gegl/gegl-utils.c Thu Apr 17 23:36:11 2008
@@ -291,4 +291,35 @@
return our_type;
}
+#define GEGL_ALIGN 16
+void *
+gegl_malloc (gsize size);
+
+/*
+ * gegl_malloc:
+ * @size: number of usable bytes requested
+ *
+ * Allocates @size bytes whose start address is a multiple of GEGL_ALIGN.
+ *
+ * The block is over-allocated with g_malloc() by GEGL_ALIGN bytes.  The
+ * distance (1..GEGL_ALIGN) between the g_malloc() pointer and the
+ * returned pointer is stored in the byte directly in front of the
+ * returned pointer, where gegl_free() reads it back.  Unlike a sentinel
+ * that is searched for, this cannot be confused with the caller's data,
+ * and the address is not truncated to 32 bits for the computation.
+ *
+ * Returns: aligned memory, to be released with gegl_free() only.
+ */
+void *
+gegl_malloc (gsize size)
+{
+  gchar *mem    = g_malloc (size + GEGL_ALIGN);
+  gsize  offset = GEGL_ALIGN - (GPOINTER_TO_SIZE (mem) % GEGL_ALIGN);
+  gchar *ret    = mem + offset;
+
+  /* offset is at least 1, so ret[-1] is always inside the allocation */
+  ret[-1] = (gchar) offset;
+
+  return ret;
+}
+
+void
+gegl_free (void *buf);
+
+/*
+ * gegl_free:
+ * @buf: memory obtained from gegl_malloc(), or NULL
+ *
+ * Releases memory allocated by gegl_malloc().  NULL is ignored.
+ */
+void
+gegl_free (void *buf)
+{
+  gchar *aligned = buf;
+
+  if (aligned == NULL)
+    return;
+
+  /* the byte in front of the block holds the distance to the pointer
+   * that g_malloc() returned
+   */
+  g_free (aligned - aligned[-1]);
+}
Modified: trunk/gegl/gegl-utils.h
==============================================================================
--- trunk/gegl/gegl-utils.h (original)
+++ trunk/gegl/gegl-utils.h Thu Apr 17 23:36:11 2008
@@ -78,6 +78,10 @@
gint _gegl_float_epsilon_equal (float v1,
float v2);
+/* Allocation helpers returning memory aligned on 16 byte boundaries.
+ * The names match the definitions in gegl-utils.c and the callers in the
+ * point filter / point composer; memory from gegl_malloc() must be
+ * released with gegl_free().
+ */
+void *
+gegl_malloc (gsize size);
+void
+gegl_free (void *buf);
G_END_DECLS
Modified: trunk/gegl/operation/gegl-operation-point-composer.c
==============================================================================
--- trunk/gegl/operation/gegl-operation-point-composer.c (original)
+++ trunk/gegl/operation/gegl-operation-point-composer.c Thu Apr 17 23:36:11 2008
@@ -134,7 +134,7 @@
{
gfloat *in_buf = NULL, *out_buf = NULL, *aux_buf = NULL;
- in_buf = g_malloc (in_format->format.bytes_per_pixel *
+ in_buf = gegl_malloc (in_format->format.bytes_per_pixel *
output->extent.width * output->extent.height);
if (in_format == out_format)
{
@@ -142,7 +142,7 @@
}
else
{
- out_buf = g_malloc (out_format->format.bytes_per_pixel *
+ out_buf = gegl_malloc (out_format->format.bytes_per_pixel *
output->extent.width * output->extent.height);
}
@@ -150,7 +150,7 @@
if (aux)
{
- aux_buf = g_malloc (aux_format->format.bytes_per_pixel *
+ aux_buf = gegl_malloc (aux_format->format.bytes_per_pixel *
output->extent.width * output->extent.height);
gegl_buffer_get (aux, 1.0, result, aux_format, aux_buf, GEGL_AUTO_ROWSTRIDE);
}
@@ -165,11 +165,11 @@
gegl_buffer_set (output, NULL, out_format, out_buf, GEGL_AUTO_ROWSTRIDE);
- g_free (in_buf);
+ gegl_free (in_buf);
if (in_format != out_format)
- g_free (out_buf);
+ gegl_free (out_buf);
if (aux)
- g_free (aux_buf);
+ gegl_free (aux_buf);
}
return TRUE;
}
Modified: trunk/gegl/operation/gegl-operation-point-filter.c
==============================================================================
--- trunk/gegl/operation/gegl-operation-point-filter.c (original)
+++ trunk/gegl/operation/gegl-operation-point-filter.c Thu Apr 17 23:36:11 2008
@@ -25,6 +25,7 @@
#include "gegl-operation-point-filter.h"
#include "graph/gegl-pad.h"
#include "graph/gegl-node.h"
+#include "gegl-utils.h"
#include <string.h>
static gboolean process_inner (GeglOperation *operation,
@@ -95,7 +96,7 @@
if (in_format == out_format)
{
gfloat *buf;
- buf = g_malloc (in_format->format.bytes_per_pixel *
+ buf = gegl_malloc (in_format->format.bytes_per_pixel *
output->extent.width * output->extent.height);
gegl_buffer_get (input, 1.0, result, in_format, buf, GEGL_AUTO_ROWSTRIDE);
@@ -107,15 +108,15 @@
output->extent.width * output->extent.height);
gegl_buffer_set (output, result, out_format, buf, GEGL_AUTO_ROWSTRIDE);
- g_free (buf);
+ gegl_free (buf);
}
else
{
gfloat *in_buf;
gfloat *out_buf;
- in_buf = g_malloc (in_format->format.bytes_per_pixel *
+ in_buf = gegl_malloc (in_format->format.bytes_per_pixel *
input->extent.width * input->extent.height);
- out_buf = g_malloc (out_format->format.bytes_per_pixel *
+ out_buf = gegl_malloc (out_format->format.bytes_per_pixel *
output->extent.width * output->extent.height);
gegl_buffer_get (input, 1.0, result, in_format, in_buf, GEGL_AUTO_ROWSTRIDE);
@@ -127,8 +128,8 @@
output->extent.width * output->extent.height);
gegl_buffer_set (output, result, out_format, out_buf, GEGL_AUTO_ROWSTRIDE);
- g_free (in_buf);
- g_free (out_buf);
+ gegl_free (in_buf);
+ gegl_free (out_buf);
}
}
return TRUE;
Modified: trunk/gegl/operation/gegl-operation-processors.c
==============================================================================
--- trunk/gegl/operation/gegl-operation-processors.c (original)
+++ trunk/gegl/operation/gegl-operation-processors.c Thu Apr 17 23:36:11 2008
@@ -27,6 +27,7 @@
#include "gegl-types.h"
#include "gegl-operation.h"
#include "gegl-utils.h"
+#include "gegl-cpuaccel.h"
#include "graph/gegl-node.h"
#include "graph/gegl-connection.h"
#include "graph/gegl-pad.h"
@@ -107,12 +108,21 @@
g_type_name (G_TYPE_FROM_CLASS (cclass)));
}
+#ifdef USE_SSE
+ /* always look for sse ops */
+#else
if (g_getenv ("GEGL_QUALITY"))
+#endif
{
const gchar *quality = g_getenv ("GEGL_QUALITY");
GCallback fast = NULL;
GCallback good = NULL;
GCallback reference = NULL;
+#ifdef USE_SSE
+ GCallback sse = NULL;
+ if (quality == NULL)
+ quality = "sse";
+#endif
for (i=0;i<MAX_PROCESSOR;i++)
{
@@ -125,6 +135,10 @@
fast = cb;
else if (g_str_equal (string, "good"))
good = cb;
+#ifdef USE_SSE
+ else if (g_str_equal (string, "sse"))
+ sse = cb;
+#endif
else if (g_str_equal (string, "reference"))
reference = cb;
}
@@ -133,16 +147,37 @@
g_assert (reference);
if (g_str_equal (quality, "fast"))
{
+#ifdef USE_SSE
+ g_print ("Setting %s processor for %s\n", fast?"fast":sse?"sse":good?"good":"reference",
+ g_type_name (G_TYPE_FROM_CLASS (cclass)));
+ PROCESS_VFUNC = fast?fast:sse?sse:good?good:reference;
+#else
g_print ("Setting %s processor for %s\n", fast?"fast":good?"good":"reference",
- g_type_name (G_TYPE_FROM_CLASS (cclass)));
+ g_type_name (G_TYPE_FROM_CLASS (cclass)));
PROCESS_VFUNC = fast?fast:good?good:reference;
+#endif
}
else if (g_str_equal (quality, "good"))
{
+#ifdef USE_SSE
+ g_print ("Setting %s processor for %s\n", sse?"sse":good?"good":"reference",
+ g_type_name (G_TYPE_FROM_CLASS (cclass)));
+#else
g_print ("Setting %s processor for %s\n", good?"good":"reference",
g_type_name (G_TYPE_FROM_CLASS (cclass)));
PROCESS_VFUNC = good?good:reference;
+#endif
+ }
+ else
+ {
+ /* best */
+#ifdef USE_SSE
+ if (sse && gegl_cpu_accel_get_support () & GEGL_CPU_ACCEL_X86_SSE)
+ g_print ("Setting sse processor for %s\n", g_type_name (G_TYPE_FROM_CLASS (cclass)));
+ PROCESS_VFUNC = sse?sse:reference;
+#else
+ PROCESS_VFUNC = reference;
+#endif
}
- /* best */
}
}
Modified: trunk/operations/common/invert.c
==============================================================================
--- trunk/operations/common/invert.c (original)
+++ trunk/operations/common/invert.c Thu Apr 17 23:36:11 2008
@@ -23,6 +23,7 @@
#define GEGL_CHANT_TYPE_POINT_FILTER
#define GEGL_CHANT_C_FILE "invert.c"
+#define GEGLV4
#include "gegl-chant.h"
@@ -53,38 +54,27 @@
return TRUE;
}
-
+#ifdef USE_SSE
+/* Inverts the first three components of @samples four-float pixels with
+ * one vector subtraction per pixel; the fourth component (alpha) is
+ * copied through unchanged.  Processes exactly @samples pixels and does
+ * nothing for samples <= 0.
+ */
static gboolean
-process_fast (GeglOperation *op,
- void *in_buf,
- void *out_buf,
- glong samples)
+process_sse (GeglOperation *op,
+             void          *in_buf,
+             void          *out_buf,
+             glong          samples)
{
- glong i;
- gfloat *in = in_buf;
- gfloat *out = out_buf;
+  GeglV4 *in  = in_buf;
+  GeglV4 *out = out_buf;
+  GeglV4  one = {{1.0,1.0,1.0,1.0}};
- for (i=0; i<samples; i++)
+  while (samples-- > 0)
{
- int j;
- for (j=0; j<3; j++)
- {
- gfloat c;
- c = in[j];
- c = 1.0 - c;
- if (i%2)
- out[j] = c;
- else
- out[j] = (c - 0.5) * 2.0 + 0.5;
- }
- out[3]=in[3];
- in += 4;
- out+= 4;
+      /* fetch alpha before the vector store: if out_buf is the same
+       * buffer as in_buf the store would overwrite it with 1 - alpha
+       */
+      gfloat alpha = in->a[3];
+
+      out->v    = one.v - in->v;
+      out->a[3] = alpha;
+      in++;
+      out++;
}
return TRUE;
}
-
-
+#endif
static void
gegl_chant_class_init (GeglChantClass *klass)
@@ -103,9 +93,10 @@
"Inverts the components (except alpha), the result is the"
" corresponding \"negative\" image.";
- g_print ("hi\n");
+#ifdef USE_SSE
gegl_operation_class_add_processor (operation_class,
- G_CALLBACK (process_fast), "fast");
+ G_CALLBACK (process_sse), "sse");
+#endif
}
#endif
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]