gegl r2184 - in trunk: . gegl gegl/operation operations/common



Author: ok
Date: Thu Apr 17 23:36:11 2008
New Revision: 2184
URL: http://svn.gnome.org/viewvc/gegl?rev=2184&view=rev

Log:
* configure.ac: added mmx and cpu detection.
* gegl/Makefile.am:
* gegl/gegl-cpuaccel.[ch]: added from GIMP.
* gegl/gegl-plugin.h: include config.h when compiling in-tree (to
allow checking for USE_SSE in ops).
* gegl/gegl-utils.[ch]: (gegl_malloc), (gegl_free): added malloc and
free variants that align on 16-byte boundaries.
* gegl/operation/gegl-operation-point-composer.c:
* gegl/operation/gegl-operation-point-filter.c:
(process_inner): align allocated buffers on 16-byte boundaries to
make it possible to process RGBA buffers using SSE.
* gegl/operation/gegl-operation-processors.c:
(gegl_operation_class_add_processor): added category SSE that is
auto-enabled if SSE support is compiled in and detected.
* operations/common/invert.c: (process_sse), 
(gegl_chant_class_init): accelerate the invert operation with vector
maths using SSE.


Added:
   trunk/gegl/gegl-cpuaccel.c
   trunk/gegl/gegl-cpuaccel.h
Modified:
   trunk/ChangeLog
   trunk/configure.ac
   trunk/gegl/Makefile.am
   trunk/gegl/gegl-plugin.h
   trunk/gegl/gegl-utils.c
   trunk/gegl/gegl-utils.h
   trunk/gegl/operation/gegl-operation-point-composer.c
   trunk/gegl/operation/gegl-operation-point-filter.c
   trunk/gegl/operation/gegl-operation-processors.c
   trunk/operations/common/invert.c

Modified: trunk/configure.ac
==============================================================================
--- trunk/configure.ac	(original)
+++ trunk/configure.ac	Thu Apr 17 23:36:11 2008
@@ -235,6 +235,29 @@
 esac
 AC_MSG_RESULT([$target_or_host])
 
+case "$target_or_host" in
+  i*86-*-*)
+    have_x86=yes
+    AC_DEFINE(ARCH_X86, 1, [Define to 1 if you are compiling for ix86.])
+    ;;
+  x86_64-*-*)
+    have_x86=yes
+    AC_DEFINE(ARCH_X86, 1, [Define to 1 if you are compiling for ix86.])
+    AC_DEFINE(ARCH_X86_64, 1, [Define to 1 if you are compiling for amd64.])
+    ;;
+  ppc-*-* | powerpc-*)
+    have_ppc=yes
+    AC_DEFINE(ARCH_PPC, 1, [Define to 1 if you are compiling for PowerPC.])
+    ;;
+  ppc64-*-* | powerpc64-*)
+    have_ppc=yes
+    AC_DEFINE(ARCH_PPC, 1, [Define to 1 if you are compiling for PowerPC.])
+    AC_DEFINE(ARCH_PPC64, 1, [Define to 1 if you are compiling for PowerPC64.])
+    ;;
+  *)
+    ;;
+esac
+
 
 
 ####################################################
@@ -304,6 +327,9 @@
 AM_CONDITIONAL(OS_WIN32, test "$os_win32" = "yes")
 AM_CONDITIONAL(OS_UNIX, test "$os_win32" != "yes")
 
+dnl Checks for programs.
+#AC_PROG_YACC
+#AM_PROG_LEX
 
 #############################
 # Threads and multi processor 
@@ -319,9 +345,85 @@
 fi
 
 
-dnl Checks for programs.
-#AC_PROG_YACC
-#AM_PROG_LEX
+
+
+########################
+# Check for MMX assembly
+########################
+
+dnl GEGL_DETECT_CFLAGS(RESULT, FLAGSET)
+dnl Detect if the compiler supports a set of flags
+
+AC_DEFUN([GEGL_DETECT_CFLAGS],
+[
+  $1=
+  for flag in $2; do
+    if test -z "[$]$1"; then
+      $1_save_CFLAGS="$CFLAGS"
+      CFLAGS="$CFLAGS $flag"
+      AC_MSG_CHECKING([whether [$]CC understands [$]flag])
+      AC_TRY_COMPILE([], [], [$1_works=yes], [$1_works=no])
+      AC_MSG_RESULT([$]$1_works)
+      CFLAGS="[$]$1_save_CFLAGS"
+      if test "x[$]$1_works" = "xyes"; then
+        $1="$flag"
+      fi
+    fi
+  done
+])
+
+AC_ARG_ENABLE(mmx,
+                [  --enable-mmx            enable MMX support (default=auto)],,
+                  enable_mmx=$have_x86)
+
+AC_ARG_ENABLE(sse,
+  [  --enable-sse            enable SSE support (default=auto)],,
+  enable_sse=$enable_mmx)
+
+if test "x$enable_mmx" = xyes; then
+  GEGL_DETECT_CFLAGS(MMX_EXTRA_CFLAGS, '-mmmx')
+  SSE_EXTRA_CFLAGS=
+
+  AC_MSG_CHECKING(whether we can compile MMX code)
+
+  mmx_save_CFLAGS="$CFLAGS"
+  CFLAGS="$mmx_save_CFLAGS $MMX_EXTRA_CFLAGS"
+
+  AC_COMPILE_IFELSE([asm ("movq 0, %mm0");],
+
+    AC_DEFINE(USE_MMX, 1, [Define to 1 if MMX assembly is available.])
+    AC_MSG_RESULT(yes)
+
+    if test "x$enable_sse" = xyes; then
+      GEGL_DETECT_CFLAGS(sse_flag, '-msse')
+      SSE_EXTRA_CFLAGS="$MMX_EXTRA_CFLAGS $sse_flag"
+
+      AC_MSG_CHECKING(whether we can compile SSE code)
+
+      CFLAGS="$CFLAGS $sse_flag"
+
+      AC_COMPILE_IFELSE([asm ("movntps %xmm0, 0");],
+        AC_DEFINE(USE_SSE, 1, [Define to 1 if SSE assembly is available.])
+        AC_MSG_RESULT(yes)
+      ,
+        enable_sse=no
+        AC_MSG_RESULT(no)
+        AC_MSG_WARN([The assembler does not support the SSE command set.])
+      )
+
+    fi
+  ,
+    enable_mmx=no
+    AC_MSG_RESULT(no)
+    AC_MSG_WARN([The assembler does not support the MMX command set.])
+  )
+
+  CFLAGS="$mmx_save_CFLAGS"
+
+  AC_SUBST(MMX_EXTRA_CFLAGS)
+  AC_SUBST(SSE_EXTRA_CFLAGS)
+fi
+
 
 
 ###############################
@@ -659,6 +761,8 @@
 AC_SUBST(AVFORMAT_LIBS) 
 
 
+
+
 ################
 # Check for lcms
 ################
@@ -679,6 +783,8 @@
 #AC_SUBST(LCMS_LIBS)
 
 
+
+
 #######################
 # Check for other items
 #######################

Modified: trunk/gegl/Makefile.am
==============================================================================
--- trunk/gegl/Makefile.am	(original)
+++ trunk/gegl/Makefile.am	Thu Apr 17 23:36:11 2008
@@ -19,6 +19,8 @@
 	gegl-dot.c			\
 	gegl-utils.c			\
 	gegl-xml.c			\
+	gegl-cpuaccel.c			\
+	gegl-cpuaccel.h			\
 	\
 	gegl-init.h			\
 	gegl-types.h			\

Added: trunk/gegl/gegl-cpuaccel.c
==============================================================================
--- (empty file)
+++ trunk/gegl/gegl-cpuaccel.c	Thu Apr 17 23:36:11 2008
@@ -0,0 +1,498 @@
+/* GEGL - The GEGL Library
+ * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * x86 bits Copyright (C) Manish Singh <yosh gimp org>
+ */
+
+/*
+ * PPC CPU acceleration detection was taken from DirectFB but seems to be
+ * originating from mpeg2dec with the following copyright:
+ *
+ * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma ess engr uvic ca>
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <signal.h>
+#include <setjmp.h>
+
+#include <glib.h>
+
+#include "gegl-cpuaccel.h"
+
+
+static GeglCpuAccelFlags  cpu_accel (void) G_GNUC_CONST;
+
+
+static gboolean  use_cpu_accel = TRUE;
+
+
+/**
+ * gegl_cpu_accel_get_support:
+ *
+ * Query for CPU acceleration support.
+ *
+ * Return value: #GeglCpuAccelFlags as supported by the CPU.
+ *
+ * Since: GEGL 2.4
+ */
+GeglCpuAccelFlags
+gegl_cpu_accel_get_support (void)
+{
+  return use_cpu_accel ? cpu_accel () : GEGL_CPU_ACCEL_NONE;
+}
+
+/**
+ * gegl_cpu_accel_set_use:
+ * @use:  whether to use CPU acceleration features or not
+ *
+ * This function is for internal use only.
+ *
+ * Since: GEGL 2.4
+ */
+void
+gegl_cpu_accel_set_use (gboolean use)
+{
+  use_cpu_accel = use ? TRUE : FALSE;
+}
+
+
+#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+
+typedef enum
+{
+  ARCH_X86_VENDOR_NONE,
+  ARCH_X86_VENDOR_INTEL,
+  ARCH_X86_VENDOR_AMD,
+  ARCH_X86_VENDOR_CENTAUR,
+  ARCH_X86_VENDOR_CYRIX,
+  ARCH_X86_VENDOR_NSC,
+  ARCH_X86_VENDOR_TRANSMETA,
+  ARCH_X86_VENDOR_NEXGEN,
+  ARCH_X86_VENDOR_RISE,
+  ARCH_X86_VENDOR_UMC,
+  ARCH_X86_VENDOR_SIS,
+  ARCH_X86_VENDOR_UNKNOWN    = 0xff
+} X86Vendor;
+
+enum
+{
+  ARCH_X86_INTEL_FEATURE_MMX      = 1 << 23,
+  ARCH_X86_INTEL_FEATURE_XMM      = 1 << 25,
+  ARCH_X86_INTEL_FEATURE_XMM2     = 1 << 26,
+
+  ARCH_X86_AMD_FEATURE_MMXEXT     = 1 << 22,
+  ARCH_X86_AMD_FEATURE_3DNOW      = 1 << 31,
+
+  ARCH_X86_CENTAUR_FEATURE_MMX    = 1 << 23,
+  ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
+  ARCH_X86_CENTAUR_FEATURE_3DNOW  = 1 << 31,
+
+  ARCH_X86_CYRIX_FEATURE_MMX      = 1 << 23,
+  ARCH_X86_CYRIX_FEATURE_MMXEXT   = 1 << 24
+};
+
+enum
+{
+  ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0
+};
+
+#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
+#define cpuid(op,eax,ebx,ecx,edx)  \
+  __asm__ ("movl %%ebx, %%esi\n\t" \
+           "cpuid\n\t"             \
+           "xchgl %%ebx,%%esi"     \
+           : "=a" (eax),           \
+             "=S" (ebx),           \
+             "=c" (ecx),           \
+             "=d" (edx)            \
+           : "0" (op))
+#else
+#define cpuid(op,eax,ebx,ecx,edx)  \
+  __asm__ ("cpuid"                 \
+           : "=a" (eax),           \
+             "=b" (ebx),           \
+             "=c" (ecx),           \
+             "=d" (edx)            \
+           : "0" (op))
+#endif
+
+
+static X86Vendor
+arch_get_vendor (void)
+{
+  guint32 eax, ebx, ecx, edx;
+  gchar   id[16];
+
+#ifndef ARCH_X86_64
+  /* Only need to check this on ia32 */
+  __asm__ ("pushfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "movl %0,%1\n\t"
+           "xorl $0x200000,%0\n\t"
+           "pushl %0\n\t"
+           "popfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "popfl"
+           : "=a" (eax),
+             "=c" (ecx)
+           :
+           : "cc");
+
+  if (eax == ecx)
+    return ARCH_X86_VENDOR_NONE;
+#endif
+
+  cpuid (0, eax, ebx, ecx, edx);
+
+  if (eax == 0)
+    return ARCH_X86_VENDOR_NONE;
+
+  *(int *)&id[0] = ebx;
+  *(int *)&id[4] = edx;
+  *(int *)&id[8] = ecx;
+
+  id[12] = '\0';
+
+#ifdef ARCH_X86_64
+  if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+#else
+  if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+  else if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "CentaurHauls") == 0)
+    return ARCH_X86_VENDOR_CENTAUR;
+  else if (strcmp (id, "CyrixInstead") == 0)
+    return ARCH_X86_VENDOR_CYRIX;
+  else if (strcmp (id, "Geode by NSC") == 0)
+    return ARCH_X86_VENDOR_NSC;
+  else if (strcmp (id, "GenuineTMx86") == 0 ||
+           strcmp (id, "TransmetaCPU") == 0)
+    return ARCH_X86_VENDOR_TRANSMETA;
+  else if (strcmp (id, "NexGenDriven") == 0)
+    return ARCH_X86_VENDOR_NEXGEN;
+  else if (strcmp (id, "RiseRiseRise") == 0)
+    return ARCH_X86_VENDOR_RISE;
+  else if (strcmp (id, "UMC UMC UMC ") == 0)
+    return ARCH_X86_VENDOR_UMC;
+  else if (strcmp (id, "SiS SiS SiS ") == 0)
+    return ARCH_X86_VENDOR_SIS;
+#endif
+
+  return ARCH_X86_VENDOR_UNKNOWN;
+}
+
+static guint32
+arch_accel_intel (void)
+{
+  guint32 caps = 0;
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (1, eax, ebx, ecx, edx);
+
+    if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)
+      return 0;
+
+    caps = GEGL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+    if (edx & ARCH_X86_INTEL_FEATURE_XMM)
+      caps |= GEGL_CPU_ACCEL_X86_SSE | GEGL_CPU_ACCEL_X86_MMXEXT;
+
+    if (edx & ARCH_X86_INTEL_FEATURE_XMM2)
+      caps |= GEGL_CPU_ACCEL_X86_SSE2;
+
+    if (ecx & ARCH_X86_INTEL_FEATURE_PNI)
+      caps |= GEGL_CPU_ACCEL_X86_SSE3;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+static guint32
+arch_accel_amd (void)
+{
+  guint32 caps;
+
+  caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+
+    if (eax < 0x80000001)
+      return caps;
+
+#ifdef USE_SSE
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & ARCH_X86_AMD_FEATURE_3DNOW)
+      caps |= GEGL_CPU_ACCEL_X86_3DNOW;
+
+    if (edx & ARCH_X86_AMD_FEATURE_MMXEXT)
+      caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+static guint32
+arch_accel_centaur (void)
+{
+  guint32 caps;
+
+  caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+
+    if (eax < 0x80000001)
+      return caps;
+
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & ARCH_X86_CENTAUR_FEATURE_MMX)
+      caps |= GEGL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+    if (edx & ARCH_X86_CENTAUR_FEATURE_3DNOW)
+      caps |= GEGL_CPU_ACCEL_X86_3DNOW;
+
+    if (edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
+      caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+static guint32
+arch_accel_cyrix (void)
+{
+  guint32 caps;
+
+  caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (0, eax, ebx, ecx, edx);
+
+    if (eax != 2)
+      return caps;
+
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & ARCH_X86_CYRIX_FEATURE_MMX)
+      caps |= GEGL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+    if (edx & ARCH_X86_CYRIX_FEATURE_MMXEXT)
+      caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+#ifdef USE_SSE
+static jmp_buf sigill_return;
+
+static void
+sigill_handler (gint n)
+{
+  longjmp (sigill_return, 1);
+}
+
+static gboolean
+arch_accel_sse_os_support (void)
+{
+  if (setjmp (sigill_return))
+    {
+      return FALSE;
+    }
+  else
+    {
+      signal (SIGILL, sigill_handler);
+      __asm__ __volatile__ ("xorps %xmm0, %xmm0");
+      signal (SIGILL, SIG_DFL);
+    }
+
+  return TRUE;
+}
+#endif /* USE_SSE */
+
+static guint32
+arch_accel (void)
+{
+  guint32 caps;
+  X86Vendor vendor;
+
+  vendor = arch_get_vendor ();
+
+  switch (vendor)
+    {
+    case ARCH_X86_VENDOR_NONE:
+      caps = 0;
+      break;
+
+    case ARCH_X86_VENDOR_AMD:
+      caps = arch_accel_amd ();
+      break;
+
+    case ARCH_X86_VENDOR_CENTAUR:
+      caps = arch_accel_centaur ();
+      break;
+
+    case ARCH_X86_VENDOR_CYRIX:
+    case ARCH_X86_VENDOR_NSC:
+      caps = arch_accel_cyrix ();
+      break;
+
+    /* check for what Intel speced, even if UNKNOWN */
+    default:
+      caps = arch_accel_intel ();
+      break;
+    }
+
+#ifdef USE_SSE
+  if ((caps & GEGL_CPU_ACCEL_X86_SSE) && !arch_accel_sse_os_support ())
+    caps &= ~(GEGL_CPU_ACCEL_X86_SSE | GEGL_CPU_ACCEL_X86_SSE2);
+#endif
+
+  return caps;
+}
+
+#endif /* ARCH_X86 && USE_MMX && __GNUC__ */
+
+
+#if defined(ARCH_PPC) && defined (USE_ALTIVEC)
+
+#if defined(HAVE_ALTIVEC_SYSCTL)
+
+#include <sys/sysctl.h>
+
+#define HAVE_ACCEL 1
+
+static guint32
+arch_accel (void)
+{
+  gint     sels[2] = { CTL_HW, HW_VECTORUNIT };
+  gboolean has_vu  = FALSE;
+  gsize    length  = sizeof(has_vu);
+  gint     err;
+
+  err = sysctl (sels, 2, &has_vu, &length, NULL, 0);
+
+  if (err == 0 && has_vu)
+    return GEGL_CPU_ACCEL_PPC_ALTIVEC;
+
+  return 0;
+}
+
+#elif defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+static          sigjmp_buf   jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static void
+sigill_handler (gint sig)
+{
+  if (!canjump)
+    {
+      signal (sig, SIG_DFL);
+      raise (sig);
+    }
+
+  canjump = 0;
+  siglongjmp (jmpbuf, 1);
+}
+
+static guint32
+arch_accel (void)
+{
+  signal (SIGILL, sigill_handler);
+
+  if (sigsetjmp (jmpbuf, 1))
+    {
+      signal (SIGILL, SIG_DFL);
+      return 0;
+    }
+
+  canjump = 1;
+
+  asm volatile ("mtspr 256, %0\n\t"
+                "vand %%v0, %%v0, %%v0"
+                :
+                : "r" (-1));
+
+  signal (SIGILL, SIG_DFL);
+
+  return GEGL_CPU_ACCEL_PPC_ALTIVEC;
+}
+#endif /* __GNUC__ */
+
+#endif /* ARCH_PPC && USE_ALTIVEC */
+
+
+static GeglCpuAccelFlags
+cpu_accel (void)
+{
+#ifdef HAVE_ACCEL
+  static guint32 accel = ~0U;
+
+  if (accel != ~0U)
+    return accel;
+
+  accel = arch_accel ();
+
+  return (GeglCpuAccelFlags) accel;
+
+#else /* !HAVE_ACCEL */
+  return GEGL_CPU_ACCEL_NONE;
+#endif
+}

Added: trunk/gegl/gegl-cpuaccel.h
==============================================================================
--- (empty file)
+++ trunk/gegl/gegl-cpuaccel.h	Thu Apr 17 23:36:11 2008
@@ -0,0 +1,52 @@
+/* LIBGEGL - The GEGL Library
+ * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __GEGL_CPU_ACCEL_H__
+#define __GEGL_CPU_ACCEL_H__
+
+G_BEGIN_DECLS
+
+
+typedef enum
+{
+  GEGL_CPU_ACCEL_NONE        = 0x0,
+
+  /* x86 accelerations */
+  GEGL_CPU_ACCEL_X86_MMX     = 0x80000000,
+  GEGL_CPU_ACCEL_X86_3DNOW   = 0x40000000,
+  GEGL_CPU_ACCEL_X86_MMXEXT  = 0x20000000,
+  GEGL_CPU_ACCEL_X86_SSE     = 0x10000000,
+  GEGL_CPU_ACCEL_X86_SSE2    = 0x08000000,
+  GEGL_CPU_ACCEL_X86_SSE3    = 0x02000000,
+
+  /* powerpc accelerations */
+  GEGL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000
+} GeglCpuAccelFlags;
+
+
+GeglCpuAccelFlags  gegl_cpu_accel_get_support (void);
+
+
+/* for internal use only */
+void               gegl_cpu_accel_set_use     (gboolean use);
+
+
+G_END_DECLS
+
+#endif  /* __GEGL_CPU_ACCEL_H__ */

Modified: trunk/gegl/gegl-plugin.h
==============================================================================
--- trunk/gegl/gegl-plugin.h	(original)
+++ trunk/gegl/gegl-plugin.h	Thu Apr 17 23:36:11 2008
@@ -20,6 +20,10 @@
 #ifndef __GEGL_PLUGIN_H__
 #define __GEGL_PLUGIN_H__
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include <string.h>
 #include <glib-object.h>
 #include <gegl.h>
@@ -53,7 +57,7 @@
  *    should be extended so a range of abi versions are accepted.
  */
 
-#define GEGL_MODULE_ABI_VERSION 0x0007
+#define GEGL_MODULE_ABI_VERSION 0x0008
 
 struct _GeglModuleInfo
 {
@@ -96,6 +100,17 @@
 #include <operation/gegl-operation-sink.h>
 #include <operation/gegl-operation-meta.h>
 
+#ifdef USE_SSE
+
+typedef float v4sf __attribute__ ((vector_size (4*sizeof(float))));
+typedef union
+{
+  v4sf  v;
+  float a[4];
+} GeglV4;
+
+#endif
+
 #else
 
 /***** ***/

Modified: trunk/gegl/gegl-utils.c
==============================================================================
--- trunk/gegl/gegl-utils.c	(original)
+++ trunk/gegl/gegl-utils.c	Thu Apr 17 23:36:11 2008
@@ -291,4 +291,35 @@
   return our_type;
 }
 
+#define GEGL_ALIGN 16
 
+void *
+gegl_malloc (gsize size);
+
+void *
+gegl_malloc (gsize size)
+{
+  gint   off;
+  gint   i;
+  gint   to_add;
+  gchar *mem = g_malloc (size + GEGL_ALIGN + 1);
+  void *ret;
+  *mem='G';
+  off = (((guint)mem) + 1) % GEGL_ALIGN;
+  to_add = GEGL_ALIGN-off;
+  ret = (void*)(mem + 1 + to_add);
+  for (i=1;i<1+to_add;i++)
+    mem[i]=' ';
+  return ret;
+}
+
+void
+gegl_free (void *buf);
+void
+gegl_free (void *buf)
+{
+  gchar *p = buf;
+  while (*p!='G')
+   p--;
+  g_free (p);
+}

Modified: trunk/gegl/gegl-utils.h
==============================================================================
--- trunk/gegl/gegl-utils.h	(original)
+++ trunk/gegl/gegl-utils.h	Thu Apr 17 23:36:11 2008
@@ -78,6 +78,10 @@
 gint        _gegl_float_epsilon_equal (float     v1,
                                        float     v2);
 
+void *
+gegl_aligned_malloc (gsize size);
+void
+gegl_aligned_free (void *buf);
 
 G_END_DECLS
 

Modified: trunk/gegl/operation/gegl-operation-point-composer.c
==============================================================================
--- trunk/gegl/operation/gegl-operation-point-composer.c	(original)
+++ trunk/gegl/operation/gegl-operation-point-composer.c	Thu Apr 17 23:36:11 2008
@@ -134,7 +134,7 @@
     {
       gfloat *in_buf = NULL, *out_buf = NULL, *aux_buf = NULL;
 
-      in_buf = g_malloc (in_format->format.bytes_per_pixel *
+      in_buf = gegl_malloc (in_format->format.bytes_per_pixel *
                          output->extent.width * output->extent.height);
       if (in_format == out_format)
         {
@@ -142,7 +142,7 @@
         }
       else
         {
-          out_buf = g_malloc (out_format->format.bytes_per_pixel *
+          out_buf = gegl_malloc (out_format->format.bytes_per_pixel *
                               output->extent.width * output->extent.height);
         }
 
@@ -150,7 +150,7 @@
 
       if (aux)
         {
-          aux_buf = g_malloc (aux_format->format.bytes_per_pixel *
+          aux_buf = gegl_malloc (aux_format->format.bytes_per_pixel *
                              output->extent.width * output->extent.height);
           gegl_buffer_get (aux, 1.0, result, aux_format, aux_buf, GEGL_AUTO_ROWSTRIDE);
         }
@@ -165,11 +165,11 @@
 
       gegl_buffer_set (output, NULL, out_format, out_buf, GEGL_AUTO_ROWSTRIDE);
 
-      g_free (in_buf);
+      gegl_free (in_buf);
       if (in_format != out_format)
-        g_free (out_buf);
+        gegl_free (out_buf);
       if (aux)
-        g_free (aux_buf);
+        gegl_free (aux_buf);
     }
   return TRUE;
 }

Modified: trunk/gegl/operation/gegl-operation-point-filter.c
==============================================================================
--- trunk/gegl/operation/gegl-operation-point-filter.c	(original)
+++ trunk/gegl/operation/gegl-operation-point-filter.c	Thu Apr 17 23:36:11 2008
@@ -25,6 +25,7 @@
 #include "gegl-operation-point-filter.h"
 #include "graph/gegl-pad.h"
 #include "graph/gegl-node.h"
+#include "gegl-utils.h"
 #include <string.h>
 
 static gboolean process_inner (GeglOperation       *operation,
@@ -95,7 +96,7 @@
       if (in_format == out_format)
         {
           gfloat *buf;
-          buf = g_malloc (in_format->format.bytes_per_pixel *
+          buf = gegl_malloc (in_format->format.bytes_per_pixel *
                           output->extent.width * output->extent.height);
 
           gegl_buffer_get (input, 1.0, result, in_format, buf, GEGL_AUTO_ROWSTRIDE);
@@ -107,15 +108,15 @@
             output->extent.width * output->extent.height);
 
           gegl_buffer_set (output, result, out_format, buf, GEGL_AUTO_ROWSTRIDE);
-          g_free (buf);
+          gegl_free (buf);
         }
       else
         {
           gfloat *in_buf;
           gfloat *out_buf;
-          in_buf = g_malloc (in_format->format.bytes_per_pixel *
+          in_buf = gegl_malloc (in_format->format.bytes_per_pixel *
                              input->extent.width * input->extent.height);
-          out_buf = g_malloc (out_format->format.bytes_per_pixel *
+          out_buf = gegl_malloc (out_format->format.bytes_per_pixel *
                              output->extent.width * output->extent.height);
 
           gegl_buffer_get (input, 1.0, result, in_format, in_buf, GEGL_AUTO_ROWSTRIDE);
@@ -127,8 +128,8 @@
             output->extent.width * output->extent.height);
 
           gegl_buffer_set (output, result, out_format, out_buf, GEGL_AUTO_ROWSTRIDE);
-          g_free (in_buf);
-          g_free (out_buf);
+          gegl_free (in_buf);
+          gegl_free (out_buf);
         }
     }
   return TRUE;

Modified: trunk/gegl/operation/gegl-operation-processors.c
==============================================================================
--- trunk/gegl/operation/gegl-operation-processors.c	(original)
+++ trunk/gegl/operation/gegl-operation-processors.c	Thu Apr 17 23:36:11 2008
@@ -27,6 +27,7 @@
 #include "gegl-types.h"
 #include "gegl-operation.h"
 #include "gegl-utils.h"
+#include "gegl-cpuaccel.h"
 #include "graph/gegl-node.h"
 #include "graph/gegl-connection.h"
 #include "graph/gegl-pad.h"
@@ -107,12 +108,21 @@
                  g_type_name (G_TYPE_FROM_CLASS (cclass)));
     }
 
+#ifdef USE_SSE
+  /* always look for sse ops */
+#else
   if (g_getenv ("GEGL_QUALITY"))
+#endif
     {
       const gchar *quality = g_getenv ("GEGL_QUALITY");
       GCallback fast      = NULL;
       GCallback good      = NULL;
       GCallback reference = NULL;
+#ifdef USE_SSE
+      GCallback sse       = NULL;
+      if (quality == NULL)
+        quality = "sse";
+#endif
 
       for (i=0;i<MAX_PROCESSOR;i++)
         {
@@ -125,6 +135,10 @@
                 fast = cb;
               else if (g_str_equal (string, "good"))
                 good = cb;
+#ifdef USE_SSE
+              else if (g_str_equal (string, "sse"))
+                sse = cb;
+#endif
               else if (g_str_equal (string, "reference"))
                 reference = cb;
             }
@@ -133,16 +147,37 @@
       g_assert (reference);
       if (g_str_equal (quality, "fast"))
         {
+#ifdef USE_SSE
+          g_print ("Setting %s processor for %s\n", fast?"fast":sse?"sse":good?"good":"reference",
+          g_type_name (G_TYPE_FROM_CLASS (cclass)));
+          PROCESS_VFUNC = fast?fast:sse?sse:good?good:reference;
+#else
           g_print ("Setting %s processor for %s\n", fast?"fast":good?"good":"reference",
-           g_type_name (G_TYPE_FROM_CLASS (cclass)));
+          g_type_name (G_TYPE_FROM_CLASS (cclass)));
           PROCESS_VFUNC = fast?fast:good?good:reference;
+#endif
         }
       else if (g_str_equal (quality, "good"))
         {
+#ifdef USE_SSE
+          g_print ("Setting %s processor for %s\n", sse?"sse":good?"good":"reference",
+           g_type_name (G_TYPE_FROM_CLASS (cclass)));
+#else
           g_print ("Setting %s processor for %s\n", good?"good":"reference",
            g_type_name (G_TYPE_FROM_CLASS (cclass)));
           PROCESS_VFUNC = good?good:reference;
+#endif
+        }
+      else
+        {
+          /* best */
+#ifdef USE_SSE
+          if (sse && gegl_cpu_accel_get_support () & GEGL_CPU_ACCEL_X86_SSE)
+            g_print ("Setting sse processor for %s\n", g_type_name (G_TYPE_FROM_CLASS (cclass)));
+          PROCESS_VFUNC = sse?sse:reference;
+#else
+          PROCESS_VFUNC = reference;
+#endif
         }
-        /* best */
     }
 }

Modified: trunk/operations/common/invert.c
==============================================================================
--- trunk/operations/common/invert.c	(original)
+++ trunk/operations/common/invert.c	Thu Apr 17 23:36:11 2008
@@ -23,6 +23,7 @@
 
 #define GEGL_CHANT_TYPE_POINT_FILTER
 #define GEGL_CHANT_C_FILE       "invert.c"
+#define GEGLV4
 
 #include "gegl-chant.h"
 
@@ -53,38 +54,27 @@
   return TRUE;
 }
 
-
+#ifdef USE_SSE
 static gboolean
-process_fast (GeglOperation *op,
-              void          *in_buf,
-              void          *out_buf,
-              glong          samples)
+process_sse (GeglOperation *op,
+             void          *in_buf,
+             void          *out_buf,
+             glong          samples)
 {
-  glong   i;
-  gfloat *in  = in_buf;
-  gfloat *out = out_buf;
+  GeglV4 *in  = in_buf;
+  GeglV4 *out = out_buf;
+  GeglV4  one={{1.0,1.0,1.0,1.0}};
 
-  for (i=0; i<samples; i++)
+  while (--samples)
     {
-      int  j;
-      for (j=0; j<3; j++)
-        {
-          gfloat c;
-          c = in[j];
-          c = 1.0 - c;
-          if (i%2)
-            out[j] = c;
-          else
-            out[j] = (c - 0.5) * 2.0 + 0.5;
-        }
-      out[3]=in[3];
-      in += 4;
-      out+= 4;
+      out->v = one.v - in->v;
+      out->a[3]=in->a[3];
+      in  ++;
+      out ++;
     }
   return TRUE;
 }
-
-
+#endif
 
 static void
 gegl_chant_class_init (GeglChantClass *klass)
@@ -103,9 +93,10 @@
      "Inverts the components (except alpha), the result is the"
      " corresponding \"negative\" image.";
 
-  g_print ("hi\n");
+#ifdef USE_SSE
   gegl_operation_class_add_processor (operation_class,
-                                      G_CALLBACK (process_fast), "fast");
+                                      G_CALLBACK (process_sse), "sse");
+#endif
 }
 
 #endif



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]