babl r316 - in trunk: . babl extensions



Author: jheller
Date: Fri May 23 13:36:35 2008
New Revision: 316
URL: http://svn.gnome.org/viewvc/babl?rev=316&view=rev

Log:
2008-05-23  Jan Heller  <jheller svn gnome org>

        Added support for runtime CPU acceleration detection and
        SSE-accelerated RGBA float -> RGB u8 and RGBA float -> RGBA u8
        conversions.

        * babl/Makefile.am: Added babl-cpuaccel.[ch].
        * babl/babl-cpuaccel.h:
        * babl/babl-cpuaccel.c (babl_cpu_accel_get_support),
        (babl_cpu_accel_set_use), (arch_get_vendor), (arch_accel_intel),
        (arch_accel_amd), (arch_accel_centaur), (arch_accel_cyrix),
        (sigill_handler), (arch_accel_sse_os_support), (arch_accel),
        (cpu_accel): Runtime CPU detection code. Ported from GIMP.
        * babl/babl-internal.h: Include babl-cpuaccel.h.
        * babl/babl-memory.c (babl_malloc): Make babl_malloc align returned
        memory to BABL_ALIGN==16 boundaries (a sketch of the scheme follows
        this log entry).
        * babl/babl.c (babl_init): Enabled CPU acceleration.
        * configure.ac: Added compile-time MMX/SSE/AltiVec detection. Ported
        from GIMP.
        * extensions/Makefile.am: Added SSE_EXTRA_CFLAGS for sse-fixups.c
        compilation.
        * extensions/sse-fixups.c (conv_rgbaF_linear_rgb8_linear),
        (conv_rgbaF_linear_rgba8_linear), (init): Added RGBA float -> RGB u8
        and RGBA float -> RGBA u8 SSE accelerated conversions.
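
The babl_malloc change over-allocates, pads the returned address up to the
next 16-byte boundary, and stores the raw malloc pointer in the word just
before the address handed to the caller so it can be recovered when the
block is freed. A minimal standalone sketch of that scheme, with
illustrative names (aligned_alloc_16/aligned_free_16 are not babl API):

  #include <stdlib.h>
  #include <stdint.h>

  #define ALIGN 16

  static void *
  aligned_alloc_16 (size_t size)
  {
    /* room for the payload, the saved raw pointer and worst-case padding */
    char *raw = malloc (size + sizeof (void *) + ALIGN);
    char *user;

    if (!raw)
      return NULL;

    user  = raw + sizeof (void *);
    user += ALIGN - ((uintptr_t) user % ALIGN);  /* pad up to the boundary */

    ((void **) user)[-1] = raw;                  /* remember the raw block */
    return user;
  }

  static void
  aligned_free_16 (void *ptr)
  {
    if (ptr)
      free (((void **) ptr)[-1]);                /* release the raw block */
  }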


Added:
   trunk/babl/babl-cpuaccel.c
   trunk/babl/babl-cpuaccel.h
   trunk/extensions/sse-fixups.c
Modified:
   trunk/ChangeLog
   trunk/babl/Makefile.am
   trunk/babl/babl-internal.h
   trunk/babl/babl-memory.c
   trunk/babl/babl.c
   trunk/configure.ac
   trunk/extensions/Makefile.am

Modified: trunk/babl/Makefile.am
==============================================================================
--- trunk/babl/Makefile.am	(original)
+++ trunk/babl/Makefile.am	Fri May 23 13:36:35 2008
@@ -29,7 +29,8 @@
 	babl-type.c			\
 	babl-util.c			\
 	babl-list.c			\
-	babl-hash-table.c
+	babl-hash-table.c               \
+	babl-cpuaccel.c
 
 h_sources  =             		\
 	babl-db.h			\
@@ -39,7 +40,8 @@
 	babl-util.h			\
 	babl.h				\
 	babl-list.h			\
-	babl-hash-table.h
+	babl-hash-table.h               \
+	babl-cpuaccel.h
 
 library_includedir=$(includedir)/babl-$(BABL_API_VERSION)/babl
 library_include_HEADERS =		\

Added: trunk/babl/babl-cpuaccel.c
==============================================================================
--- (empty file)
+++ trunk/babl/babl-cpuaccel.c	Fri May 23 13:36:35 2008
@@ -0,0 +1,497 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2005-2008, Øyvind Kolås and others.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * x86 bits Copyright (C) Manish Singh <yosh gimp org>
+ */
+
+/*
+ * PPC CPU acceleration detection was taken from DirectFB but seems to be
+ * originating from mpeg2dec with the following copyright:
+ *
+ * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma ess engr uvic ca>
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <signal.h>
+#include <setjmp.h>
+
+#include "babl-cpuaccel.h"
+
+typedef unsigned int gboolean;
+typedef unsigned int guint32;
+typedef int gint;
+typedef char gchar;
+#define TRUE  1
+#define FALSE 0
+#define G_GNUC_CONST
+
+static BablCpuAccelFlags  cpu_accel (void) G_GNUC_CONST;
+
+static gboolean  use_cpu_accel = TRUE;
+
+
+/**
+ * babl_cpu_accel_get_support:
+ *
+ * Query for CPU acceleration support.
+ *
+ * Return value: #BablCpuAccelFlags as supported by the CPU.
+ */
+BablCpuAccelFlags
+babl_cpu_accel_get_support (void)
+{
+  return use_cpu_accel ? cpu_accel () : BABL_CPU_ACCEL_NONE;
+}
+
+/**
+ * babl_cpu_accel_set_use:
+ * @use:  whether to use CPU acceleration features or not
+ *
+ * This function is for internal use only.
+ */
+void
+babl_cpu_accel_set_use (gboolean use)
+{
+  use_cpu_accel = use ? TRUE : FALSE;
+}
+
+
+#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+
+typedef enum
+{
+  ARCH_X86_VENDOR_NONE,
+  ARCH_X86_VENDOR_INTEL,
+  ARCH_X86_VENDOR_AMD,
+  ARCH_X86_VENDOR_CENTAUR,
+  ARCH_X86_VENDOR_CYRIX,
+  ARCH_X86_VENDOR_NSC,
+  ARCH_X86_VENDOR_TRANSMETA,
+  ARCH_X86_VENDOR_NEXGEN,
+  ARCH_X86_VENDOR_RISE,
+  ARCH_X86_VENDOR_UMC,
+  ARCH_X86_VENDOR_SIS,
+  ARCH_X86_VENDOR_UNKNOWN    = 0xff
+} X86Vendor;
+
+enum
+{
+  ARCH_X86_INTEL_FEATURE_MMX      = 1 << 23,
+  ARCH_X86_INTEL_FEATURE_XMM      = 1 << 25,
+  ARCH_X86_INTEL_FEATURE_XMM2     = 1 << 26,
+
+  ARCH_X86_AMD_FEATURE_MMXEXT     = 1 << 22,
+  ARCH_X86_AMD_FEATURE_3DNOW      = 1 << 31,
+
+  ARCH_X86_CENTAUR_FEATURE_MMX    = 1 << 23,
+  ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
+  ARCH_X86_CENTAUR_FEATURE_3DNOW  = 1 << 31,
+
+  ARCH_X86_CYRIX_FEATURE_MMX      = 1 << 23,
+  ARCH_X86_CYRIX_FEATURE_MMXEXT   = 1 << 24
+};
+
+enum
+{
+  ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0
+};
+
+#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
+#define cpuid(op,eax,ebx,ecx,edx)  \
+  __asm__ ("movl %%ebx, %%esi\n\t" \
+           "cpuid\n\t"             \
+           "xchgl %%ebx,%%esi"     \
+           : "=a" (eax),           \
+             "=S" (ebx),           \
+             "=c" (ecx),           \
+             "=d" (edx)            \
+           : "0" (op))
+#else
+#define cpuid(op,eax,ebx,ecx,edx)  \
+  __asm__ ("cpuid"                 \
+           : "=a" (eax),           \
+             "=b" (ebx),           \
+             "=c" (ecx),           \
+             "=d" (edx)            \
+           : "0" (op))
+#endif
+
+
+static X86Vendor
+arch_get_vendor (void)
+{
+  guint32 eax, ebx, ecx, edx;
+  gchar   id[16];
+
+#ifndef ARCH_X86_64
+  /* Only need to check this on ia32 */
+  __asm__ ("pushfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "movl %0,%1\n\t"
+           "xorl $0x200000,%0\n\t"
+           "pushl %0\n\t"
+           "popfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "popfl"
+           : "=a" (eax),
+             "=c" (ecx)
+           :
+           : "cc");
+
+  if (eax == ecx)
+    return ARCH_X86_VENDOR_NONE;
+#endif
+
+  cpuid (0, eax, ebx, ecx, edx);
+
+  if (eax == 0)
+    return ARCH_X86_VENDOR_NONE;
+
+  *(int *)&id[0] = ebx;
+  *(int *)&id[4] = edx;
+  *(int *)&id[8] = ecx;
+
+  id[12] = '\0';
+
+#ifdef ARCH_X86_64
+  if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+#else
+  if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+  else if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "CentaurHauls") == 0)
+    return ARCH_X86_VENDOR_CENTAUR;
+  else if (strcmp (id, "CyrixInstead") == 0)
+    return ARCH_X86_VENDOR_CYRIX;
+  else if (strcmp (id, "Geode by NSC") == 0)
+    return ARCH_X86_VENDOR_NSC;
+  else if (strcmp (id, "GenuineTMx86") == 0 ||
+           strcmp (id, "TransmetaCPU") == 0)
+    return ARCH_X86_VENDOR_TRANSMETA;
+  else if (strcmp (id, "NexGenDriven") == 0)
+    return ARCH_X86_VENDOR_NEXGEN;
+  else if (strcmp (id, "RiseRiseRise") == 0)
+    return ARCH_X86_VENDOR_RISE;
+  else if (strcmp (id, "UMC UMC UMC ") == 0)
+    return ARCH_X86_VENDOR_UMC;
+  else if (strcmp (id, "SiS SiS SiS ") == 0)
+    return ARCH_X86_VENDOR_SIS;
+#endif
+
+  return ARCH_X86_VENDOR_UNKNOWN;
+}
+
+static guint32
+arch_accel_intel (void)
+{
+  guint32 caps = 0;
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (1, eax, ebx, ecx, edx);
+
+    if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)
+      return 0;
+
+    caps = BABL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+    if (edx & ARCH_X86_INTEL_FEATURE_XMM)
+      caps |= BABL_CPU_ACCEL_X86_SSE | BABL_CPU_ACCEL_X86_MMXEXT;
+
+    if (edx & ARCH_X86_INTEL_FEATURE_XMM2)
+      caps |= BABL_CPU_ACCEL_X86_SSE2;
+
+    if (ecx & ARCH_X86_INTEL_FEATURE_PNI)
+      caps |= BABL_CPU_ACCEL_X86_SSE3;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+static guint32
+arch_accel_amd (void)
+{
+  guint32 caps;
+
+  caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+
+    if (eax < 0x80000001)
+      return caps;
+
+#ifdef USE_SSE
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & ARCH_X86_AMD_FEATURE_3DNOW)
+      caps |= BABL_CPU_ACCEL_X86_3DNOW;
+
+    if (edx & ARCH_X86_AMD_FEATURE_MMXEXT)
+      caps |= BABL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+static guint32
+arch_accel_centaur (void)
+{
+  guint32 caps;
+
+  caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+
+    if (eax < 0x80000001)
+      return caps;
+
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & ARCH_X86_CENTAUR_FEATURE_MMX)
+      caps |= BABL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+    if (edx & ARCH_X86_CENTAUR_FEATURE_3DNOW)
+      caps |= BABL_CPU_ACCEL_X86_3DNOW;
+
+    if (edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
+      caps |= BABL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+static guint32
+arch_accel_cyrix (void)
+{
+  guint32 caps;
+
+  caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (0, eax, ebx, ecx, edx);
+
+    if (eax != 2)
+      return caps;
+
+    cpuid (0x80000001, eax, ebx, ecx, edx);
+
+    if (edx & ARCH_X86_CYRIX_FEATURE_MMX)
+      caps |= BABL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+    if (edx & ARCH_X86_CYRIX_FEATURE_MMXEXT)
+      caps |= BABL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return caps;
+}
+
+#ifdef USE_SSE
+static jmp_buf sigill_return;
+
+static void
+sigill_handler (gint n)
+{
+  longjmp (sigill_return, 1);
+}
+
+static gboolean
+arch_accel_sse_os_support (void)
+{
+  if (setjmp (sigill_return))
+    {
+      return FALSE;
+    }
+  else
+    {
+      signal (SIGILL, sigill_handler);
+      __asm__ __volatile__ ("xorps %xmm0, %xmm0");
+      signal (SIGILL, SIG_DFL);
+    }
+
+  return TRUE;
+}
+#endif /* USE_SSE */
+
+static guint32
+arch_accel (void)
+{
+  guint32 caps;
+  X86Vendor vendor;
+
+  vendor = arch_get_vendor ();
+
+  switch (vendor)
+    {
+    case ARCH_X86_VENDOR_NONE:
+      caps = 0;
+      break;
+
+    case ARCH_X86_VENDOR_AMD:
+      caps = arch_accel_amd ();
+      break;
+
+    case ARCH_X86_VENDOR_CENTAUR:
+      caps = arch_accel_centaur ();
+      break;
+
+    case ARCH_X86_VENDOR_CYRIX:
+    case ARCH_X86_VENDOR_NSC:
+      caps = arch_accel_cyrix ();
+      break;
+
+    /* check for what Intel speced, even if UNKNOWN */
+    default:
+      caps = arch_accel_intel ();
+      break;
+    }
+
+#ifdef USE_SSE
+  if ((caps & BABL_CPU_ACCEL_X86_SSE) && !arch_accel_sse_os_support ())
+    caps &= ~(BABL_CPU_ACCEL_X86_SSE | BABL_CPU_ACCEL_X86_SSE2);
+#endif
+
+  return caps;
+}
+
+#endif /* ARCH_X86 && USE_MMX && __GNUC__ */
+
+
+#if defined(ARCH_PPC) && defined (USE_ALTIVEC)
+
+#if defined(HAVE_ALTIVEC_SYSCTL)
+
+#include <sys/sysctl.h>
+
+#define HAVE_ACCEL 1
+
+static guint32
+arch_accel (void)
+{
+  gint     sels[2] = { CTL_HW, HW_VECTORUNIT };
+  gboolean has_vu  = FALSE;
+  gsize    length  = sizeof(has_vu);
+  gint     err;
+
+  err = sysctl (sels, 2, &has_vu, &length, NULL, 0);
+
+  if (err == 0 && has_vu)
+    return BABL_CPU_ACCEL_PPC_ALTIVEC;
+
+  return 0;
+}
+
+#elif defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+static          sigjmp_buf   jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static void
+sigill_handler (gint sig)
+{
+  if (!canjump)
+    {
+      signal (sig, SIG_DFL);
+      raise (sig);
+    }
+
+  canjump = 0;
+  siglongjmp (jmpbuf, 1);
+}
+
+static guint32
+arch_accel (void)
+{
+  signal (SIGILL, sigill_handler);
+
+  if (sigsetjmp (jmpbuf, 1))
+    {
+      signal (SIGILL, SIG_DFL);
+      return 0;
+    }
+
+  canjump = 1;
+
+  asm volatile ("mtspr 256, %0\n\t"
+                "vand %%v0, %%v0, %%v0"
+                :
+                : "r" (-1));
+
+  signal (SIGILL, SIG_DFL);
+
+  return BABL_CPU_ACCEL_PPC_ALTIVEC;
+}
+#endif /* __GNUC__ */
+
+#endif /* ARCH_PPC && USE_ALTIVEC */
+
+
+static BablCpuAccelFlags
+cpu_accel (void)
+{
+#ifdef HAVE_ACCEL
+  static guint32 accel = ~0U;
+
+  if (accel != ~0U)
+    return accel;
+
+  accel = arch_accel ();
+
+  return (BablCpuAccelFlags) accel;
+
+#else /* !HAVE_ACCEL */
+  return BABL_CPU_ACCEL_NONE;
+#endif
+}

Added: trunk/babl/babl-cpuaccel.h
==============================================================================
--- (empty file)
+++ trunk/babl/babl-cpuaccel.h	Fri May 23 13:36:35 2008
@@ -0,0 +1,43 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2005-2008, Øyvind Kolås and others.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _BABL_CPU_ACCEL_H
+#define _BABL_CPU_ACCEL_H
+
+typedef enum
+{
+  BABL_CPU_ACCEL_NONE        = 0x0,
+
+  /* x86 accelerations */
+  BABL_CPU_ACCEL_X86_MMX     = 0x80000000,
+  BABL_CPU_ACCEL_X86_3DNOW   = 0x40000000,
+  BABL_CPU_ACCEL_X86_MMXEXT  = 0x20000000,
+  BABL_CPU_ACCEL_X86_SSE     = 0x10000000,
+  BABL_CPU_ACCEL_X86_SSE2    = 0x08000000,
+  BABL_CPU_ACCEL_X86_SSE3    = 0x02000000,
+
+  /* powerpc accelerations */
+  BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000
+} BablCpuAccelFlags;
+
+
+BablCpuAccelFlags  babl_cpu_accel_get_support (void);
+void               babl_cpu_accel_set_use     (unsigned int use);
+
+
+#endif  /* _BABL_CPU_ACCEL_H */

Modified: trunk/babl/babl-internal.h
==============================================================================
--- trunk/babl/babl-internal.h	(original)
+++ trunk/babl/babl-internal.h	Fri May 23 13:36:35 2008
@@ -45,6 +45,7 @@
 #include "babl-ids.h"
 #include "babl-util.h"
 #include "babl-memory.h"
+#include "babl-cpuaccel.h"
 
 /* redefining some functions for the win32 platform */
 #ifdef _WIN32

Modified: trunk/babl/babl-memory.c
==============================================================================
--- trunk/babl/babl-memory.c	(original)
+++ trunk/babl/babl-memory.c	Fri May 23 13:36:35 2008
@@ -47,9 +47,9 @@
   size_t size;
 } BablAllocInfo;
 
-#define OFFSET    (sizeof (BablAllocInfo))
-
-#define BAI(ptr)       ((BablAllocInfo *) (((char *) ptr) - OFFSET))
+#define BABL_ALIGN     16
+#define BABL_ALLOC     (sizeof (BablAllocInfo) + sizeof (void *))
+#define BAI(ptr)       ((BablAllocInfo *) *((void **) ptr - 1))
 #define IS_BAI(ptr)    (BAI (ptr)->signature == signature)
 
 /* runtime statistics: */
@@ -96,18 +96,23 @@
 babl_malloc (size_t size)
 {
   char *ret;
+  int  offset;
 
   babl_assert (size);
 
   functions_sanity ();
-  ret = malloc_f (size + OFFSET);
+  ret = malloc_f (BABL_ALLOC + BABL_ALIGN + size);
   if (!ret)
     babl_fatal ("args=(%i): failed", size);
 
-  BAI (ret + OFFSET)->signature = signature;
-  BAI (ret + OFFSET)->size      = size;
+  offset = BABL_ALIGN - ((unsigned int) ret + BABL_ALLOC) % BABL_ALIGN;
+  ret = ret + BABL_ALLOC + offset;
+
+  *((void **) ret - 1) = ret - BABL_ALLOC - offset;
+  BAI (ret)->signature = signature;
+  BAI (ret)->size      = size;
   mallocs++;
-  return (void *) (ret + OFFSET);
+  return (void *) (ret);
 }
 
 /* Create a duplicate allocation of the same size, note

Modified: trunk/babl/babl.c
==============================================================================
--- trunk/babl/babl.c	(original)
+++ trunk/babl/babl.c	Fri May 23 13:36:35 2008
@@ -23,6 +23,8 @@
 void
 babl_init (void)
 {
+  babl_cpu_accel_set_use (1);
+
   if (ref_count++ == 0)
     {
       babl_internal_init ();

Modified: trunk/configure.ac
==============================================================================
--- trunk/configure.ac	(original)
+++ trunk/configure.ac	Fri May 23 13:36:35 2008
@@ -244,6 +244,125 @@
 AM_CONDITIONAL(OS_WIN32, test "$os_win32" = "yes")
 AM_CONDITIONAL(OS_UNIX, test "$os_win32" != "yes")
 
+
+dnl ===========================================================================
+
+
+########################
+# Check for MMX assembly
+########################
+
+AC_ARG_ENABLE(mmx,
+  [  --enable-mmx            enable MMX support (default=auto)],,
+  enable_mmx=$have_x86)
+
+AC_ARG_ENABLE(sse,
+  [  --enable-sse            enable SSE support (default=auto)],,
+  enable_sse=$enable_mmx)
+
+if test "x$enable_mmx" = xyes; then
+  BABL_DETECT_CFLAGS(MMX_EXTRA_CFLAGS, '-mmmx')
+  SSE_EXTRA_CFLAGS=
+
+  AC_MSG_CHECKING(whether we can compile MMX code)
+
+  mmx_save_CFLAGS="$CFLAGS"
+  CFLAGS="$mmx_save_CFLAGS $MMX_EXTRA_CFLAGS"
+
+  AC_COMPILE_IFELSE([asm ("movq 0, %mm0");],
+
+    AC_DEFINE(USE_MMX, 1, [Define to 1 if MMX assembly is available.])
+    AC_MSG_RESULT(yes)
+
+    if test "x$enable_sse" = xyes; then
+      BABL_DETECT_CFLAGS(sse_flag, '-msse')
+      SSE_EXTRA_CFLAGS="$MMX_EXTRA_CFLAGS $sse_flag"
+
+      AC_MSG_CHECKING(whether we can compile SSE code)
+
+      CFLAGS="$CFLAGS $sse_flag"
+
+      AC_COMPILE_IFELSE([asm ("movntps %xmm0, 0");],
+        AC_DEFINE(USE_SSE, 1, [Define to 1 if SSE assembly is available.])
+        AC_MSG_RESULT(yes)
+      ,
+        enable_sse=no
+        AC_MSG_RESULT(no)
+        AC_MSG_WARN([The assembler does not support the SSE command set.])
+      )
+
+    fi
+  ,
+    enable_mmx=no
+    AC_MSG_RESULT(no)
+    AC_MSG_WARN([The assembler does not support the MMX command set.])
+  )
+
+  CFLAGS="$mmx_save_CFLAGS"
+
+  AC_SUBST(MMX_EXTRA_CFLAGS)
+  AC_SUBST(SSE_EXTRA_CFLAGS)
+fi
+
+
+############################
+# Check for AltiVec assembly
+############################
+
+AC_ARG_ENABLE(altivec,
+  [  --enable-altivec        enable AltiVec support (default=auto)],,
+  enable_altivec=$have_ppc)
+
+if test "x$enable_altivec" = xyes; then
+
+  BABL_DETECT_CFLAGS(altivec_flag, '-faltivec' '-maltivec -mabi=altivec')
+
+  ALTIVEC_EXTRA_CFLAGS=
+  case "$altivec_flag" in
+    -maltivec*)
+      altivec_save_CPPFLAGS="$CPPFLAGS"
+      CPPFLAGS="$altivec_save_CPPFLAGS $altivec_flag"
+      AC_CHECK_HEADERS(altivec.h, [ALTIVEC_EXTRA_CFLAGS="$altivec_flag"])
+      CPPFLAGS="$altivec_save_CPPFLAGS"
+      ;;
+    *)
+      ALTIVEC_EXTRA_CFLAGS="$altivec_flag"
+      ;;
+  esac
+  AC_SUBST(ALTIVEC_EXTRA_CFLAGS)
+
+  AC_MSG_CHECKING(whether we can compile AltiVec code)
+
+  can_use_altivec=no
+  if test -z "$ALTIVEC_EXTRA_CFLAGS"; then
+    AC_MSG_RESULT(no)
+    AC_MSG_WARN([The compiler does not support the AltiVec command set.])
+  else
+    case "$target_or_host" in
+      *-*-darwin*)
+        can_use_altivec=yes
+        AC_DEFINE(HAVE_ALTIVEC_SYSCTL, 1,
+         [Define to 1 if the altivec runtime test should use a sysctl.])
+        ;;
+      *)
+        AC_COMPILE_IFELSE([asm ("vand %v0, %v0, %v0");],
+          can_use_altivec=yes, can_use_altivec=no)
+        ;;
+    esac
+    AC_MSG_RESULT($can_use_altivec)
+
+    if test "x$can_use_altivec" = "xyes"; then
+      AC_DEFINE(USE_ALTIVEC, 1, [Define to 1 if AltiVec support is available.])
+    else
+      enable_altivec=no
+      AC_MSG_WARN([The assembler does not support the AltiVec command set.])
+    fi
+  fi
+
+  enable_altivec="$can_use_altivec"
+fi
+
+
 dnl ===========================================================================
 
 AC_SEARCH_LIBS([dlopen], [dl])

Modified: trunk/extensions/Makefile.am
==============================================================================
--- trunk/extensions/Makefile.am	(original)
+++ trunk/extensions/Makefile.am	Fri May 23 13:36:35 2008
@@ -38,6 +38,9 @@
 
 CIE-Lab$(SHREXT): CIE-Lab.c
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDADD)
+
+sse-fixups$(SHREXT): sse-fixups.c
+	$(CC) $(CFLAGS) $(MMX_EXTRA_CFLAGS) $(SSE_EXTRA_CFLAGS) $(LDFLAGS) -o $@ $< $(LDADD)
 #lcms$(SHREXT): lcms.c
 #	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDADD) `pkg-config lcms --cflags --libs`
 #############################################################################

Added: trunk/extensions/sse-fixups.c
==============================================================================
--- (empty file)
+++ trunk/extensions/sse-fixups.c	Fri May 23 13:36:35 2008
@@ -0,0 +1,159 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2005-2008, Øyvind Kolås and others.
+ *
+ * SSE optimized conversion routines.
+ * Copyright (C) 2008, Jan Heller.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#if defined(__GNUC__) && (__GNUC__ >= 4) && defined(USE_SSE) && defined(USE_MMX)
+
+#include "babl.h"
+#include "babl-cpuaccel.h"
+
+#define INLINE inline
+
+typedef float g4float __attribute__ ((vector_size (4*sizeof(float))));
+typedef int   g2int   __attribute__ ((vector_size (2*sizeof(int))));
+
+#define g4float(a,b,c,d)  ((g4float){a,b,c,d})
+#define g4float_all(val)  g4float(val,val,val,val)
+#define g4float_zero      g4float_all(0.0)
+#define g4float_ff        g4float_all(255.0)
+
+#define g4float_max(a,b)    __builtin_ia32_maxps(a, b)
+#define g4float_min(a,b)    __builtin_ia32_minps(a, b)
+#define g4float_cvt2pi(a)   __builtin_ia32_cvtps2pi(a)
+#define g4float_movhl(a,b)  __builtin_ia32_movhlps(a, b)
+#define g4float_emms        __builtin_ia32_emms
+
+
+static INLINE long
+conv_rgbaF_linear_rgb8_linear (unsigned char *src, 
+                               unsigned char *dst, 
+                               long           samples)
+{
+  long n = samples;
+  g4float *g4src = (g4float *) src;
+  g4float v;
+
+  union {
+   g2int si; 
+   unsigned char c[8];
+  } u;
+
+  while (n--)
+    {
+       v = *g4src++ * g4float_ff;
+       v = g4float_min(v, g4float_ff);
+       v = g4float_max(v, g4float_zero);
+       u.si = g4float_cvt2pi (v);
+       *dst++  = u.c[0];
+       *dst++  = u.c[4];
+       v = g4float_movhl (v, v);
+       u.si = g4float_cvt2pi (v);  
+       *dst++  = u.c[0];
+    }
+
+  g4float_emms ();
+
+  return samples;
+}
+
+
+static INLINE long
+conv_rgbaF_linear_rgba8_linear (unsigned char *src, 
+                                unsigned char *dst, 
+                                long           samples)
+{
+  long n = samples;
+  g4float *g4src = (g4float *) src;
+  g4float v;
+
+  union {
+   g2int si; 
+   unsigned char c[8];
+  } u;
+
+  while (n--)
+    {
+       v = *g4src++ * g4float_ff;
+       v = g4float_min(v, g4float_ff);
+       v = g4float_max(v, g4float_zero);
+       u.si = g4float_cvt2pi (v);
+       *dst++  = u.c[0];
+       *dst++  = u.c[4];
+       v = g4float_movhl (v, v);
+       u.si = g4float_cvt2pi (v);  
+       *dst++  = u.c[0];
+       *dst++  = u.c[4];
+    }
+
+  g4float_emms ();
+
+  return samples;
+}
+
+#endif
+
+#define o(src, dst) \
+  babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
+
+int init (void);
+
+int
+init (void)
+{
+  Babl *rgbaF_linear = babl_format_new (
+    babl_model ("RGBA"),
+    babl_type ("float"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    babl_component ("A"),
+    NULL);
+  Babl *rgba8_linear = babl_format_new (
+    babl_model ("RGBA"),
+    babl_type ("u8"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    babl_component ("A"),
+    NULL);
+  Babl *rgb8_linear = babl_format_new (
+    babl_model ("RGB"),
+    babl_type ("u8"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    NULL);
+
+#if defined(__GNUC__) && (__GNUC__ >= 4) && defined(USE_SSE) && defined(USE_MMX)
+
+  if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_MMX) &&
+      (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE))
+    {
+      o (rgbaF_linear, rgb8_linear);
+      o (rgbaF_linear, rgba8_linear);
+    }
+
+#endif
+
+  return 0;
+}
+


