babl r316 - in trunk: . babl extensions
- From: jheller svn gnome org
- To: svn-commits-list gnome org
- Subject: babl r316 - in trunk: . babl extensions
- Date: Fri, 23 May 2008 13:36:36 +0000 (UTC)
Author: jheller
Date: Fri May 23 13:36:35 2008
New Revision: 316
URL: http://svn.gnome.org/viewvc/babl?rev=316&view=rev
Log:
2008-05-23 Jan Heller <jheller svn gnome org>
Added support for CPU acceleration, RGBA float -> RGB u8 and
RGBA float -> RGBA u8 SSE accelerated conversions.
* babl/Makefile.am: Added babl-cpuaccel.[ch].
* babl/babl-cpuaccel.h:
* babl/babl-cpuaccel.c (babl_cpu_accel_get_support),
(babl_cpu_accel_set_use), (arch_get_vendor), (arch_accel_intel),
(arch_accel_amd), (arch_accel_centaur), (arch_accel_cyrix),
(sigill_handler), (arch_accel_sse_os_support), (arch_accel),
(cpu_accel): Runtime CPU detection code. Ported from GIMP.
* babl/babl-internal.h: Include babl-cpuaccel.h.
* babl/babl-memory.c (babl_malloc): Make babl_malloc align memory to
BABL_ALIGN==16 boundaries.
* babl/babl.c (babl_init): Enabled CPU acceleration.
* configure.ac: Added compile time MMX/SSE/AltiVec detection. Ported
from GIMP.
* extensions/Makefile.am: Added SSE_EXTRA_CFLAGS for sse-fixups.c
compilation.
* extensions/sse-fixups.c (conv_rgbaF_linear_rgb8_linear),
(conv_rgbaF_linear_rgba8_linear), (init): Added RGBA float -> RGB u8
and RGBA float -> RGBA u8 SSE accelerated conversions.
Added:
trunk/babl/babl-cpuaccel.c
trunk/babl/babl-cpuaccel.h
trunk/extensions/sse-fixups.c
Modified:
trunk/ChangeLog
trunk/babl/Makefile.am
trunk/babl/babl-internal.h
trunk/babl/babl-memory.c
trunk/babl/babl.c
trunk/configure.ac
trunk/extensions/Makefile.am
Modified: trunk/babl/Makefile.am
==============================================================================
--- trunk/babl/Makefile.am (original)
+++ trunk/babl/Makefile.am Fri May 23 13:36:35 2008
@@ -29,7 +29,8 @@
babl-type.c \
babl-util.c \
babl-list.c \
- babl-hash-table.c
+ babl-hash-table.c \
+ babl-cpuaccel.c
h_sources = \
babl-db.h \
@@ -39,7 +40,8 @@
babl-util.h \
babl.h \
babl-list.h \
- babl-hash-table.h
+ babl-hash-table.h \
+ babl-cpuaccel.h
library_includedir=$(includedir)/babl-$(BABL_API_VERSION)/babl
library_include_HEADERS = \
Added: trunk/babl/babl-cpuaccel.c
==============================================================================
--- (empty file)
+++ trunk/babl/babl-cpuaccel.c Fri May 23 13:36:35 2008
@@ -0,0 +1,497 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2005-2008, Øyvind Kolås and others.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * x86 bits Copyright (C) Manish Singh <yosh gimp org>
+ */
+
+/*
+ * PPC CPU acceleration detection was taken from DirectFB but seems to be
+ * originating from mpeg2dec with the following copyright:
+ *
+ * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma ess engr uvic ca>
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <signal.h>
+#include <setjmp.h>
+
+#include "babl-cpuaccel.h"
+
+typedef unsigned int gboolean;
+typedef unsigned int guint32;
+typedef int gint;
+typedef char gchar;
+#define TRUE 1
+#define FALSE 0
+#define G_GNUC_CONST
+
+static BablCpuAccelFlags cpu_accel (void) G_GNUC_CONST;
+
+static gboolean use_cpu_accel = TRUE;
+
+
+/**
+ * babl_cpu_accel_get_support:
+ *
+ * Report which CPU acceleration features may currently be used.
+ *
+ * Return value: the #BablCpuAccelFlags reported by cpu_accel(), or
+ * #BABL_CPU_ACCEL_NONE while acceleration is switched off through
+ * babl_cpu_accel_set_use().
+ */
+BablCpuAccelFlags
+babl_cpu_accel_get_support (void)
+{
+  if (!use_cpu_accel)
+    return BABL_CPU_ACCEL_NONE;
+
+  return cpu_accel ();
+}
+
+/**
+ * babl_cpu_accel_set_use:
+ * @use: non-zero to allow CPU acceleration, zero to disable it
+ *
+ * Stores the switch consulted by babl_cpu_accel_get_support(); any
+ * non-zero value is normalised to TRUE.
+ *
+ * This function is for internal use only.
+ */
+void
+babl_cpu_accel_set_use (gboolean use)
+{
+  if (use)
+    use_cpu_accel = TRUE;
+  else
+    use_cpu_accel = FALSE;
+}
+
+
+#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+
+/*
+ * CPU vendors distinguished by arch_get_vendor().  NONE is returned when
+ * CPUID cannot be used, UNKNOWN when the vendor string matches none of
+ * the names compared there.
+ */
+typedef enum
+{
+ ARCH_X86_VENDOR_NONE,
+ ARCH_X86_VENDOR_INTEL,
+ ARCH_X86_VENDOR_AMD,
+ ARCH_X86_VENDOR_CENTAUR,
+ ARCH_X86_VENDOR_CYRIX,
+ ARCH_X86_VENDOR_NSC,
+ ARCH_X86_VENDOR_TRANSMETA,
+ ARCH_X86_VENDOR_NEXGEN,
+ ARCH_X86_VENDOR_RISE,
+ ARCH_X86_VENDOR_UMC,
+ ARCH_X86_VENDOR_SIS,
+ ARCH_X86_VENDOR_UNKNOWN = 0xff
+} X86Vendor;
+
+/*
+ * Feature bits tested against the EDX value returned by CPUID.
+ * The INTEL bits are checked on leaf 1 (arch_accel_intel); the AMD,
+ * CENTAUR and CYRIX bits are checked on leaf 0x80000001 by the
+ * corresponding arch_accel_*() functions.
+ *
+ * NOTE(review): `1 << 31` shifts into the sign bit of a signed int;
+ * the result relies on the compiler's behaviour (this code is only
+ * built when __GNUC__ is defined) -- confirm this is acceptable.
+ */
+enum
+{
+ ARCH_X86_INTEL_FEATURE_MMX = 1 << 23,
+ ARCH_X86_INTEL_FEATURE_XMM = 1 << 25,
+ ARCH_X86_INTEL_FEATURE_XMM2 = 1 << 26,
+
+ ARCH_X86_AMD_FEATURE_MMXEXT = 1 << 22,
+ ARCH_X86_AMD_FEATURE_3DNOW = 1 << 31,
+
+ ARCH_X86_CENTAUR_FEATURE_MMX = 1 << 23,
+ ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
+ ARCH_X86_CENTAUR_FEATURE_3DNOW = 1 << 31,
+
+ ARCH_X86_CYRIX_FEATURE_MMX = 1 << 23,
+ ARCH_X86_CYRIX_FEATURE_MMXEXT = 1 << 24
+};
+
+/* Feature bit tested against the ECX value of CPUID leaf 1
+ * (arch_accel_intel maps it to BABL_CPU_ACCEL_X86_SSE3). */
+enum
+{
+ ARCH_X86_INTEL_FEATURE_PNI = 1 << 0
+};
+
+/*
+ * cpuid(op, eax, ebx, ecx, edx):
+ *
+ * Execute the CPUID instruction with `op` loaded into EAX and store the
+ * resulting EAX/EBX/ECX/EDX values into the four lvalue arguments.
+ *
+ * The first variant is selected for position-independent builds that are
+ * not ARCH_X86_64: it copies EBX into ESI before CPUID and exchanges the
+ * two afterwards, so EBX holds its original value again when the asm
+ * ends and the EBX result is delivered through ESI.
+ * NOTE(review): presumably needed because EBX is reserved for PIC code
+ * on ia32 -- confirm.
+ */
+#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("movl %%ebx, %%esi\n\t" \
+ "cpuid\n\t" \
+ "xchgl %%ebx,%%esi" \
+ : "=a" (eax), \
+ "=S" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "0" (op))
+#else
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("cpuid" \
+ : "=a" (eax), \
+ "=b" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "0" (op))
+#endif
+
+
+/*
+ * arch_get_vendor:
+ *
+ * Identify the x86 CPU vendor from the 12-character string returned by
+ * CPUID leaf 0.
+ *
+ * Returns ARCH_X86_VENDOR_NONE when CPUID cannot be used (on ia32 builds:
+ * bit 0x200000 of EFLAGS cannot be toggled) or when leaf 0 reports 0 in
+ * EAX, the matching ARCH_X86_VENDOR_* value for a recognised vendor
+ * string, and ARCH_X86_VENDOR_UNKNOWN otherwise.  On ARCH_X86_64 builds
+ * only the AMD and Intel strings are compared.
+ */
+static X86Vendor
+arch_get_vendor (void)
+{
+  guint32 eax, ebx, ecx, edx;
+  gchar   id[16];
+
+#ifndef ARCH_X86_64
+  /* Only need to check this on ia32: flip bit 0x200000 in EFLAGS, read
+   * the flags back and compare with the original value.  If the bit did
+   * not change, CPUID is not available. */
+  __asm__ ("pushfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "movl %0,%1\n\t"
+           "xorl $0x200000,%0\n\t"
+           "pushl %0\n\t"
+           "popfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "popfl"
+           : "=a" (eax),
+             "=c" (ecx)
+           :
+           : "cc");
+
+  if (eax == ecx)
+    return ARCH_X86_VENDOR_NONE;
+#endif
+
+  cpuid (0, eax, ebx, ecx, edx);
+
+  if (eax == 0)
+    return ARCH_X86_VENDOR_NONE;
+
+  /* Assemble the vendor string in EBX, EDX, ECX order.  memcpy() is used
+   * instead of storing through a casted int pointer, which would violate
+   * strict-aliasing rules and assume id[] is suitably aligned. */
+  memcpy (&id[0], &ebx, sizeof (ebx));
+  memcpy (&id[4], &edx, sizeof (edx));
+  memcpy (&id[8], &ecx, sizeof (ecx));
+
+  id[12] = '\0';
+
+#ifdef ARCH_X86_64
+  if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+#else
+  if (strcmp (id, "GenuineIntel") == 0)
+    return ARCH_X86_VENDOR_INTEL;
+  else if (strcmp (id, "AuthenticAMD") == 0)
+    return ARCH_X86_VENDOR_AMD;
+  else if (strcmp (id, "CentaurHauls") == 0)
+    return ARCH_X86_VENDOR_CENTAUR;
+  else if (strcmp (id, "CyrixInstead") == 0)
+    return ARCH_X86_VENDOR_CYRIX;
+  else if (strcmp (id, "Geode by NSC") == 0)
+    return ARCH_X86_VENDOR_NSC;
+  else if (strcmp (id, "GenuineTMx86") == 0 ||
+           strcmp (id, "TransmetaCPU") == 0)
+    return ARCH_X86_VENDOR_TRANSMETA;
+  else if (strcmp (id, "NexGenDriven") == 0)
+    return ARCH_X86_VENDOR_NEXGEN;
+  else if (strcmp (id, "RiseRiseRise") == 0)
+    return ARCH_X86_VENDOR_RISE;
+  else if (strcmp (id, "UMC UMC UMC ") == 0)
+    return ARCH_X86_VENDOR_UMC;
+  else if (strcmp (id, "SiS SiS SiS ") == 0)
+    return ARCH_X86_VENDOR_SIS;
+#endif
+
+  return ARCH_X86_VENDOR_UNKNOWN;
+}
+
+/*
+ * arch_accel_intel:
+ *
+ * Query CPUID leaf 1 and translate its feature bits into
+ * BABL_CPU_ACCEL_X86_* flags.  Returns 0 when built without USE_MMX or
+ * when the CPU does not report MMX; the SSE-family flags are only
+ * examined when built with USE_SSE.
+ */
+static guint32
+arch_accel_intel (void)
+{
+  guint32 flags = 0;
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (1, eax, ebx, ecx, edx);
+
+    /* No MMX: report nothing at all. */
+    if (!(edx & ARCH_X86_INTEL_FEATURE_MMX))
+      return 0;
+
+    flags = BABL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+    if ((edx & ARCH_X86_INTEL_FEATURE_XMM) != 0)
+      flags |= BABL_CPU_ACCEL_X86_MMXEXT | BABL_CPU_ACCEL_X86_SSE;
+
+    if ((edx & ARCH_X86_INTEL_FEATURE_XMM2) != 0)
+      flags |= BABL_CPU_ACCEL_X86_SSE2;
+
+    if ((ecx & ARCH_X86_INTEL_FEATURE_PNI) != 0)
+      flags |= BABL_CPU_ACCEL_X86_SSE3;
+#endif /* USE_SSE */
+  }
+#endif /* USE_MMX */
+
+  return flags;
+}
+
+/*
+ * arch_accel_amd:
+ *
+ * Start from the flags reported by arch_accel_intel() and, when CPUID
+ * leaf 0x80000001 exists and the build has USE_SSE, add the 3DNow! and
+ * extended-MMX flags found in its EDX value.
+ */
+static guint32
+arch_accel_amd (void)
+{
+  guint32 flags = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    /* Leaf 0x80000000 returns the highest extended leaf in EAX. */
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+
+    if (eax >= 0x80000001)
+      {
+#ifdef USE_SSE
+        cpuid (0x80000001, eax, ebx, ecx, edx);
+
+        if ((edx & ARCH_X86_AMD_FEATURE_3DNOW) != 0)
+          flags |= BABL_CPU_ACCEL_X86_3DNOW;
+
+        if ((edx & ARCH_X86_AMD_FEATURE_MMXEXT) != 0)
+          flags |= BABL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+      }
+  }
+#endif /* USE_MMX */
+
+  return flags;
+}
+
+/*
+ * arch_accel_centaur:
+ *
+ * Start from the flags reported by arch_accel_intel() and, when CPUID
+ * leaf 0x80000001 exists, add MMX (and, with USE_SSE, 3DNow! and
+ * extended MMX) according to the Centaur bits in its EDX value.
+ */
+static guint32
+arch_accel_centaur (void)
+{
+  guint32 flags = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    /* Leaf 0x80000000 returns the highest extended leaf in EAX. */
+    cpuid (0x80000000, eax, ebx, ecx, edx);
+
+    if (eax >= 0x80000001)
+      {
+        cpuid (0x80000001, eax, ebx, ecx, edx);
+
+        if ((edx & ARCH_X86_CENTAUR_FEATURE_MMX) != 0)
+          flags |= BABL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+        if ((edx & ARCH_X86_CENTAUR_FEATURE_3DNOW) != 0)
+          flags |= BABL_CPU_ACCEL_X86_3DNOW;
+
+        if ((edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT) != 0)
+          flags |= BABL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+      }
+  }
+#endif /* USE_MMX */
+
+  return flags;
+}
+
+/*
+ * arch_accel_cyrix:
+ *
+ * Start from the flags reported by arch_accel_intel().  Only when CPUID
+ * leaf 0 returns exactly 2 in EAX is leaf 0x80000001 queried, adding MMX
+ * (and, with USE_SSE, extended MMX) according to the Cyrix bits in EDX.
+ */
+static guint32
+arch_accel_cyrix (void)
+{
+  guint32 flags = arch_accel_intel ();
+
+#ifdef USE_MMX
+  {
+    guint32 eax, ebx, ecx, edx;
+
+    cpuid (0, eax, ebx, ecx, edx);
+
+    if (eax == 2)
+      {
+        cpuid (0x80000001, eax, ebx, ecx, edx);
+
+        if ((edx & ARCH_X86_CYRIX_FEATURE_MMX) != 0)
+          flags |= BABL_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+        if ((edx & ARCH_X86_CYRIX_FEATURE_MMXEXT) != 0)
+          flags |= BABL_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+      }
+  }
+#endif /* USE_MMX */
+
+  return flags;
+}
+
+#ifdef USE_SSE
+/* Jump target used to recover when the SSE probe raises SIGILL. */
+static jmp_buf sigill_return;
+
+/* SIGILL handler installed only while the probe runs: jump back to the
+ * setjmp() in arch_accel_sse_os_support(). */
+static void
+sigill_handler (gint n)
+{
+  (void) n;
+
+  longjmp (sigill_return, 1);
+}
+
+/*
+ * arch_accel_sse_os_support:
+ *
+ * Check whether SSE instructions can actually be executed by running a
+ * single `xorps` with a temporary SIGILL handler in place.
+ *
+ * Returns TRUE if the instruction ran, FALSE if it raised SIGILL or if
+ * the temporary handler could not be installed.  The SIGILL disposition
+ * that was active on entry is restored on every path, so the handler
+ * never stays installed pointing at a stale jump buffer.
+ *
+ * NOTE(review): after the handler leaves through longjmp() the signal
+ * mask is not restored by this code; whether SIGILL stays blocked depends
+ * on the platform's signal()/setjmp() semantics -- confirm.
+ */
+static gboolean
+arch_accel_sse_os_support (void)
+{
+  void     (*previous) (int);
+  gboolean   supported;
+
+  /* Install the handler before setjmp() so that `previous` is not
+   * modified between setjmp() and a possible longjmp(). */
+  previous = signal (SIGILL, sigill_handler);
+  if (previous == SIG_ERR)
+    return FALSE;
+
+  if (setjmp (sigill_return))
+    {
+      /* Reached through sigill_handler(): the instruction trapped. */
+      supported = FALSE;
+    }
+  else
+    {
+      __asm__ __volatile__ ("xorps %xmm0, %xmm0");
+      supported = TRUE;
+    }
+
+  signal (SIGILL, previous);
+
+  return supported;
+}
+#endif /* USE_SSE */
+
+/*
+ * arch_accel:
+ *
+ * Detect the x86 acceleration features: identify the vendor, run the
+ * matching CPUID probe (the Intel probe is used for every vendor without
+ * a dedicated one, including unknown vendors), then drop the SSE-family
+ * flags if the run-time probe shows SSE instructions cannot be executed.
+ *
+ * Returns a mask of BABL_CPU_ACCEL_X86_* flags, 0 if none.
+ */
+static guint32
+arch_accel (void)
+{
+  guint32   caps;
+  X86Vendor vendor;
+
+  vendor = arch_get_vendor ();
+
+  switch (vendor)
+    {
+    case ARCH_X86_VENDOR_NONE:
+      caps = 0;
+      break;
+
+    case ARCH_X86_VENDOR_AMD:
+      caps = arch_accel_amd ();
+      break;
+
+    case ARCH_X86_VENDOR_CENTAUR:
+      caps = arch_accel_centaur ();
+      break;
+
+    case ARCH_X86_VENDOR_CYRIX:
+    case ARCH_X86_VENDOR_NSC:
+      caps = arch_accel_cyrix ();
+      break;
+
+      /* check for what Intel speced, even if UNKNOWN */
+    default:
+      caps = arch_accel_intel ();
+      break;
+    }
+
+#ifdef USE_SSE
+  /* If the SSE probe instruction cannot run, none of the SSE-family
+   * extensions is usable, so SSE3 is cleared together with SSE and
+   * SSE2. */
+  if ((caps & BABL_CPU_ACCEL_X86_SSE) && !arch_accel_sse_os_support ())
+    caps &= ~(BABL_CPU_ACCEL_X86_SSE  |
+              BABL_CPU_ACCEL_X86_SSE2 |
+              BABL_CPU_ACCEL_X86_SSE3);
+#endif
+
+  return caps;
+}
+
+#endif /* ARCH_X86 && USE_MMX && __GNUC__ */
+
+
+#if defined(ARCH_PPC) && defined (USE_ALTIVEC)
+
+#if defined(HAVE_ALTIVEC_SYSCTL)
+
+#include <sys/sysctl.h>
+
+#define HAVE_ACCEL 1
+
+/*
+ * arch_accel (sysctl variant, used when HAVE_ALTIVEC_SYSCTL is defined):
+ *
+ * Query the { CTL_HW, HW_VECTORUNIT } sysctl.
+ *
+ * Returns BABL_CPU_ACCEL_PPC_ALTIVEC when the call succeeds and reports
+ * a non-zero value, 0 otherwise.
+ */
+static guint32
+arch_accel (void)
+{
+  gint     sels[2] = { CTL_HW, HW_VECTORUNIT };
+  gboolean has_vu  = FALSE;
+  /* size_t, not gsize: this file only provides local replacements for
+   * gboolean, guint32, gint and gchar. */
+  size_t   length  = sizeof (has_vu);
+  gint     err;
+
+  err = sysctl (sels, 2, &has_vu, &length, NULL, 0);
+
+  if (err == 0 && has_vu)
+    return BABL_CPU_ACCEL_PPC_ALTIVEC;
+
+  return 0;
+}
+
+#elif defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+/* Jump target for the AltiVec probe, and a flag telling sigill_handler()
+ * whether jumping to it is currently expected. */
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+/*
+ * SIGILL handler used while probing for AltiVec.  When the probe is not
+ * armed (canjump == 0) the default disposition is restored and the
+ * signal is raised again; otherwise the flag is cleared and control
+ * returns to the sigsetjmp() in arch_accel().
+ */
+static void
+sigill_handler (gint sig)
+{
+ if (!canjump)
+ {
+ signal (sig, SIG_DFL);
+ raise (sig);
+ }
+
+ canjump = 0;
+ siglongjmp (jmpbuf, 1);
+}
+
+/*
+ * arch_accel (signal-probe variant, used with __GNUC__ when
+ * HAVE_ALTIVEC_SYSCTL is not defined):
+ *
+ * Executes a `vand` instruction with a SIGILL handler installed.
+ * Returns BABL_CPU_ACCEL_PPC_ALTIVEC if execution continues past the
+ * instruction, 0 if the handler jumped back.  SIGILL is reset to SIG_DFL
+ * on both paths.
+ */
+static guint32
+arch_accel (void)
+{
+ signal (SIGILL, sigill_handler);
+
+ /* Non-zero return means we arrived here through siglongjmp(). */
+ if (sigsetjmp (jmpbuf, 1))
+ {
+ signal (SIGILL, SIG_DFL);
+ return 0;
+ }
+
+ canjump = 1;
+
+ /* Write -1 to SPR 256 (presumably VRSAVE -- confirm), then run the
+ * vector instruction being probed. */
+ asm volatile ("mtspr 256, %0\n\t"
+ "vand %%v0, %%v0, %%v0"
+ :
+ : "r" (-1));
+
+ signal (SIGILL, SIG_DFL);
+
+ return BABL_CPU_ACCEL_PPC_ALTIVEC;
+}
+#endif /* __GNUC__ */
+
+#endif /* ARCH_PPC && USE_ALTIVEC */
+
+
+/*
+ * cpu_accel:
+ *
+ * Return the acceleration flags for this CPU.  When HAVE_ACCEL is
+ * defined, arch_accel() is run while the cached value still holds the
+ * ~0U "not probed yet" marker and its result is stored in a function
+ * static; without HAVE_ACCEL the answer is always BABL_CPU_ACCEL_NONE.
+ */
+static BablCpuAccelFlags
+cpu_accel (void)
+{
+#ifdef HAVE_ACCEL
+  static guint32 cached = ~0U;
+
+  if (cached == ~0U)
+    cached = arch_accel ();
+
+  return (BablCpuAccelFlags) cached;
+
+#else /* !HAVE_ACCEL */
+  return BABL_CPU_ACCEL_NONE;
+#endif
+}
Added: trunk/babl/babl-cpuaccel.h
==============================================================================
--- (empty file)
+++ trunk/babl/babl-cpuaccel.h Fri May 23 13:36:35 2008
@@ -0,0 +1,43 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2005-2008, Øyvind Kolås and others.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _BABL_CPU_ACCEL_H
+#define _BABL_CPU_ACCEL_H
+
+/*
+ * Bit flags describing CPU acceleration features.  Each feature has its
+ * own bit so several can be OR-ed into one value, as returned by
+ * babl_cpu_accel_get_support().
+ */
+typedef enum
+{
+ BABL_CPU_ACCEL_NONE = 0x0,
+
+ /* x86 accelerations */
+ BABL_CPU_ACCEL_X86_MMX = 0x80000000,
+ BABL_CPU_ACCEL_X86_3DNOW = 0x40000000,
+ BABL_CPU_ACCEL_X86_MMXEXT = 0x20000000,
+ BABL_CPU_ACCEL_X86_SSE = 0x10000000,
+ BABL_CPU_ACCEL_X86_SSE2 = 0x08000000,
+ BABL_CPU_ACCEL_X86_SSE3 = 0x02000000,
+
+ /* powerpc accelerations */
+ BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000
+} BablCpuAccelFlags;
+
+
+BablCpuAccelFlags babl_cpu_accel_get_support (void);
+void babl_cpu_accel_set_use (unsigned int use);
+
+
+#endif /* _BABL_CPU_ACCEL_H */
Modified: trunk/babl/babl-internal.h
==============================================================================
--- trunk/babl/babl-internal.h (original)
+++ trunk/babl/babl-internal.h Fri May 23 13:36:35 2008
@@ -45,6 +45,7 @@
#include "babl-ids.h"
#include "babl-util.h"
#include "babl-memory.h"
+#include "babl-cpuaccel.h"
/* redefining some functions for the win32 platform */
#ifdef _WIN32
Modified: trunk/babl/babl-memory.c
==============================================================================
--- trunk/babl/babl-memory.c (original)
+++ trunk/babl/babl-memory.c Fri May 23 13:36:35 2008
@@ -47,9 +47,9 @@
size_t size;
} BablAllocInfo;
-#define OFFSET (sizeof (BablAllocInfo))
-
-#define BAI(ptr) ((BablAllocInfo *) (((char *) ptr) - OFFSET))
+#define BABL_ALIGN 16
+#define BABL_ALLOC (sizeof (BablAllocInfo) + sizeof (void *))
+#define BAI(ptr) ((BablAllocInfo *) *((void **) ptr - 1))
#define IS_BAI(ptr) (BAI (ptr)->signature == signature)
/* runtime statistics: */
@@ -96,18 +96,23 @@
babl_malloc (size_t size)
{
char *ret;
+ int offset;
babl_assert (size);
functions_sanity ();
- ret = malloc_f (size + OFFSET);
+ ret = malloc_f (BABL_ALLOC + BABL_ALIGN + size);
if (!ret)
babl_fatal ("args=(%i): failed", size);
- BAI (ret + OFFSET)->signature = signature;
- BAI (ret + OFFSET)->size = size;
+ offset = BABL_ALIGN - ((unsigned int) ret + BABL_ALLOC) % BABL_ALIGN;
+ ret = ret + BABL_ALLOC + offset;
+
+ *((void **) ret - 1) = ret - BABL_ALLOC - offset;
+ BAI (ret)->signature = signature;
+ BAI (ret)->size = size;
mallocs++;
- return (void *) (ret + OFFSET);
+ return (void *) (ret);
}
/* Create a duplicate allocation of the same size, note
Modified: trunk/babl/babl.c
==============================================================================
--- trunk/babl/babl.c (original)
+++ trunk/babl/babl.c Fri May 23 13:36:35 2008
@@ -23,6 +23,8 @@
void
babl_init (void)
{
+ babl_cpu_accel_set_use (1);
+
if (ref_count++ == 0)
{
babl_internal_init ();
Modified: trunk/configure.ac
==============================================================================
--- trunk/configure.ac (original)
+++ trunk/configure.ac Fri May 23 13:36:35 2008
@@ -244,6 +244,125 @@
AM_CONDITIONAL(OS_WIN32, test "$os_win32" = "yes")
AM_CONDITIONAL(OS_UNIX, test "$os_win32" != "yes")
+
+dnl ===========================================================================
+
+
+########################
+# Check for MMX assembly
+########################
+
+AC_ARG_ENABLE(mmx,
+ [ --enable-mmx enable MMX support (default=auto)],,
+ enable_mmx=$have_x86)
+
+AC_ARG_ENABLE(sse,
+ [ --enable-sse enable SSE support (default=auto)],,
+ enable_sse=$enable_mmx)
+
+if test "x$enable_mmx" = xyes; then
+ BABL_DETECT_CFLAGS(MMX_EXTRA_CFLAGS, '-mmmx')
+ SSE_EXTRA_CFLAGS=
+
+ AC_MSG_CHECKING(whether we can compile MMX code)
+
+ mmx_save_CFLAGS="$CFLAGS"
+ CFLAGS="$mmx_save_CFLAGS $MMX_EXTRA_CFLAGS"
+
+ AC_COMPILE_IFELSE([asm ("movq 0, %mm0");],
+
+ AC_DEFINE(USE_MMX, 1, [Define to 1 if MMX assembly is available.])
+ AC_MSG_RESULT(yes)
+
+ if test "x$enable_sse" = xyes; then
+ BABL_DETECT_CFLAGS(sse_flag, '-msse')
+ SSE_EXTRA_CFLAGS="$MMX_EXTRA_CFLAGS $sse_flag"
+
+ AC_MSG_CHECKING(whether we can compile SSE code)
+
+ CFLAGS="$CFLAGS $sse_flag"
+
+ AC_COMPILE_IFELSE([asm ("movntps %xmm0, 0");],
+ AC_DEFINE(USE_SSE, 1, [Define to 1 if SSE assembly is available.])
+ AC_MSG_RESULT(yes)
+ ,
+ enable_sse=no
+ AC_MSG_RESULT(no)
+ AC_MSG_WARN([The assembler does not support the SSE command set.])
+ )
+
+ fi
+ ,
+ enable_mmx=no
+ AC_MSG_RESULT(no)
+ AC_MSG_WARN([The assembler does not support the MMX command set.])
+ )
+
+ CFLAGS="$mmx_save_CFLAGS"
+
+ AC_SUBST(MMX_EXTRA_CFLAGS)
+ AC_SUBST(SSE_EXTRA_CFLAGS)
+fi
+
+
+############################
+# Check for AltiVec assembly
+############################
+
+AC_ARG_ENABLE(altivec,
+ [ --enable-altivec enable AltiVec support (default=auto)],,
+ enable_altivec=$have_ppc)
+
+if test "x$enable_altivec" = xyes; then
+
+ BABL_DETECT_CFLAGS(altivec_flag, '-faltivec' '-maltivec -mabi=altivec')
+
+ ALTIVEC_EXTRA_CFLAGS=
+ case "$altivec_flag" in
+ -maltivec*)
+ altivec_save_CPPFLAGS="$CPPFLAGS"
+ CPPFLAGS="$altivec_save_CPPFLAGS $altivec_flag"
+ AC_CHECK_HEADERS(altivec.h, [ALTIVEC_EXTRA_CFLAGS="$altivec_flag"])
+ CPPFLAGS="$altivec_save_CPPFLAGS"
+ ;;
+ *)
+ ALTIVEC_EXTRA_CFLAGS="$altivec_flag"
+ ;;
+ esac
+ AC_SUBST(ALTIVEC_EXTRA_CFLAGS)
+
+ AC_MSG_CHECKING(whether we can compile AltiVec code)
+
+ can_use_altivec=no
+ if test -z "$ALTIVEC_EXTRA_CFLAGS"; then
+ AC_MSG_RESULT(no)
+ AC_MSG_WARN([The compiler does not support the AltiVec command set.])
+ else
+ case "$target_or_host" in
+ *-*-darwin*)
+ can_use_altivec=yes
+ AC_DEFINE(HAVE_ALTIVEC_SYSCTL, 1,
+ [Define to 1 if the altivec runtime test should use a sysctl.])
+ ;;
+ *)
+ AC_COMPILE_IFELSE([asm ("vand %v0, %v0, %v0");],
+ can_use_altivec=yes, can_use_altivec=no)
+ ;;
+ esac
+ AC_MSG_RESULT($can_use_altivec)
+
+ if test "x$can_use_altivec" = "xyes"; then
+ AC_DEFINE(USE_ALTIVEC, 1, [Define to 1 if AltiVec support is available.])
+ else
+ enable_altivec=no
+ AC_MSG_WARN([The assembler does not support the AltiVec command set.])
+ fi
+ fi
+
+ enable_altivec="$can_use_altivec"
+fi
+
+
dnl ===========================================================================
AC_SEARCH_LIBS([dlopen], [dl])
Modified: trunk/extensions/Makefile.am
==============================================================================
--- trunk/extensions/Makefile.am (original)
+++ trunk/extensions/Makefile.am Fri May 23 13:36:35 2008
@@ -38,6 +38,9 @@
CIE-Lab$(SHREXT): CIE-Lab.c
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDADD)
+
+sse-fixups$(SHREXT): sse-fixups.c
+ $(CC) $(CFLAGS) $(MMX_EXTRA_CFLAGS) $(SSE_EXTRA_CFLAGS) $(LDFLAGS) -o $@ $< $(LDADD)
#lcms$(SHREXT): lcms.c
# $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDADD) `pkg-config lcms --cflags --libs`
#############################################################################
Added: trunk/extensions/sse-fixups.c
==============================================================================
--- (empty file)
+++ trunk/extensions/sse-fixups.c Fri May 23 13:36:35 2008
@@ -0,0 +1,159 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2005-2008, Øyvind Kolås and others.
+ *
+ * SSE optimized conversion routines.
+ * Copyright (C) 2008, Jan Heller.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#if defined(__GNUC__) && (__GNUC__ >= 4) && defined(USE_SSE) && defined(USE_MMX)
+
+#include "babl.h"
+#include "babl-cpuaccel.h"
+
+#define INLINE inline
+
+typedef float g4float __attribute__ ((vector_size (4*sizeof(float))));
+typedef int g2int __attribute__ ((vector_size (2*sizeof(int))));
+
+#define g4float(a,b,c,d) ((g4float){a,b,c,d})
+#define g4float_all(val) g4float(val,val,val,val)
+#define g4float_zero g4float_all(0.0)
+#define g4float_ff g4float_all(255.0)
+
+#define g4float_max(a,b) __builtin_ia32_maxps(a, b)
+#define g4float_min(a,b) __builtin_ia32_minps(a, b)
+#define g4float_cvt2pi(a) __builtin_ia32_cvtps2pi(a)
+#define g4float_movhl(a,b) __builtin_ia32_movhlps(a, b)
+#define g4float_emms __builtin_ia32_emms
+
+
+/*
+ * conv_rgbaF_linear_rgb8_linear:
+ * @src:     input pixels, four floats each
+ * @dst:     output pixels, three bytes each
+ * @samples: number of pixels to convert
+ *
+ * For every pixel: multiply the four floats by 255, clamp to [0, 255],
+ * convert to integers and store the low byte of the first three; the
+ * fourth value is not written.  The MMX state is cleared with emms once
+ * the loop is done.
+ *
+ * NOTE(review): @src is dereferenced as a 16-byte vector type, so it
+ * presumably has to be 16-byte aligned -- confirm callers guarantee it.
+ *
+ * Returns @samples.
+ */
+static INLINE long
+conv_rgbaF_linear_rgb8_linear (unsigned char *src,
+                               unsigned char *dst,
+                               long           samples)
+{
+  g4float *pixel = (g4float *) src;
+  long     left;
+
+  union {
+    g2int         packed;
+    unsigned char byte[8];
+  } lo, hi;
+
+  for (left = samples; left != 0; left--, pixel++, dst += 3)
+    {
+      g4float scaled = *pixel * g4float_ff;
+
+      scaled = g4float_min (scaled, g4float_ff);
+      scaled = g4float_max (scaled, g4float_zero);
+
+      /* Low pair of values first, then the high pair moved down. */
+      lo.packed = g4float_cvt2pi (scaled);
+      hi.packed = g4float_cvt2pi (g4float_movhl (scaled, scaled));
+
+      dst[0] = lo.byte[0];
+      dst[1] = lo.byte[4];
+      dst[2] = hi.byte[0];
+    }
+
+  g4float_emms ();
+
+  return samples;
+}
+
+
+/*
+ * conv_rgbaF_linear_rgba8_linear:
+ * @src:     input pixels, four floats each
+ * @dst:     output pixels, four bytes each
+ * @samples: number of pixels to convert
+ *
+ * For every pixel: multiply the four floats by 255, clamp to [0, 255],
+ * convert to integers and store the low byte of each of the four.  The
+ * MMX state is cleared with emms once the loop is done.
+ *
+ * NOTE(review): @src is dereferenced as a 16-byte vector type, so it
+ * presumably has to be 16-byte aligned -- confirm callers guarantee it.
+ *
+ * Returns @samples.
+ */
+static INLINE long
+conv_rgbaF_linear_rgba8_linear (unsigned char *src,
+                                unsigned char *dst,
+                                long           samples)
+{
+  g4float *pixel = (g4float *) src;
+  long     left;
+
+  union {
+    g2int         packed;
+    unsigned char byte[8];
+  } lo, hi;
+
+  for (left = samples; left != 0; left--, pixel++, dst += 4)
+    {
+      g4float scaled = *pixel * g4float_ff;
+
+      scaled = g4float_min (scaled, g4float_ff);
+      scaled = g4float_max (scaled, g4float_zero);
+
+      /* Low pair of values first, then the high pair moved down. */
+      lo.packed = g4float_cvt2pi (scaled);
+      hi.packed = g4float_cvt2pi (g4float_movhl (scaled, scaled));
+
+      dst[0] = lo.byte[0];
+      dst[1] = lo.byte[4];
+      dst[2] = hi.byte[0];
+      dst[3] = hi.byte[4];
+    }
+
+  g4float_emms ();
+
+  return samples;
+}
+
+#endif
+
+/* Register conv_<src>_<dst> as the "linear" conversion from src to dst. */
+#define o(src, dst) \
+  babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
+
+int init (void);
+
+/*
+ * init:
+ *
+ * Extension entry point.  When the SSE conversions are compiled in, the
+ * three pixel formats they operate on are created and, if the CPU
+ * reports both MMX and SSE at run time, the two conversions are
+ * registered.
+ *
+ * Everything that needs the babl API sits under the same preprocessor
+ * guard as the babl.h include and the conversion functions, so a build
+ * without SSE support still compiles and init() simply returns.
+ *
+ * Returns 0.
+ */
+int
+init (void)
+{
+#if defined(__GNUC__) && (__GNUC__ >= 4) && defined(USE_SSE) && defined(USE_MMX)
+
+  Babl *rgbaF_linear = babl_format_new (
+    babl_model ("RGBA"),
+    babl_type ("float"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    babl_component ("A"),
+    NULL);
+  Babl *rgba8_linear = babl_format_new (
+    babl_model ("RGBA"),
+    babl_type ("u8"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    babl_component ("A"),
+    NULL);
+  Babl *rgb8_linear = babl_format_new (
+    babl_model ("RGB"),
+    babl_type ("u8"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    NULL);
+
+  if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_MMX) &&
+      (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE))
+    {
+      o (rgbaF_linear, rgb8_linear);
+      o (rgbaF_linear, rgba8_linear);
+    }
+
+#endif
+
+  return 0;
+}
+
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]