r3883 - in trunk/bse: . tests

From: stw svn gnome org
To: svn-commits-list gnome org
Subject: r3883 - in trunk/bse: . tests
Date: Sat, 16 Sep 2006 04:02:21 -0400 (EDT)
Author: stw
Date: 2006-09-16 04:02:17 -0400 (Sat, 16 Sep 2006)
New Revision: 3883

Added:
   trunk/bse/bsedatahandle-resample.cc
   trunk/bse/bseresampler.cc
   trunk/bse/bseresampler.hh
   trunk/bse/bseresampler.tcc
   trunk/bse/tests/resamplehandle.cc
   trunk/bse/tests/resamplercapi.c
   trunk/bse/tests/testresampler.cc
   trunk/bse/tests/testresamplercoeffs.h
Modified:
   trunk/bse/ChangeLog
   trunk/bse/Makefile.am
   trunk/bse/bseblockutils.cc
   trunk/bse/bseblockutils.hh
   trunk/bse/gsldatahandle.h
   trunk/bse/tests/.cvsignore
   trunk/bse/tests/Makefile.am
Log:
Sat Sep 16 09:25:17 2006  Stefan Westerfeld  <stefan space twc de>

	* bseresampler.tcc bseresampler.cc bseresampler.hh: Added factor 2
	resampling code (up- and downsampling), which can use SSE instructions
	when available. The implementation was tuned for speed.

	* gsldatahandle.h bsedatahandle-resample.cc: Provide a datahandle,
	which does factor 2 upsampling.

	* bseblockutils.cc bseblockutils.hh: Provide factory method for the
	resampler, so that bse can automatically use the SSE version when SSE
	instructions are available.

	* tests/testresamplercoeffs.h tests/testresampler.cc: Standalone
	interactive test code for the resampler.

	* tests/resamplercapi.c tests/resamplehandle.cc: Tests for the
	resampler C API and the resampler data handle.

	* Makefile.am: Adapted Makefile rules for the new resampler sources.

	* tests/Makefile.am: Adapted Makefile rules for the new resampler
	tests. Fixed subnormals_CXXFLAGS, which now includes AM_CXXFLAGS.


Modified: trunk/bse/ChangeLog
===================================================================
--- trunk/bse/ChangeLog	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/ChangeLog	2006-09-16 08:02:17 UTC (rev 3883)
@@ -1,3 +1,27 @@
+Sat Sep 16 09:25:17 2006  Stefan Westerfeld  <stefan space twc de>
+
+	* bseresampler.tcc bseresampler.cc bseresampler.hh: Added factor 2
+	resampling code (up- and downsampling), which can use SSE instructions
+	when available. The implementation was tuned for speed.
+
+	* gsldatahandle.h bsedatahandle-resample.cc: Provide a datahandle,
+	which does factor 2 upsampling.
+
+	* bseblockutils.cc bseblockutils.hh: Provide factory method for the
+	resampler, so that bse can automatically use the SSE version when SSE
+	instructions are available.
+
+	* tests/testresamplercoeffs.h tests/testresampler.cc: Standalone
+	interactive test code for the resampler.
+
+	* tests/resamplercapi.c tests/resamplehandle.cc: Tests for the
+	resampler C API and the resampler data handle.
+
+	* Makefile.am: Adapted Makefile rules for the new resampler sources.
+
+	* tests/Makefile.am: Adapted Makefile rules for the new resampler
+	tests. Fixed subnormals_CXXFLAGS, which now includes AM_CXXFLAGS.
+
 Sun Sep 10 22:46:55 2006  Tim Janik  <timj gtk org>
 
 	* gsldatautils.h, gsldatautils.c: added bse_wave_file_from_fbuffer() and

Modified: trunk/bse/Makefile.am
===================================================================
--- trunk/bse/Makefile.am	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/Makefile.am	2006-09-16 08:02:17 UTC (rev 3883)
@@ -55,6 +55,7 @@
 	bsenote.h		bsemidifile.h		bseblockutils.hh		\
 	bsecxxvalue.hh		bsecxxutils.hh		bsecxxbase.hh			bsecxxclosure.hh \
 	bsecxxarg.hh		bsecxxmodule.hh		bsecxxplugin.hh			bseloader.h \
+	bseresampler.hh		bseresampler.tcc	\
 )
 # BSE C sources
 bse_c_sources = $(strip \
@@ -86,6 +87,7 @@
 	bsenote.c		bsemidifile.c		bseblockutils.cc		\
 	bsecxxvalue.cc		bsecxxutils.cc		bsecxxbase.cc			bsecxxclosure.cc \
 	bsecxxarg.cc		bsecxxmodule.cc		bsecxxplugin.cc			bseloader.c \
+	bseresampler.cc 	bsedatahandle-resample.cc				\
 	bseloader-aiff.c	bseloader-guspatch.cc	bseloader-oggvorbis.c		bseloader-bsewave.c \
 	bseloader-mad.c		bseloader-wav.c		\
 )

Modified: trunk/bse/bseblockutils.cc
===================================================================
--- trunk/bse/bseblockutils.cc	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseblockutils.cc	2006-09-16 08:02:17 UTC (rev 3883)
@@ -18,6 +18,8 @@
  * Boston, MA 02111-1307, USA.
  */
 #include "bseblockutils.hh"
+#include "bseresampler.hh"
+#include "bseresampler.tcc"
 
 namespace {
 class BlockImpl : virtual public Bse::Block::Impl {
@@ -144,6 +146,12 @@
     max_value = maxv;
     return square_sum;
   }
+  virtual Bse::Resampler::Resampler2*
+  create_resampler2 (BseResampler2Mode      mode,
+                     BseResampler2Precision precision)
+  {
+    return Bse::Resampler::Resampler2::create_impl<false> (mode, precision);
+  }
 };
 static BlockImpl default_block_impl;
 } // Anon

Modified: trunk/bse/bseblockutils.hh
===================================================================
--- trunk/bse/bseblockutils.hh	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseblockutils.hh	2006-09-16 08:02:17 UTC (rev 3883)
@@ -1,5 +1,6 @@
 /* BSE - Bedevilled Sound Engine
  * Copyright (C) 2006 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -76,6 +77,7 @@
 G_END_DECLS
 
 #ifdef  __cplusplus
+#include <bse/bseresampler.hh>
 namespace Bse {
 
 /* --- C++ API --- */
@@ -124,6 +126,13 @@
                                               const float    *ivalues,
 					      float&          min_value,
 					      float&          max_value)     { return singleton->range_and_square_sum (n_values, ivalues, min_value, max_value); }
+
+  typedef Resampler::Resampler2 Resampler2;
+  static inline
+  Resampler2*           create_resampler2    (BseResampler2Mode      mode,
+                                              BseResampler2Precision precision)	  { return singleton->create_resampler2 (mode, precision); }
+
+
     
   class Impl {
   protected:
@@ -159,7 +168,10 @@
                                          const float    *ivalues,
 					 float&          min_value,
 					 float&          max_value) = 0;
-    friend class Block;
+    virtual
+    Resampler2*   create_resampler2     (BseResampler2Mode      mode,
+                                         BseResampler2Precision precision) = 0;
+  friend class Block;
     static  void  substitute            (Impl           *substitute_impl);
   };
   static Impl*  default_singleton       ();

Added: trunk/bse/bsedatahandle-resample.cc
===================================================================
--- trunk/bse/bsedatahandle-resample.cc	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bsedatahandle-resample.cc	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,312 @@
+/* BSE Resampling Datahandles
+ * Copyright (C) 2001-2003 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "bseresampler.hh"
+#include "gsldatahandle.h"
+#include <sfi/sficxx.hh>
+#include <vector>
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+}
+
+using std::vector;
+using Resampler::Resampler2;
+
+class DataHandleUpsample2 : public GslDataHandle,
+			    public Sfi::GNewable /* 0 initialization */
+{
+  GslDataHandle	       *src_handle;
+  int                   precision_bits;
+
+public:
+  gboolean		init_ok;
+  vector<Resampler2 *>  upsamplers;
+  int64			pcm_frame;
+  vector<float>		pcm_data;
+  int64			frame_size;
+  int64                 filter_delay;
+  int64                 filter_order;
+
+  DataHandleUpsample2 (GslDataHandle *src_handle, int precision_bits)
+    : src_handle (src_handle),
+      precision_bits (precision_bits),
+      init_ok (false)
+  {
+    g_return_if_fail (src_handle != NULL);
+  
+    init_ok = gsl_data_handle_common_init (this, NULL);
+    if (init_ok)
+      {
+	name = g_strconcat (src_handle->name, "// #upsample2 /", NULL);
+	src_handle = gsl_data_handle_ref (src_handle);
+      }
+  }
+
+  ~DataHandleUpsample2()
+  {
+    gsl_data_handle_unref (src_handle);
+    gsl_data_handle_common_free (this);
+  }
+
+  BseErrorType
+  open (GslDataHandleSetup *setup)
+  {
+    BseErrorType error = gsl_data_handle_open (src_handle);
+    if (error != BSE_ERROR_NONE)
+      return error;
+
+    *setup = src_handle->setup; /* copies setup.xinfos by pointer */
+    setup->mix_freq *= 2.0;
+    setup->n_values *= 2;
+
+    frame_size = 1024 * setup->n_channels;
+    pcm_frame = -2;
+    pcm_data.resize (frame_size);
+
+    for (guint i = 0; i < setup->n_channels; i++)
+      {
+	BseResampler2Precision precision = static_cast<BseResampler2Precision> (precision_bits);
+	Resampler2 *resampler = Resampler2::create (BSE_RESAMPLER2_MODE_UPSAMPLE, precision);
+	g_assert (resampler); /* FIXME: better error handling */
+
+	upsamplers.push_back (resampler);
+	filter_order = resampler->order();
+
+	g_assert (filter_order % 2 == 0);
+	filter_delay = filter_order / 2 + 1;
+      }
+    return BSE_ERROR_NONE;
+  }
+
+  void
+  close()
+  {
+    for (guint i = 0; i < setup.n_channels; i++)
+      delete upsamplers[i];
+
+    upsamplers.clear();
+    pcm_data.clear();
+    
+    setup.xinfos = NULL;	/* cleanup pointer reference */
+    gsl_data_handle_close (src_handle);
+  }
+
+  int64
+  src_read (int64   voffset,
+	    int64   n_values,
+	    gfloat *values)
+  {
+    voffset += filter_delay * setup.n_channels;	/* compensate filter delay */
+
+    int64 left = n_values;
+    do
+      {
+	int64 l;
+	if (voffset >= 0 && voffset < src_handle->setup.n_values)
+	  {
+	    l = gsl_data_handle_read (src_handle, voffset, std::min (left, src_handle->setup.n_values - voffset), values);
+	    if (l < 0)
+	      return l;	/* pass on errors */
+	  }
+	else
+	  {
+	    /*
+	     * the resampler needs data before and after the data provided
+	     * by src_handle; so we offer zero values here
+	     */
+	    *values = 0;
+	    l = 1;
+	  }
+
+	voffset += l;
+	left -= l;
+	values += l;
+      }
+    while (left > 0);
+
+    return n_values;
+  }
+
+  void
+  deinterleave (float* src, float *dest, int64 n_values)
+  {
+    const int64 n_channels = setup.n_channels;
+
+    for (int64 ch = 0; ch < n_channels; ch++)
+      for (int64 v = ch; v < n_values; v += n_channels)
+	*dest++ = src[v];
+  }
+
+  void
+  interleave (float* src, float *dest, int64 n_values)
+  {
+    const int64 n_channels = setup.n_channels;
+
+    for (int64 ch = 0; ch < n_channels; ch++)
+      for (int64 v = ch; v < n_values; v += n_channels)
+	dest[v] = *src++;
+  }
+
+  int64
+  prepare_filter_history (int64 frame)
+  {
+    const int64 n_channels = setup.n_channels;
+    const int64 n_input_samples = filter_order;
+
+    float input_interleaved[n_input_samples * n_channels];
+    float input[n_input_samples * n_channels];
+
+    int64 l = src_read (frame * frame_size / 2 - n_input_samples * n_channels,
+	                n_input_samples * n_channels, input_interleaved);
+    if (l < 0)
+      return l; /* pass on errors */
+
+    deinterleave (input_interleaved, input, n_input_samples * setup.n_channels);
+
+    for (guint ch = 0; ch < setup.n_channels; ch++)
+      {
+	/* we don't need the output, this is just for filling the filter history */
+	float output[n_input_samples * 2];
+
+	upsamplers[ch]->process_block (input + ch * n_input_samples, n_input_samples, output);
+      }
+    return 1;
+  }
+
+  int64
+  read_frame (int64 frame)
+  {
+    /*
+     * if we're seeking (not reading data linearily), we need to reinitialize
+     * the filter history with new values
+     */
+    if (frame != pcm_frame + 1)
+      {
+	int64 l = prepare_filter_history (frame);
+	if (l < 0)
+	  return l; /* pass on errors */
+      }
+
+    float input_interleaved[frame_size / 2];
+    float input[frame_size / 2];
+    float output[frame_size];
+
+    int64 l = src_read (frame * frame_size / 2, frame_size / 2, input_interleaved);
+    if (l < 0)
+      return l; /* pass on errors */
+
+    deinterleave (input_interleaved, input, frame_size / 2);
+    for (guint ch = 0; ch < setup.n_channels; ch++)
+      {
+	const int64 output_per_channel = frame_size / setup.n_channels;
+	const int64 input_per_channel = output_per_channel / 2;
+
+	upsamplers[ch]->process_block (input + ch * input_per_channel, input_per_channel, output + ch * output_per_channel);
+      }
+    interleave (output, &pcm_data[0], frame_size);
+
+    pcm_frame = frame;
+    return 1;
+  }
+
+  int64
+  read (int64          voffset,
+	int64          n_values,
+	gfloat        *values)
+  {
+    int64 frame = voffset / pcm_data.size();
+    if (frame != pcm_frame)
+      {
+	int64 l = read_frame (frame);
+	if (l < 0)
+	  return l;
+      }
+    g_assert (pcm_frame == frame);
+
+    voffset -= pcm_frame * frame_size;
+    g_assert (voffset >= 0);
+
+    n_values = std::min (n_values, frame_size - voffset);
+    for (int64 i = 0; i < n_values; i++)
+      values[i] = pcm_data[voffset + i];
+
+    return n_values;
+  }
+};
+
+} // namespace Bse
+
+using namespace Bse;
+
+static BseErrorType
+dh_upsample2_open (GslDataHandle *dhandle, GslDataHandleSetup *setup)
+{
+  return reinterpret_cast<DataHandleUpsample2 *> (dhandle)->open (setup);
+}
+
+static void
+dh_upsample2_close (GslDataHandle *dhandle)
+{
+  reinterpret_cast<DataHandleUpsample2 *> (dhandle)->close();
+}
+
+
+static void
+dh_upsample2_destroy (GslDataHandle *dhandle)
+{
+  delete reinterpret_cast<DataHandleUpsample2 *> (dhandle);
+}
+
+static int64
+dh_upsample2_read (GslDataHandle *dhandle,
+		   int64          voffset,
+		   int64          n_values,
+		   gfloat        *values)
+{
+  return reinterpret_cast<DataHandleUpsample2 *> (dhandle)->read (voffset, n_values, values);
+}
+
+GslDataHandle*
+bse_data_handle_new_upsample2 (GslDataHandle *src_handle, int precision_bits)
+{
+  static GslDataHandleFuncs dh_upsample2_vtable = {
+    dh_upsample2_open,
+    dh_upsample2_read,
+    dh_upsample2_close,
+    NULL,
+    dh_upsample2_destroy,
+  };
+  DataHandleUpsample2 *dhandle = new DataHandleUpsample2 (src_handle, precision_bits);
+  if (dhandle->init_ok)
+    {
+      dhandle->vtable = &dh_upsample2_vtable;
+      return dhandle;
+    }
+  else
+    {
+      delete dhandle;
+      return NULL;
+    }
+}

Added: trunk/bse/bseresampler.cc
===================================================================
--- trunk/bse/bseresampler.cc	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseresampler.cc	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,309 @@
+/* BseResampler - FPU and SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include "bseresampler.hh"
+#include "bseblockutils.hh"
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+/*---- Resampler2 methods ----*/
+
+Resampler2*
+Resampler2::create (BseResampler2Mode      mode,
+                    BseResampler2Precision precision)
+{
+  return Block::create_resampler2 (mode, precision);
+}
+
+Resampler2::~Resampler2()
+{
+}
+
+/*---- coefficient sets for Resampler2 ----*/
+/* 
+ * halfband FIR filter for factor 2 resampling, created with octave
+ *
+ * design method: windowed sinc,  using ultraspherical window
+ *
+ *   coefficients = 32
+ *             x0 = 1.01065
+ *          alpha = 0.75
+ *
+ * design criteria (44100 Hz => 88200 Hz):
+ * 
+ *       passband = [     0, 18000 ]  1 - 2^-16 <= H(z) <= 1+2^-16
+ *     transition = [ 18000, 26100 ]
+ *       stopband = [ 26100, 44100 ]  | H(z) | <= -96 dB
+ *
+ * and for 48 kHz => 96 kHz:
+ *
+ *       passband = [     0, 19589 ]  1 - 2^-16 <= H(z) <= 1+2^-16
+ *     transition = [ 19588, 29386 ]
+ *       stopband = [ 29386, 48000 ]  | H(z) | <= -96 dB
+ *
+ * in order to keep the coefficient number down to 32, the filter
+ * does only "almost" fulfill the spec, but its really really close
+ * (no stopband ripple > -95 dB)
+ */
+
+const double Resampler2::halfband_fir_96db_coeffs[32] =
+{
+  -3.48616530828033e-05,
+   0.000112877490936198,
+  -0.000278961878372482,
+   0.000590495306376081,
+  -0.00112566995029848,
+   0.00198635062559427,
+  -0.00330178798332932,
+   0.00523534239035401,
+  -0.00799905465189065,
+   0.0118867161189188,
+  -0.0173508611368417,
+   0.0251928452706978,
+  -0.0370909694665106,
+   0.057408291607388,
+  -0.102239638342325,
+   0.317002929635456,
+   /* here, a 0.5 coefficient will be used */
+   0.317002929635456,
+  -0.102239638342325,
+   0.0574082916073878,
+  -0.0370909694665105,
+   0.0251928452706976,
+  -0.0173508611368415,
+   0.0118867161189186,
+  -0.00799905465189052,
+   0.0052353423903539,
+  -0.00330178798332923,
+   0.00198635062559419,
+  -0.00112566995029842,
+   0.000590495306376034,
+  -0.00027896187837245,
+   0.000112877490936177,
+  -3.48616530827983e-05
+};
+
+/*
+ *   coefficients = 16
+ *             x0 = 1.013
+ *          alpha = 0.2
+ */
+const double Resampler2::halfband_fir_48db_coeffs[16] =
+{
+  -0.00270578824181636,
+   0.00566964586625895,
+  -0.0106460585587187,
+   0.0185209590435965,
+  -0.0310433957594089,
+   0.0525722488176905,
+  -0.0991138314110143,
+   0.315921760444802,
+   /* here, a 0.5 coefficient will be used */
+   0.315921760444802,
+  -0.0991138314110145,
+   0.0525722488176907,
+  -0.031043395759409,
+   0.0185209590435966,
+  -0.0106460585587187,
+   0.00566964586625899,
+  -0.00270578824181638
+};
+
+/*
+ *   coefficients = 24
+ *             x0 = 1.0105
+ *          alpha = 0.93
+ */
+const double Resampler2::halfband_fir_72db_coeffs[24] =
+{
+  -0.0002622341634289771,
+   0.0007380549701258316,
+  -0.001634275943268986,
+   0.00315564206632209,
+  -0.005564668530702518,
+   0.009207977968023688,
+  -0.0145854155294611,
+   0.02253220964143239,
+  -0.03474055058489597,
+   0.05556350980411048,
+  -0.1010616834297558,
+   0.316597934725021,
+   /* here, a 0.5 coefficient will be used */
+   0.3165979347250216,
+  -0.1010616834297563,
+   0.0555635098041109,
+  -0.03474055058489638,
+   0.02253220964143274,
+  -0.01458541552946141,
+   0.00920797796802395,
+  -0.005564668530702722,
+   0.003155642066322248,
+  -0.001634275943269096,
+   0.000738054970125897,
+  -0.0002622341634290046,
+};
+
+/*
+ *   coefficients = 42
+ *             x0 = 1.0106
+ *          alpha = 0.8
+ */
+const double Resampler2::halfband_fir_120db_coeffs[42] = {
+   2.359361930421347e-06,
+  -9.506281154947505e-06,
+   2.748456705299089e-05,
+  -6.620621425709478e-05,
+   0.0001411845354098405,
+  -0.0002752082937581387,
+   0.0005000548069542907,
+  -0.0008581650926168509,
+   0.001404290771748464,
+  -0.002207303823772437,
+   0.003352696749689989,
+  -0.004946913550236211,
+   0.007125821223639453,
+  -0.01007206140806936,
+   0.01405163477932994,
+  -0.01949467352546547,
+   0.02718899890919871,
+  -0.038810852733035,
+   0.05873397010869939,
+  -0.1030762204838426,
+   0.317288892550808,
+   /* here, a 0.5 coefficient will be used */
+   0.3172888925508079,
+  -0.1030762204838425,
+   0.0587339701086993,
+  -0.03881085273303492,
+   0.02718899890919862,
+  -0.01949467352546535,
+   0.01405163477932982,
+  -0.01007206140806923,
+   0.007125821223639309,
+  -0.004946913550236062,
+   0.003352696749689839,
+  -0.00220730382377229,
+   0.001404290771748321,
+  -0.0008581650926167192,
+   0.0005000548069541726,
+  -0.0002752082937580344,
+   0.0001411845354097548,
+  -6.620621425702783e-05,
+   2.748456705294319e-05,
+  -9.506281154917077e-06,
+   2.359361930409472e-06
+};
+
+/*
+ *   coefficients = 52
+ *             x0 = 1.0104
+ *          alpha = 0.8
+ */
+const double Resampler2::halfband_fir_144db_coeffs[52] = {
+  -1.841826652087099e-07,
+   8.762360674826639e-07,
+  -2.867933918842901e-06,
+   7.670965310712155e-06,
+  -1.795091436711159e-05,
+   3.808294405088742e-05,
+  -7.483688716947913e-05,
+   0.0001381756990743866,
+  -0.0002421379200249195,
+   0.0004057667984715052,
+  -0.0006540521320531017,
+   0.001018873594538604,
+  -0.001539987101083099,
+   0.002266194978575507,
+  -0.003257014968854008,
+   0.004585469100383752,
+  -0.006343174213238195,
+   0.008650017657145861,
+  -0.01167305853124126,
+   0.01566484143899151,
+  -0.02104586507283325,
+   0.02859957136356252,
+  -0.04000402932277326,
+   0.05964131775019404,
+  -0.1036437507243546,
+   0.3174820359034792,
+   /* here, a 0.5 coefficient will be used */
+   0.3174820359034791,
+  -0.1036437507243545,
+   0.05964131775019401,
+  -0.04000402932277325,
+   0.0285995713635625,
+  -0.02104586507283322,
+   0.01566484143899148,
+  -0.01167305853124122,
+   0.008650017657145822,
+  -0.006343174213238157,
+   0.004585469100383712,
+  -0.003257014968853964,
+   0.002266194978575464,
+  -0.00153998710108306,
+   0.001018873594538566,
+  -0.0006540521320530672,
+   0.0004057667984714751,
+  -0.0002421379200248905,
+   0.0001381756990743623,
+  -7.483688716946011e-05,
+   3.808294405087123e-05,
+  -1.795091436709889e-05,
+   7.670965310702215e-06,
+  -2.867933918835638e-06,
+   8.762360674786308e-07,
+  -1.841826652067372e-07,
+};
+
+} /* namespace Resampler */
+
+} /* namespace Bse */
+
+/*---- Resampler2 C API ----*/
+
+BseResampler2*
+bse_resampler2_create (BseResampler2Mode      mode,
+                       BseResampler2Precision precision)
+{
+  return reinterpret_cast<BseResampler2 *> (Bse::Resampler::Resampler2::create (mode, precision));
+}
+
+void
+bse_resampler2_destroy (BseResampler2 *resampler)
+{
+  delete reinterpret_cast<Bse::Resampler::Resampler2 *> (resampler);
+}
+
+void
+bse_resampler2_process_block (BseResampler2 *resampler,
+                              const float   *input,
+                              unsigned int   n_input_samples,
+			      float         *output)
+{
+  reinterpret_cast<Bse::Resampler::Resampler2 *> (resampler)->process_block (input, n_input_samples, output);
+}
+
+guint
+bse_resampler2_order (BseResampler2 *resampler)
+{
+  return reinterpret_cast<Bse::Resampler::Resampler2 *> (resampler)->order();
+}

Added: trunk/bse/bseresampler.hh
===================================================================
--- trunk/bse/bseresampler.hh	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseresampler.hh	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,135 @@
+/* BseResampler - FPU and SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#ifndef __BSE_RESAMPLER_HH__
+#define __BSE_RESAMPLER_HH__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct BseResampler2 BseResampler2;
+
+/* keep synchronized with corresponding factory enum */
+typedef enum	/*< skip >*/
+{
+  BSE_RESAMPLER2_MODE_UPSAMPLE,
+  BSE_RESAMPLER2_MODE_DOWNSAMPLE
+} BseResampler2Mode;
+
+/* keep synchronized with corresponding factory enum */
+typedef enum	/*< skip >*/
+{
+  BSE_RESAMPLER2_PREC_48DB = 8,
+  BSE_RESAMPLER2_PREC_72DB = 12,
+  BSE_RESAMPLER2_PREC_96DB = 16,
+  BSE_RESAMPLER2_PREC_120DB = 20,
+  BSE_RESAMPLER2_PREC_144DB = 24
+} BseResampler2Precision;
+
+BseResampler2* bse_resampler2_create        (BseResampler2Mode      mode,
+                                             BseResampler2Precision precision);
+void           bse_resampler2_destroy       (BseResampler2         *resampler);
+void	       bse_resampler2_process_block (BseResampler2         *resampler,
+                                             const float           *input,
+                                             unsigned int           n_input_samples,
+					     float                 *output);
+guint	       bse_resampler2_order         (BseResampler2         *resampler);
+
+G_END_DECLS
+
+#ifdef __cplusplus
+
+#include <vector>
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+/**
+ * Interface for factor 2 resampling classes
+ */
+class Resampler2 {
+public:
+  /**
+   * creates a resampler instance fulfilling a given specification
+   */
+  static  Resampler2* create (BseResampler2Mode      mode,
+			      BseResampler2Precision precision);
+  /**
+   * virtual destructor for abstract class
+   */
+  virtual	      ~Resampler2();
+
+  /**
+   * resample a data block
+   */
+  virtual void	      process_block (const float *input, unsigned int n_input_samples, float *output) = 0;
+
+  /**
+   * return FIR filter order
+   */
+  virtual guint	      order() const = 0;
+
+protected:
+  static const double halfband_fir_48db_coeffs[16];
+  static const double halfband_fir_72db_coeffs[24];
+  static const double halfband_fir_96db_coeffs[32];
+  static const double halfband_fir_120db_coeffs[42];
+  static const double halfband_fir_144db_coeffs[52];
+
+  /* Creates implementation from filter coefficients and Filter implementation class
+   *
+   * Since up- and downsamplers use different (scaled) coefficients, its possible
+   * to specify a scaling factor. Usually 2 for upsampling and 1 for downsampling.
+   */
+  template<class Filter> static inline Resampler2*
+  create_impl_with_coeffs (const double *d,
+	                   guint         order,
+	                   double        scaling)
+  {
+    float taps[order];
+    for (guint i = 0; i < order; i++)
+      taps[i] = d[i] * scaling;
+
+    Resampler2 *filter = new Filter (taps);
+    g_assert (order == filter->order());
+    return filter;
+  }
+
+public: // FIXME: friend class Bse::Block::Impl;
+  /* creates the actual implementation; specifying USE_SSE=true will use
+   * SSE instructions, USE_SSE=false will use FPU instructions
+   *
+   * Don't use this directly - it's only public in order to be used by
+   * bseblockutils.cc's anonymous Impl classes.
+   */
+  template<bool USE_SSE> static inline Resampler2*
+  create_impl (BseResampler2Mode      mode,
+	       BseResampler2Precision precision);
+};
+
+} /* namespace Resampler */
+
+} /* namespace Bse */
+
+#endif /* __cplusplus */
+
+#endif /* __BSE_RESAMPLER_HH__ */

Added: trunk/bse/bseresampler.tcc
===================================================================
--- trunk/bse/bseresampler.tcc	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseresampler.tcc	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,546 @@
+/* BseResampler - FPU and SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#ifndef __BSE_RESAMPLER_TCC__
+#define __BSE_RESAMPLER_TCC__
+
+#include <glib.h>
+#include <vector>
+#include <bse/bseresampler.hh>
+#include <birnet/birnetutils.h>
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+using std::vector;
+using std::min;
+using std::max;
+using std::copy;
+
+/* see: http://ds9a.nl/gcc-simd/ */
+union F4Vector 
+{
+  __m128 v;   // vector of four single floats
+  float f[4];
+};
+
+/**
+ * FIR filter routine
+ *
+ * A FIR filter has the characteristic that it has a finite impulse response,
+ * and can be computed by convolution of the input signal with that finite
+ * impulse response.
+ *
+ * Thus, we use this for computing the output of the FIR filter 
+ *
+ * output = input[0] * taps[0] + input[1] * taps[1] + ... + input[N-1] * taps[N-1]
+ *
+ * where input is the input signal, taps are the filter coefficients, in
+ * other texts sometimes called h[0]..h[N-1] (impulse response) or a[0]..a[N-1]
+ * (non recursive part of a digital filter), and N is the filter order.
+ */
+template<class Accumulator> static inline Accumulator
+fir_process_one_sample (const float *input,
+                        const float *taps, /* [0..order-1] */
+			const guint  order)
+{
+  Accumulator out = 0;
+  for (guint i = 0; i < order; i++)
+    out += input[i] * taps[i];
+  return out;
+}
+
+/**
+ * FIR filter routine for 4 samples simultaneously
+ *
+ * This routine produces (approximately) the same result as fir_process_one_sample
+ * but computes four consecutive output values at once using vectorized SSE
+ * instructions. Note that input and sse_taps need to be 16-byte aligned here.
+ *
+ * Also note that sse_taps is not a plain impulse response here, but a special
+ * version that needs to be computed with fir_compute_sse_taps.
+ */
+static inline void
+fir_process_4samples_sse (const float *input,
+                          const float *sse_taps,
+			  const guint  order,
+			  float       *out0,
+			  float       *out1,
+			  float       *out2,
+			  float       *out3)
+#ifdef __SSE__
+{
+  /* input and taps must be 16-byte aligned */
+  const F4Vector *input_v = reinterpret_cast<const F4Vector *> (input);
+  const F4Vector *sse_taps_v = reinterpret_cast<const F4Vector *> (sse_taps);
+  F4Vector out0_v, out1_v, out2_v, out3_v;
+
+  out0_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[0].v);
+  out1_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[1].v);
+  out2_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[2].v);
+  out3_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[3].v);
+
+  for (guint i = 1; i < (order + 6) / 4; i++)
+    {
+      out0_v.v = _mm_add_ps (out0_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 0].v));
+      out1_v.v = _mm_add_ps (out1_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 1].v));
+      out2_v.v = _mm_add_ps (out2_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 2].v));
+      out3_v.v = _mm_add_ps (out3_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 3].v));
+    }
+
+  *out0 = out0_v.f[0] + out0_v.f[1] + out0_v.f[2] + out0_v.f[3];
+  *out1 = out1_v.f[0] + out1_v.f[1] + out1_v.f[2] + out1_v.f[3];
+  *out2 = out2_v.f[0] + out2_v.f[1] + out2_v.f[2] + out2_v.f[3];
+  *out3 = out3_v.f[0] + out3_v.f[1] + out3_v.f[2] + out3_v.f[3];
+}
+#else
+{
+  g_assert_not_reached();
+}
+#endif
+
+
+/**
+ * fir_compute_sse_taps takes a normal vector of FIR taps as argument and
+ * computes a specially scrambled version of these taps, ready to be used
+ * for SSE operations (by fir_process_4samples_sse).
+ *
+ * we require a special ordering of the FIR taps, to get maximum benefit of the SSE operations
+ *
+ * example: suppose the FIR taps are [ x1 x2 x3 x4 x5 x6 x7 x8 x9 ], then the SSE taps become
+ *
+ * [ x1 x2 x3 x4   0 x1 x2 x3   0  0 x1 x2   0  0  0 x1      <- for input[0]
+ *   x5 x6 x7 x8  x4 x5 x6 x7  x3 x4 x5 x6  x2 x3 x4 x5      <- for input[1]
+ *   x9  0  0  0  x8 x9  0  0  x7 x8 x9  0  x6 x7 x8 x9 ]    <- for input[2]
+ * \------------/\-----------/\-----------/\-----------/
+ *    for out0     for out1      for out2     for out3
+ *
+ * so that we can compute out0, out1, out2 and out3 simultaneously
+ * from input[0]..input[2]
+ */
+static inline vector<float>
+fir_compute_sse_taps (const vector<float>& taps)
+{
+  const int order = taps.size();
+  vector<float> sse_taps ((order + 6) / 4 * 16);
+
+  for (int j = 0; j < 4; j++)
+    for (int i = 0; i < order; i++)
+      {
+	int k = i + j;
+	sse_taps[(k / 4) * 16 + (k % 4) + j * 4] = taps[i];
+      }
+
+  return sse_taps;
+}
+
+/* Helper class to allocate aligned memory */
+template<class T, int ALIGN>
+class AlignedMem {
+  unsigned char *unaligned_mem;
+  T *data;
+  unsigned int n_elements;
+
+  void
+  allocate_aligned_data()
+  {
+    g_assert ((ALIGN % sizeof (T)) == 0);
+    data = reinterpret_cast<T *> (birnet_malloc_aligned (n_elements * sizeof (T), ALIGN, &unaligned_mem));
+  }
+  /* no copy constructor */
+  AlignedMem (const AlignedMem&);
+  /* no assignment operator */
+  AlignedMem& operator= (const AlignedMem&);
+public:
+  AlignedMem (const vector<T>& elements)
+    : n_elements (elements.size())
+  {
+    allocate_aligned_data();
+
+    for (unsigned int i = 0; i < n_elements; i++)
+      new (data + i) T(elements[i]);
+  }
+  AlignedMem (unsigned int n_elements)
+    : n_elements (n_elements)
+  {
+    allocate_aligned_data();
+
+    for (unsigned int i = 0; i < n_elements; i++)
+      new (data + i) T();
+  }
+  ~AlignedMem()
+  {
+    /* C++ destruction order: last allocated element is deleted first */
+    while (n_elements)
+      data[--n_elements].~T();
+
+    g_free (unaligned_mem);
+  }
+  T&
+  operator[] (unsigned int pos)
+  {
+    return data[pos];
+  }
+  const T&
+  operator[] (unsigned int pos) const
+  {
+    return data[pos];
+  }
+  unsigned int
+  size()
+  {
+    return n_elements;
+  }
+};
+
+/**
+ * Factor 2 upsampling of a data stream
+ *
+ * Template arguments:
+ *   ORDER     number of resampling filter coefficients
+ *   USE_SSE   whether to use SSE (vectorized) instructions or not
+ */
+template<guint ORDER, bool USE_SSE>
+class Upsampler2 : public Resampler2 {
+  vector<float>        taps;
+  AlignedMem<float,16> history;
+  AlignedMem<float,16> sse_taps;
+protected:
+  /* fast SSE optimized convolution */
+  void
+  process_4samples_aligned (const float *input /* aligned */,
+                            float       *output)
+  {
+    const guint H = (ORDER / 2) - 1; /* half the filter length */
+
+    output[0] = input[H];
+    output[2] = input[H + 1];
+    output[4] = input[H + 2];
+    output[6] = input[H + 3];
+
+    fir_process_4samples_sse (input, &sse_taps[0], ORDER, &output[1], &output[3], &output[5], &output[7]);
+  }
+  /* slow convolution */
+  void
+  process_sample_unaligned (const float *input,
+                            float       *output)
+  {
+    const guint H = (ORDER / 2) - 1; /* half the filter length */
+    output[0] = input[H];
+    output[1] = fir_process_one_sample<float> (&input[0], &taps[0], ORDER);
+  }
+  void
+  process_block_aligned (const float *input,
+                         guint        n_input_samples,
+			 float       *output)
+  {
+    unsigned int i = 0;
+    if (USE_SSE)
+      {
+	while (i + 3 < n_input_samples)
+	  {
+	    process_4samples_aligned (&input[i], &output[i*2]);
+	    i += 4;
+	  }
+      }
+    while (i < n_input_samples)
+      {
+	process_sample_unaligned (&input[i], &output[2*i]);
+	i++;
+      }
+  }
+  void
+  process_block_unaligned (const float *input,
+                           guint        n_input_samples,
+			   float       *output)
+  {
+    unsigned int i = 0;
+    if (USE_SSE)
+      {
+	while ((reinterpret_cast<ptrdiff_t> (&input[i]) & 15) && i < n_input_samples)
+	  {
+	    process_sample_unaligned (&input[i], &output[2 * i]);
+	    i++;
+	  }
+      }
+    process_block_aligned (&input[i], n_input_samples - i, &output[2 * i]);
+  }
+public:
+  /**
+   * Constructs an Upsampler2 object with a given set of filter coefficients.
+   *
+   * init_taps: coefficients for the upsampling FIR halfband filter
+   */
+  Upsampler2 (float *init_taps)
+    : taps (init_taps, init_taps + ORDER),
+      history (2 * ORDER),
+      sse_taps (fir_compute_sse_taps (taps))
+  {
+    g_assert ((ORDER & 1) == 0);    /* even order filter */
+  }
+  /**
+   * The function process_block() takes a block of input samples and produces a
+   * block with twice the length, containing interpolated output samples.
+   */
+  void
+  process_block (const float *input,
+                 guint        n_input_samples,
+		 float       *output)
+  {
+    unsigned int history_todo = min (n_input_samples, ORDER);
+
+    copy (input, input + history_todo, &history[ORDER]);
+    process_block_aligned (&history[0], history_todo, output);
+    if (n_input_samples >= ORDER)
+      {
+	process_block_unaligned (input, n_input_samples - history_todo, &output [2 * history_todo]);
+
+	// build new history from new input
+	copy (input + n_input_samples - ORDER, input + n_input_samples, &history[0]);
+      }
+    else
+      {
+	// build new history from end of old history
+	// (very expensive if n_input_samples tends to be a lot smaller than ORDER often)
+	g_memmove (&history[0], &history[n_input_samples], sizeof (history[0]) * ORDER);
+      }
+  }
+  /**
+   * Returns the FIR filter order.
+   */
+  guint
+  order() const
+  {
+    return ORDER;
+  }
+};
+
+/**
+ * Factor 2 downsampling of a data stream
+ *
+ * Template arguments:
+ *   ORDER    number of resampling filter coefficients
+ *   USE_SSE  whether to use SSE (vectorized) instructions or not
+ */
+template<guint ORDER, bool USE_SSE>
+class Downsampler2 : public Resampler2 {
+  vector<float>        taps;
+  AlignedMem<float,16> history_even;
+  AlignedMem<float,16> history_odd;
+  AlignedMem<float,16> sse_taps;
+  /* fast SSE optimized convolution */
+  template<int ODD_STEPPING> void
+  process_4samples_aligned (const float *input_even /* aligned */,
+                            const float *input_odd,
+			    float       *output)
+  {
+    const guint H = (ORDER / 2) - 1; /* half the filter length */
+
+    fir_process_4samples_sse (input_even, &sse_taps[0], ORDER, &output[0], &output[1], &output[2], &output[3]);
+
+    output[0] += 0.5 * input_odd[H * ODD_STEPPING];
+    output[1] += 0.5 * input_odd[(H + 1) * ODD_STEPPING];
+    output[2] += 0.5 * input_odd[(H + 2) * ODD_STEPPING];
+    output[3] += 0.5 * input_odd[(H + 3) * ODD_STEPPING];
+  }
+  /* slow convolution */
+  template<int ODD_STEPPING> float
+  process_sample_unaligned (const float *input_even,
+                            const float *input_odd)
+  {
+    const guint H = (ORDER / 2) - 1; /* half the filter length */
+
+    return fir_process_one_sample<float> (&input_even[0], &taps[0], ORDER) + 0.5 * input_odd[H * ODD_STEPPING];
+  }
+  template<int ODD_STEPPING> void
+  process_block_aligned (const float *input_even,
+                         const float *input_odd,
+			 float       *output,
+			 guint        n_output_samples)
+  {
+    unsigned int i = 0;
+    if (USE_SSE)
+      {
+	while (i + 3 < n_output_samples)
+	  {
+	    process_4samples_aligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING], &output[i]);
+	    i += 4;
+	  }
+      }
+    while (i < n_output_samples)
+      {
+	output[i] = process_sample_unaligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING]);
+	i++;
+      }
+  }
+  template<int ODD_STEPPING> void
+  process_block_unaligned (const float *input_even,
+                           const float *input_odd,
+			   float       *output,
+			   guint        n_output_samples)
+  {
+    unsigned int i = 0;
+    if (USE_SSE)
+      {
+	while ((reinterpret_cast<ptrdiff_t> (&input_even[i]) & 15) && i < n_output_samples)
+	  {
+	    output[i] = process_sample_unaligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING]);
+	    i++;
+	  }
+      }
+    process_block_aligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING], &output[i], n_output_samples);
+  }
+  void
+  deinterleave2 (const float *data,
+                 guint        n_data_values,
+		 float       *output)
+  {
+    for (unsigned int i = 0; i < n_data_values; i += 2)
+      output[i / 2] = data[i];
+  }
+public:
+  /**
+   * Constructs a Downsampler2 class using a given set of filter coefficients.
+   *
+   * init_taps: coefficients for the downsampling FIR halfband filter
+   */
+  Downsampler2 (float *init_taps)
+    : taps (init_taps, init_taps + ORDER),
+      history_even (2 * ORDER),
+      history_odd (2 * ORDER),
+      sse_taps (fir_compute_sse_taps (taps))
+  {
+    g_assert ((ORDER & 1) == 0);    /* even order filter */
+  }
+  /**
+   * The function process_block() takes a block of input samples and produces
+   * a block with half the length, containing downsampled output samples.
+   */
+  void
+  process_block (const float *input,
+                 guint        n_input_samples,
+		 float       *output)
+  {
+    g_assert ((n_input_samples & 1) == 0);
+
+    const unsigned int BLOCKSIZE = 1024;
+
+    F4Vector  block[BLOCKSIZE / 4]; /* using F4Vector ensures 16-byte alignment */
+    float    *input_even = &block[0].f[0];
+
+    while (n_input_samples)
+      {
+	unsigned int n_input_todo = min (n_input_samples, BLOCKSIZE * 2);
+
+	/*
+	 * since the halfband filter contains zeros every other sample
+	 * and since we're using SSE instructions, which expect the
+	 * data to be consecutively represented in memory, we prepare
+	 * a block of samples containing only even-indexed samples
+	 *
+	 * we keep the deinterleaved data on the stack (instead of per-class
+	 * allocated memory), to ensure that even running a lot of these
+	 * downsampler streams will not result in cache trashing
+	 */
+	/*
+	 * FIXME: this implementation is suboptimal for non-SSE, because it
+	 * performs an extra deinterleaving step in any case, but deinterleaving
+	 * is only required for SSE instructions
+	 */
+	deinterleave2 (input, n_input_todo, input_even);
+
+	const float       *input_odd = input + 1; /* we process this one with a stepping of 2 */
+
+	const unsigned int n_output_todo = n_input_todo / 2;
+	const unsigned int history_todo = min (n_output_todo, ORDER);
+
+	copy (input_even, input_even + history_todo, &history_even[ORDER]);
+	deinterleave2 (input_odd, history_todo * 2, &history_odd[ORDER]);
+
+	process_block_aligned <1> (&history_even[0], &history_odd[0], output, history_todo);
+	if (n_output_todo >= ORDER)
+	  {
+	    process_block_unaligned<2> (input_even, input_odd, &output[history_todo], n_output_todo - history_todo);
+
+	    // build new history from new input
+	    copy (input_even + n_output_todo - ORDER, input_even + n_output_todo, &history_even[0]);
+	    deinterleave2 (input_odd + n_input_todo - ORDER * 2, ORDER * 2, &history_odd[0]); /* FIXME: can be optimized */
+	  }
+	else
+	  {
+	    // build new history from end of old history
+	    // (very expensive if n_output_todo tends to be a lot smaller than ORDER often)
+	    g_memmove (&history_even[0], &history_even[n_output_todo], sizeof (history_even[0]) * ORDER);
+	    g_memmove (&history_odd[0], &history_odd[n_output_todo], sizeof (history_odd[0]) * ORDER);
+	  }
+
+	n_input_samples -= n_input_todo;
+	input += n_input_todo;
+	output += n_output_todo;
+      }
+  }
+  /**
+   * Returns the filter order.
+   */
+  guint
+  order() const
+  {
+    return ORDER;
+  }
+};
+
+template<bool USE_SSE> Resampler2*
+Resampler2::create_impl (BseResampler2Mode      mode,
+	                 BseResampler2Precision precision)
+{
+  if (mode == BSE_RESAMPLER2_MODE_UPSAMPLE)
+    {
+      switch (precision)
+	{
+	case BSE_RESAMPLER2_PREC_48DB:   return create_impl_with_coeffs <Upsampler2<16, USE_SSE> > (halfband_fir_48db_coeffs, 16, 2.0);
+	case BSE_RESAMPLER2_PREC_72DB:   return create_impl_with_coeffs <Upsampler2<24, USE_SSE> > (halfband_fir_72db_coeffs, 24, 2.0);
+	case BSE_RESAMPLER2_PREC_96DB:   return create_impl_with_coeffs <Upsampler2<32, USE_SSE> > (halfband_fir_96db_coeffs, 32, 2.0);
+	case BSE_RESAMPLER2_PREC_120DB:  return create_impl_with_coeffs <Upsampler2<42, USE_SSE> > (halfband_fir_120db_coeffs, 42, 2.0);
+	case BSE_RESAMPLER2_PREC_144DB:  return create_impl_with_coeffs <Upsampler2<52, USE_SSE> > (halfband_fir_144db_coeffs, 52, 2.0);
+	}
+    }
+  else if (mode == BSE_RESAMPLER2_MODE_DOWNSAMPLE)
+    {
+      switch (precision)
+	{
+	case BSE_RESAMPLER2_PREC_48DB:   return create_impl_with_coeffs <Downsampler2<16, USE_SSE> > (halfband_fir_48db_coeffs, 16, 1.0);
+	case BSE_RESAMPLER2_PREC_72DB:   return create_impl_with_coeffs <Downsampler2<24, USE_SSE> > (halfband_fir_72db_coeffs, 24, 1.0);
+	case BSE_RESAMPLER2_PREC_96DB:   return create_impl_with_coeffs <Downsampler2<32, USE_SSE> > (halfband_fir_96db_coeffs, 32, 1.0);
+	case BSE_RESAMPLER2_PREC_120DB:  return create_impl_with_coeffs <Downsampler2<42, USE_SSE> > (halfband_fir_120db_coeffs, 42, 1.0);
+	case BSE_RESAMPLER2_PREC_144DB:  return create_impl_with_coeffs <Downsampler2<52, USE_SSE> > (halfband_fir_144db_coeffs, 52, 1.0);
+	}
+    }
+  return 0;
+}
+
+} /* namespace Resampler */
+
+} /* namespace Bse */
+
+#endif /* __BSE_RESAMPLER_TCC__ */

Modified: trunk/bse/gsldatahandle.h
===================================================================
--- trunk/bse/gsldatahandle.h	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/gsldatahandle.h	2006-09-16 08:02:17 UTC (rev 3883)
@@ -1,5 +1,6 @@
 /* GSL - Generic Sound Layer
  * Copyright (C) 2001-2003 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -111,6 +112,11 @@
 						 int64		   loop_first,
 						 int64		   loop_last);
 
+/* --- resampling datahandles with the factor 2 --- */
+GslDataHandle*	  bse_data_handle_new_upsample2	  (GslDataHandle    *src_handle,  // implemented in bsedatahandle-resample.cc
+						   int precision_bits);
+GslDataHandle*	  bse_data_handle_new_downsample2 (GslDataHandle    *src_handle); // implemented in bsedatahandle-resample.cc
+
 /* --- xinfo handling --- */
 GslDataHandle* gsl_data_handle_new_add_xinfos     (GslDataHandle *src_handle,
                                                    gchar        **xinfos);

Modified: trunk/bse/tests/.cvsignore
===================================================================
--- trunk/bse/tests/.cvsignore	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/.cvsignore	2006-09-16 08:02:17 UTC (rev 3883)
@@ -8,3 +8,6 @@
 subnormals
 testcxx
 testfft
+resamplercapi
+testresampler
+resamplehandle

Modified: trunk/bse/tests/Makefile.am
===================================================================
--- trunk/bse/tests/Makefile.am	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/Makefile.am	2006-09-16 08:02:17 UTC (rev 3883)
@@ -12,7 +12,7 @@
 
 EXTRA_DIST += arrows.gp filter-defs.gp
 
-TESTS = testfft loophandle testcxx subnormals blocktests
+TESTS = testfft loophandle testcxx subnormals blocktests testresampler resamplehandle resamplercapi
 noinst_PROGRAMS = $(TESTS)
 progs_ldadd = $(top_builddir)/bse/libbse.la
 testfft_SOURCES = testfft.c cxxdummy.cc
@@ -21,9 +21,15 @@
 testcxx_LDADD = $(progs_ldadd)
 subnormals_SOURCES = subnormals.cc subnormals-aux.cc
 subnormals_LDADD = $(progs_ldadd)
-subnormals_CXXFLAGS = -ffast-math
+subnormals_CXXFLAGS = $(AM_CXXFLAGS) -ffast-math
 loophandle_SOURCES = loophandle.c
 loophandle_LDADD = $(progs_ldadd)
 blocktests_SOURCES = blocktests.cc
 blocktests_LDADD = $(progs_ldadd)
-
+testresampler_SOURCES = testresampler.cc
+testresampler_LDADD = $(progs_ldadd)
+resamplehandle_SOURCES = resamplehandle.cc
+resamplehandle_LDADD = $(progs_ldadd)
+resamplercapi_SOURCES = resamplercapi.c
+resamplercapi_LDADD = $(progs_ldadd)
+ 

Added: trunk/bse/tests/resamplehandle.cc
===================================================================
--- trunk/bse/tests/resamplehandle.cc	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/resamplehandle.cc	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,225 @@
+/* BSE Resampling Datahandles Test
+ * Copyright (C) 2001-2002 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General 
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <bse/gsldatahandle.h>
+#include <bse/bsemain.h>
+#include <bse/bsemathsignal.h>
+#include <bse/gsldatautils.h>
+#include <bse/bseblockutils.hh>
+// #define TEST_VERBOSE
+#include <birnet/birnettests.h>
+#include <vector>
+
+using std::vector;
+using std::max;
+using std::min;
+
+void
+read_through (GslDataHandle *handle)
+{
+  int64 n_values = gsl_data_handle_n_values (handle);
+  int64 offset = 0;
+
+  while (offset < n_values)
+    {
+      gfloat values[1024];
+      int64 values_read = gsl_data_handle_read (handle, offset, 1024, values);
+      g_assert (values_read > 0);
+      offset += values_read;
+    }
+
+  g_assert (offset == n_values);
+}
+
+double
+check (const vector<float>& input, const vector<double>& expected, int n_channels, int precision_bits, double max_db)
+{
+  g_return_val_if_fail (input.size() * 2 == expected.size(), 0);
+  g_return_val_if_fail (input.size() % n_channels == 0, 0);
+  
+  GslDataHandle *ihandle = gsl_data_handle_new_mem (n_channels, 32, 44100, 440, input.size(), &input[0], NULL);
+  GslDataHandle *rhandle = bse_data_handle_new_upsample2 (ihandle, precision_bits); 
+  gsl_data_handle_unref (ihandle);
+
+  BseErrorType error = gsl_data_handle_open (rhandle);
+  if (error)
+    exit (1);
+
+  double worst_diff, worst_diff_db;
+
+  /* read through the datahandle linearily _twice_ */
+  for (int repeat = 1; repeat <= 2; repeat++)
+    {
+      GslDataPeekBuffer peek_buffer = { +1 /* incremental direction */, 0, };
+      worst_diff = 0.0;
+      for (int64 i = 0; i < rhandle->setup.n_values; i++)
+	{
+	  double resampled = gsl_data_handle_peek_value (rhandle, i, &peek_buffer);
+	  worst_diff = max (resampled - expected[i], worst_diff);
+	}
+      worst_diff_db = bse_db_from_factor (worst_diff, -200);
+      TPRINT ("linear(%dst read) read worst_diff = %f (%f dB)\n", repeat, worst_diff, worst_diff_db);
+      TASSERT (worst_diff_db < max_db);
+    }
+
+  /* test seeking */
+  worst_diff = 0.0;
+  for (int i = 0; i < 1000; i++)
+    {
+      int64 start = rand() % rhandle->setup.n_values;
+      int64 len = rand() % 1024;
+
+      GslDataPeekBuffer peek_buffer = { +1 /* incremental direction */, 0, };
+      for (int64 i = start; i < std::min (start + len, rhandle->setup.n_values); i++)
+	{
+	  double resampled = gsl_data_handle_peek_value (rhandle, i, &peek_buffer);
+	  worst_diff = max (resampled - expected[i], worst_diff);
+	}
+    }
+  worst_diff_db = bse_db_from_factor (worst_diff, -200);
+  TPRINT ("seek worst_diff = %f (%f dB)\n", worst_diff, worst_diff_db);
+  TASSERT (worst_diff_db < max_db);
+
+  /* test speed */
+  const guint RUNS = 10;
+  GTimer *timer = g_timer_new();
+  const guint dups = TEST_CALIBRATION (50.0, read_through (rhandle));
+ 
+  double m = 9e300;
+  for (guint i = 0; i < RUNS; i++)
+    {
+      g_timer_start (timer);
+      for (guint j = 0; j < dups; j++)
+	read_through (rhandle);
+      g_timer_stop (timer);
+      double e = g_timer_elapsed (timer, NULL);
+      if (e < m)
+        m = e;
+    }
+  double samples_per_second = input.size() / (m / dups);
+  TPRINT ("  samples / second = %f\n", samples_per_second);
+  TPRINT ("  which means the resampler can process %.2f 44100 Hz streams simultaneusly\n", samples_per_second / 44100.0);
+  TPRINT ("  or one 44100 Hz stream takes %f %% CPU usage\n", 100.0 / (samples_per_second / 44100.0));
+
+  gsl_data_handle_close (rhandle);
+  gsl_data_handle_unref (rhandle);
+
+  return samples_per_second / 44100.0;
+}
+
+void
+run_tests()
+{
+  struct TestParameters {
+    int bits;
+    double mono_db;
+    double stereo_db;
+  } params[] =
+  {
+    { 8, -48, -48 },
+    { 12, -72, -72 },
+    { 16, -98, -95 },
+    { 20, -120, -117 },
+    { 24, -125, -125 }, /* this is not _really_ 24 bit, because the filter is computed using floats */
+    { 0, 0, 0 }
+  };
+
+  for (int p = 0; params[p].bits; p++)
+    {
+      const int LEN = 44100*10;
+      vector<float> input;
+      vector<double> expected;
+      for (int i = 0; i < LEN; i++)
+	{
+	  input.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+
+	  double j = i + 0.5; /* compute perfectly interpolated result */
+	  expected.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+	  expected.push_back (sin (j * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (j * 2 - LEN) / LEN));
+	}
+
+      // mono test
+      {
+	char *x = g_strdup_printf ("ResampleHandle %dbits mono", params[p].bits);
+	TSTART (x);
+	g_free (x);
+
+	double streams = check (input, expected, 1, params[p].bits, params[p].mono_db);
+	TDONE();
+
+	g_printerr ("    ===> speed is equivalent to %.2f simultaneous 44100 Hz streams\n", streams);
+      }
+
+      input.clear();
+      expected.clear();
+      for (int i = 0; i < LEN; i++)
+	{
+	  input.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+	  input.push_back (sin (i * 2 * M_PI * 1000.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+
+	  double j = i + 0.5; /* compute perfectly interpolated result */
+	  expected.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+	  expected.push_back (sin (i * 2 * M_PI * 1000.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+	  expected.push_back (sin (j * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (j * 2 - LEN) / LEN));
+	  expected.push_back (sin (j * 2 * M_PI * 1000.0 / 44100.0) * bse_window_blackman (double (j * 2 - LEN) / LEN));
+	}
+
+      // stereo test
+      {
+	char *x = g_strdup_printf ("ResampleHandle %dbits stereo", params[p].bits);
+	TSTART (x);
+	g_free (x);
+
+	double streams = check (input, expected, 2, params[p].bits, params[p].stereo_db);
+	TDONE();
+
+	g_printerr ("    ===> speed is equivalent to %.2f simultaneous 44100 Hz streams\n", streams);
+      }
+    }
+}
+
+int
+main (int   argc,
+      char *argv[])
+{
+  birnet_init_test (&argc, &argv);
+  //g_thread_init (NULL);
+  //birnet_init_test (&argc, &argv);
+  //bse_init_intern (&argc, &argv, "TestResample", NULL);
+  //std::set_terminate (__gnu_cxx::__verbose_terminate_handler);
+
+  printf ("*** tests on FPU\n");
+  run_tests();
+
+  /* load plugins */
+  BirnetInitValue config[] = {
+    { "load-core-plugins", "1" },
+    { NULL },
+  };
+  bse_init_test (&argc, &argv, config);
+
+  /* check for possible specialization */
+  if (Bse::Block::default_singleton() == Bse::Block::current_singleton())
+    return 0;   /* nothing changed */
+
+  printf ("*** tests on SSE\n");
+  run_tests();
+
+  return 0;
+}

Added: trunk/bse/tests/resamplercapi.c
===================================================================
--- trunk/bse/tests/resamplercapi.c	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/resamplercapi.c	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,85 @@
+/* BSE Resampling Datahandles Test
+ * Copyright (C) 2001-2002 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General 
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <bse/bsemathsignal.h>
+#include <bse/bseresampler.hh>
+#include <bse/bsemain.h>
+// #define TEST_VERBOSE
+#include <birnet/birnettests.h>
+
+static void
+test_c_api()
+{
+  BseResampler2 *resampler = bse_resampler2_create (BSE_RESAMPLER2_MODE_UPSAMPLE, BSE_RESAMPLER2_PREC_96DB);
+  const int INPUT_SIZE = 1024, OUTPUT_SIZE = 2048;
+  float in[INPUT_SIZE];
+  float out[OUTPUT_SIZE];
+  double error = 0;
+  int i;
+
+  for (i = 0; i < INPUT_SIZE; i++)
+    in[i] = sin (i * 440 * 2 * M_PI / 44100) * bse_window_blackman ((double) (i * 2 - INPUT_SIZE) / INPUT_SIZE);
+
+  bse_resampler2_process_block (resampler, in, INPUT_SIZE, out);
+
+  int delay = bse_resampler2_order (resampler) + 2;
+  for (i = 0; i < 2048; i++)
+    {
+      double expected = sin ((i - delay) * 220 * 2 * M_PI / 44100)
+	              * bse_window_blackman ((double) ((i - delay) * 2 - OUTPUT_SIZE) / OUTPUT_SIZE);
+      error = MAX (error, out[i] - expected);
+    }
+
+  double error_db = bse_db_from_factor (error, -200);
+
+  bse_resampler2_destroy (resampler);
+
+  TPRINT ("Test C API error: %f\n", error_db);
+  TASSERT (error_db < -95);
+}
+
+int
+main (int   argc,
+      char *argv[])
+{
+  birnet_init_test (&argc, &argv);
+
+  TSTART ("Resampler C API (FPU)");
+  test_c_api();
+  TDONE();
+
+  /* load plugins */
+  BirnetInitValue config[] = {
+    { "load-core-plugins", "1" },
+    { NULL },
+  };
+  bse_init_test (&argc, &argv, config);
+
+#if 0 /* FIXME: C? */
+  /* check for possible specialization */
+  if (Bse::Block::default_singleton() == Bse::Block::current_singleton())
+    return 0;   /* nothing changed */
+#endif
+
+  TSTART ("Resampler C API (SSE)");
+  test_c_api();
+  TDONE();
+
+  return 0;
+}

Added: trunk/bse/tests/testresampler.cc
===================================================================
--- trunk/bse/tests/testresampler.cc	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/testresampler.cc	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,408 @@
+/* SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <bse/bseresampler.hh>
+#include <bse/bseresampler.tcc>
+
+#include <stdio.h>
+#include <math.h>
+#include <vector>
+#include <sys/time.h>
+#include <time.h>
+
+using std::vector;
+using std::min;
+using std::max;
+using std::copy;
+using namespace Bse::Resampler;
+
+#include "testresamplercoeffs.h"
+
+void
+usage ()
+{
+  printf ("usage: testresampler <options> [ <block_size> ] [ <test_frequency> ]\n");
+  printf ("\n");
+  printf ("  p - performance\n");
+  printf ("  a - accuracy\n");
+  printf ("  g - generate output for gnuplot\n");
+  printf ("  i - impulse response\n");
+  printf ("  F - filter implementation\n");
+  printf ("\n");
+  printf ("  d - downsample\n");
+  printf ("  u - upsample\n");
+  printf ("  s - subsample (= downsample and upsample after that)\n");
+  printf ("  o - oversample (= upsample and downsample after that)\n");
+  printf ("\n");
+  printf ("  f - fast implementation (sse)\n");
+  printf ("\n");
+  printf ("  block_size defaults to 128 values\n");
+  printf ("  test_frequency defaults to 440 Hz\n");
+  printf ("\n");
+  printf ("examples:\n");
+  printf ("  testresampler puf 256     # check performance of fast upsampling with 256 value blocks\n");
+  printf ("  testresampler au          # check accuracy of standard upsampling\n");
+  printf ("  testresampler au 128 500  # check accuracy of standard upsampling using a 500 Hz frequency\n");
+}
+
+enum TestType
+{
+  TEST_NONE,
+  TEST_PERFORMANCE,
+  TEST_ACCURACY,
+  TEST_GNUPLOT,
+  TEST_IMPULSE,
+  TEST_FILTER_IMPL
+} test_type = TEST_NONE;
+
+enum ResampleType
+{
+  RES_NONE,
+  RES_DOWNSAMPLE,
+  RES_UPSAMPLE,
+  RES_SUBSAMPLE,
+  RES_OVERSAMPLE
+} resample_type = RES_NONE;
+
+enum ImplType
+{
+  IMPL_NORMAL,
+  IMPL_SSE
+} impl_type = IMPL_NORMAL;
+
+unsigned int block_size = 128;
+double test_frequency = 440.0;
+
+double
+gettime ()
+{
+  timeval tv;
+  gettimeofday (&tv, 0);
+
+  return tv.tv_sec + tv.tv_usec / 1000000.0;
+}
+
+int
+test_filter_impl()
+{
+  int errors = 0;
+  printf ("testing filter implementation:\n\n");
+
+  for (guint order = 0; order < 64; order++)
+    {
+      vector<float> taps (order);
+      for (guint i = 0; i < order; i++)
+	taps[i] = i + 1;
+
+      AlignedMem<float,16> sse_taps (fir_compute_sse_taps (taps));
+      for (unsigned int i = 0; i < sse_taps.size(); i++)
+	{
+	  printf ("%3d", (int) (sse_taps[i] + 0.5));
+	  if (i % 4 == 3)
+	    printf ("  |");
+	  if (i % 16 == 15)
+	    printf ("   ||| upper bound = %d\n", (order + 6) / 4);
+	}
+      printf ("\n\n");
+
+      AlignedMem<float,16> random_mem (order + 4);
+      for (guint i = 0; i < order + 4; i++)
+	random_mem[i] = 1.0 - rand() / (0.5 * RAND_MAX);
+
+      /* FIXME: the problem with this test is that we explicitely test SSE code
+       * here, but the test case is not compiled with -msse within the BEAST tree
+       */
+      float out[4];
+      fir_process_4samples_sse (&random_mem[0], &sse_taps[0], order,
+	                        &out[0], &out[1], &out[2], &out[3]);
+
+      double adiff = 0.0;
+      for (int i = 0; i < 4; i++)
+	{
+	  double diff = fir_process_one_sample<double> (&random_mem[i], &taps[0], order) - out[i];
+	  adiff += fabs (diff);
+	}
+      if (adiff > 0.0001)
+	{
+	  printf ("*** order = %d, adiff = %f\n", order, adiff);
+	  errors++;
+	}
+    }
+  if (errors)
+    printf ("*** %d errors detected\n", errors);
+  else
+    printf ("filter implementation ok.\n");
+
+  return errors ? 1 : 0;
+}
+
+template <int TEST, int RESAMPLE, int IMPL> int
+perform_test()
+{
+  if (TEST == TEST_IMPULSE) /* need to have space for impulse response for all 4 tests */
+    block_size = 150;
+
+  /* initialize up- and downsampler */
+  const int T = 32;
+  const bool USE_SSE = (IMPL == IMPL_SSE);
+  float utaps[T], dtaps[T];
+  for (int i = 0; i < T; i++)
+    {
+      utaps[i] = halfband_fir_upsample2_96db_coeffs[i] * 2;
+      dtaps[i] = halfband_fir_upsample2_96db_coeffs[i];
+    }
+
+  Upsampler2<T,USE_SSE> ups (utaps);
+  Downsampler2<T,USE_SSE> downs (dtaps);
+
+  F4Vector in_v[block_size / 2 + 1], out_v[block_size / 2 + 1], out2_v[block_size / 2 + 1];
+  float *input = &in_v[0].f[0], *output = &out_v[0].f[0], *output2 = &out2_v[0].f[0]; /* ensure aligned data */
+
+  if (TEST == TEST_PERFORMANCE)
+    {
+      for (unsigned int i = 0; i < block_size; i++)
+	input[i] = sin (i * test_frequency / 44100.0 * 2 * M_PI);
+
+      double start_time = gettime();
+      long long k = 0;
+      for (int i = 0; i < 500000; i++)
+	{
+	  if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+	    {
+	      downs.process_block (input, block_size, output);
+	      if (RESAMPLE == RES_SUBSAMPLE)
+		ups.process_block (output, block_size / 2, output2);
+	    }
+	  if (RESAMPLE == RES_UPSAMPLE || RESAMPLE == RES_OVERSAMPLE)
+	    {
+	      ups.process_block (input, block_size, output);
+	      if (RESAMPLE == RES_OVERSAMPLE)
+		downs.process_block (output, block_size * 2, output2);
+	    }
+	  k += block_size;
+	}
+      double end_time = gettime();
+      if (RESAMPLE == RES_DOWNSAMPLE)
+	{
+	  printf ("  (performance will be normalized to downsampler output samples)\n");
+	  k /= 2;
+	}
+      else if (RESAMPLE == RES_UPSAMPLE)
+	{
+	  printf ("  (performance will be normalized to upsampler input samples)\n");
+	}
+      printf ("  total samples processed = %lld\n", k);
+      printf ("  processing_time = %f\n", end_time - start_time);
+      printf ("  samples / second = %f\n", k / (end_time - start_time));
+      printf ("  which means the resampler can process %.2f 44100 Hz streams simultaneusly\n",
+	      k / (end_time - start_time) / 44100.0);
+      printf ("  or one 44100 Hz stream takes %f %% CPU usage\n", 
+	        100.0 / (k / (end_time - start_time) / 44100.0));
+    }
+  else if (TEST == TEST_ACCURACY || TEST == TEST_GNUPLOT)
+    {
+      long long k = 0;
+      double phase = 0;
+      double max_diff = 0;
+      for (int b = 0; b < 1000; b++)
+	{
+	  int misalign = rand() % 4;
+	  int bs = rand() % (block_size - misalign);
+
+	  if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+	    bs -= bs & 1;
+
+	  for (int i = 0; i < bs; i++)
+	    {
+	      input[i+misalign] = sin (phase);
+	      phase += test_frequency/44100.0 * 2 * M_PI;
+	    }
+	  if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+	    {
+	      downs.process_block (input + misalign, bs, output);
+	      if (RESAMPLE == RES_SUBSAMPLE)
+		ups.process_block (output, bs / 2, output2);
+	    }
+	  if (RESAMPLE == RES_UPSAMPLE || RESAMPLE == RES_OVERSAMPLE)
+	    {
+	      ups.process_block (input + misalign, bs, output);
+	      if (RESAMPLE == RES_OVERSAMPLE)
+		downs.process_block (output, bs * 2, output2);
+	    }
+
+	  /* validate output */
+	  double sin_shift;
+	  double freq_factor;
+	  unsigned int out_bs;
+	  float *check = output;
+
+	  if (RESAMPLE == RES_UPSAMPLE)
+	    {
+	      sin_shift = 34;
+	      freq_factor = 0.5;
+	      out_bs = bs * 2;
+	    }
+	  else if (RESAMPLE == RES_DOWNSAMPLE)
+	    {
+	      sin_shift = 16.5;
+	      freq_factor = 2;
+	      out_bs = bs / 2;
+	    }
+	  else if (RESAMPLE == RES_OVERSAMPLE)
+	    {
+	      sin_shift = 33.5;
+	      freq_factor = 1;
+	      check = output2;
+	      out_bs = bs;
+	    }
+	  else if (RESAMPLE == RES_SUBSAMPLE)
+	    {
+	      sin_shift = 67;
+	      freq_factor = 1;
+	      check = output2;
+	      out_bs = bs;
+	    }
+
+	  for (unsigned int i = 0; i < out_bs; i++, k++)
+	    if (k > 100)
+	      {
+		double correct_output = sin ((k - sin_shift) * 2 * freq_factor * test_frequency / 44100.0 * M_PI);
+		if (TEST == TEST_GNUPLOT)
+		  printf ("%lld %.17f %.17f\n", k, check[i], correct_output);
+		else
+		  max_diff = max (max_diff, check[i] - correct_output);
+	      }
+	}
+      double max_diff_db = 20 * log (max_diff) / log (10);
+      if (TEST == TEST_ACCURACY)
+	{
+	  printf ("  input frequency used to perform test = %.2f Hz (SR = 44100.0 Hz)\n", test_frequency);
+	  printf ("  max difference between correct and computed output: %f = %f dB\n", max_diff, max_diff_db);
+	}
+    }
+  else if (TEST == TEST_IMPULSE)
+    {
+      input[0] = 1;
+      for (unsigned int i = 1; i < block_size; i++)
+	{
+	  input[i] = 0;
+	  output[i] = 0;
+	  output2[i] = 0;
+	}
+
+      if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+	{
+	  downs.process_block (input, block_size, output);
+	  if (RESAMPLE == RES_SUBSAMPLE)
+	    ups.process_block (output, block_size / 2, output2);
+	}
+      if (RESAMPLE == RES_UPSAMPLE || RESAMPLE == RES_OVERSAMPLE)
+	{
+	  ups.process_block (input, block_size, output);
+	  if (RESAMPLE == RES_OVERSAMPLE)
+	    downs.process_block (output, block_size * 2, output2);
+	}
+
+      float *check = output;
+      if (RESAMPLE == RES_OVERSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+	check = output2;
+
+      for (unsigned int i = 0; i < block_size; i++)
+	printf ("%.17f\n", check[i]);
+    }
+  return 0;
+}
+
+template <int TEST, int RESAMPLE> int
+perform_test()
+{
+  switch (impl_type)
+    {
+    case IMPL_SSE:    printf ("using SSE instructions\n"); return perform_test<TEST, RESAMPLE, IMPL_SSE> ();
+    case IMPL_NORMAL: printf ("using FPU instructions\n"); return perform_test<TEST, RESAMPLE, IMPL_NORMAL> ();
+    }
+  g_assert_not_reached();
+  return 1;
+}
+
+template <int TEST> int
+perform_test ()
+{
+  switch (resample_type)
+    {
+    case RES_DOWNSAMPLE:  printf ("for factor 2 downsampling "); return perform_test<TEST, RES_DOWNSAMPLE> ();
+    case RES_UPSAMPLE:	  printf ("for factor 2 upsampling "); return perform_test<TEST, RES_UPSAMPLE> ();
+    case RES_SUBSAMPLE:	  printf ("for factor 2 subsampling "); return perform_test<TEST, RES_SUBSAMPLE> ();
+    case RES_OVERSAMPLE:  printf ("for factor 2 oversampling "); return perform_test<TEST, RES_OVERSAMPLE> ();
+    default:		  usage(); return 1;
+    }
+}
+
+int
+perform_test()
+{
+  switch (test_type)
+    {
+    case TEST_PERFORMANCE:  printf ("performance test "); return perform_test<TEST_PERFORMANCE> ();
+    case TEST_ACCURACY:	    printf ("accuracy test "); return perform_test<TEST_ACCURACY> ();
+    case TEST_GNUPLOT:	    printf ("# gnuplot test "); return perform_test<TEST_GNUPLOT> ();
+    case TEST_IMPULSE:	    printf ("# impulse response test "); return perform_test<TEST_IMPULSE> ();
+    case TEST_FILTER_IMPL:  return test_filter_impl();
+    default:		    usage(); return 1;
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  if (argc >= 2)
+    {
+      for (unsigned int i = 0; i < strlen (argv[1]); i++)
+	{
+	  switch (argv[1][i])
+	    {
+	    case 'F': test_type = TEST_FILTER_IMPL; break;
+	    case 'p': test_type = TEST_PERFORMANCE; break;
+	    case 'a': test_type = TEST_ACCURACY; break;
+	    case 'g': test_type = TEST_GNUPLOT; break;
+	    case 'i': test_type = TEST_IMPULSE; break;
+
+	    case 'd': resample_type = RES_DOWNSAMPLE; break;
+	    case 'u': resample_type = RES_UPSAMPLE; break;
+	    case 's': resample_type = RES_SUBSAMPLE; break;
+	    case 'o': resample_type = RES_OVERSAMPLE; break;
+
+	    case 'f': impl_type = IMPL_SSE; break;
+	    }
+	}
+    }
+
+  if (argc >= 3)
+    block_size = atoi (argv[2]);
+
+  if (argc >= 4)
+    test_frequency = atoi (argv[3]);
+
+  if ((block_size & 1) == 1)
+    block_size++;
+
+  if (block_size < 2)
+    block_size = 2;
+
+  return perform_test();
+}

Added: trunk/bse/tests/testresamplercoeffs.h
===================================================================
--- trunk/bse/tests/testresamplercoeffs.h	2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/testresamplercoeffs.h	2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,80 @@
+/* SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/* 
+ * halfband FIR filter for factor 2 resampling, created with octave
+ *
+ * design method: windowed sinc,  using ultraspherical window
+ *
+ *   coefficients = 32
+ *             x0 = 1.01065
+ *          alpha = 0.75
+ *
+ * design criteria (44100 Hz => 88200 Hz):
+ * 
+ *       passband = [     0, 18000 ]  1 - 2^-16 <= H(z) <= 1+2^-16
+ *     transition = [ 18000, 26100 ]
+ *       stopband = [ 26100, 44100 ]  | H(z) | <= -96 dB
+ *
+ * and for 48 kHz => 96 kHz:
+ *
+ *       passband = [     0, 19589 ]  1 - 2^-16 <= H(z) <= 1+2^-16
+ *     transition = [ 19588, 29386 ]
+ *       stopband = [ 29386, 48000 ]  | H(z) | <= -96 dB
+ *
+ * in order to keep the coefficient number down to 32, the filter
+ * does only "almost" fulfill the spec, but its really really close
+ * (no stopband ripple > -95 dB)
+ */
+
+const double halfband_fir_upsample2_96db_coeffs[32] = {
+    -3.48616530828033e-05,
+     0.000112877490936198,
+    -0.000278961878372482,
+     0.000590495306376081,
+    -0.00112566995029848,
+     0.00198635062559427,
+    -0.00330178798332932,
+     0.00523534239035401,
+    -0.00799905465189065,
+     0.0118867161189188,
+    -0.0173508611368417,
+     0.0251928452706978,
+    -0.0370909694665106,
+     0.057408291607388,
+    -0.102239638342325,
+     0.317002929635456,
+     /* here, a 0.5 coefficient will be used */
+     0.317002929635456,
+    -0.102239638342325,
+     0.0574082916073878,
+    -0.0370909694665105,
+     0.0251928452706976,
+    -0.0173508611368415,
+     0.0118867161189186,
+    -0.00799905465189052,
+     0.0052353423903539,
+    -0.00330178798332923,
+     0.00198635062559419,
+    -0.00112566995029842,
+     0.000590495306376034,
+    -0.00027896187837245,
+     0.000112877490936177,
+    -3.48616530827983e-05
+};
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]