r3883 - in trunk/bse: . tests
- From: stw svn gnome org
- To: svn-commits-list gnome org
- Subject: r3883 - in trunk/bse: . tests
- Date: Sat, 16 Sep 2006 04:02:21 -0400 (EDT)
Author: stw
Date: 2006-09-16 04:02:17 -0400 (Sat, 16 Sep 2006)
New Revision: 3883
Added:
trunk/bse/bsedatahandle-resample.cc
trunk/bse/bseresampler.cc
trunk/bse/bseresampler.hh
trunk/bse/bseresampler.tcc
trunk/bse/tests/resamplehandle.cc
trunk/bse/tests/resamplercapi.c
trunk/bse/tests/testresampler.cc
trunk/bse/tests/testresamplercoeffs.h
Modified:
trunk/bse/ChangeLog
trunk/bse/Makefile.am
trunk/bse/bseblockutils.cc
trunk/bse/bseblockutils.hh
trunk/bse/gsldatahandle.h
trunk/bse/tests/.cvsignore
trunk/bse/tests/Makefile.am
Log:
Sat Sep 16 09:25:17 2006 Stefan Westerfeld <stefan space twc de>
* bseresampler.tcc bseresampler.cc bseresampler.hh: Added factor 2
resampling code (up- and downsampling), which can use SSE instructions
when available. The implementation was tuned for speed.
* gsldatahandle.h bsedatahandle-resample.cc: Provide a datahandle,
which does factor 2 upsampling.
* bseblockutils.cc bseblockutils.hh: Provide factory method for the
resampler, so that bse can automatically use the SSE version when SSE
instructions are available.
* tests/testresamplercoeffs.h tests/testresampler.cc: Standalone
interactive test code for the resampler.
* tests/resamplercapi.c tests/resamplehandle.cc: Tests for the
resampler C API and the resampler data handle.
* Makefile.am: Adapted Makefile rules for the new resampler sources.
* tests/Makefile.am: Adapted Makefile rules for the new resampler
tests. Fixed subnormals_CXXFLAGS, which now includes AM_CXXFLAGS.
Modified: trunk/bse/ChangeLog
===================================================================
--- trunk/bse/ChangeLog 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/ChangeLog 2006-09-16 08:02:17 UTC (rev 3883)
@@ -1,3 +1,27 @@
+Sat Sep 16 09:25:17 2006 Stefan Westerfeld <stefan space twc de>
+
+ * bseresampler.tcc bseresampler.cc bseresampler.hh: Added factor 2
+ resampling code (up- and downsampling), which can use SSE instructions
+ when available. The implementation was tuned for speed.
+
+ * gsldatahandle.h bsedatahandle-resample.cc: Provide a datahandle,
+ which does factor 2 upsampling.
+
+ * bseblockutils.cc bseblockutils.hh: Provide factory method for the
+ resampler, so that bse can automatically use the SSE version when SSE
+ instructions are available.
+
+ * tests/testresamplercoeffs.h tests/testresampler.cc: Standalone
+ interactive test code for the resampler.
+
+ * tests/resamplercapi.c tests/resamplehandle.cc: Tests for the
+ resampler C API and the resampler data handle.
+
+ * Makefile.am: Adapted Makefile rules for the new resampler sources.
+
+ * tests/Makefile.am: Adapted Makefile rules for the new resampler
+ tests. Fixed subnormals_CXXFLAGS, which now includes AM_CXXFLAGS.
+
Sun Sep 10 22:46:55 2006 Tim Janik <timj gtk org>
* gsldatautils.h, gsldatautils.c: added bse_wave_file_from_fbuffer() and
Modified: trunk/bse/Makefile.am
===================================================================
--- trunk/bse/Makefile.am 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/Makefile.am 2006-09-16 08:02:17 UTC (rev 3883)
@@ -55,6 +55,7 @@
bsenote.h bsemidifile.h bseblockutils.hh \
bsecxxvalue.hh bsecxxutils.hh bsecxxbase.hh bsecxxclosure.hh \
bsecxxarg.hh bsecxxmodule.hh bsecxxplugin.hh bseloader.h \
+ bseresampler.hh bseresampler.tcc \
)
# BSE C sources
bse_c_sources = $(strip \
@@ -86,6 +87,7 @@
bsenote.c bsemidifile.c bseblockutils.cc \
bsecxxvalue.cc bsecxxutils.cc bsecxxbase.cc bsecxxclosure.cc \
bsecxxarg.cc bsecxxmodule.cc bsecxxplugin.cc bseloader.c \
+ bseresampler.cc bsedatahandle-resample.cc \
bseloader-aiff.c bseloader-guspatch.cc bseloader-oggvorbis.c bseloader-bsewave.c \
bseloader-mad.c bseloader-wav.c \
)
Modified: trunk/bse/bseblockutils.cc
===================================================================
--- trunk/bse/bseblockutils.cc 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseblockutils.cc 2006-09-16 08:02:17 UTC (rev 3883)
@@ -18,6 +18,8 @@
* Boston, MA 02111-1307, USA.
*/
#include "bseblockutils.hh"
+#include "bseresampler.hh"
+#include "bseresampler.tcc"
namespace {
class BlockImpl : virtual public Bse::Block::Impl {
@@ -144,6 +146,12 @@
max_value = maxv;
return square_sum;
}
+ virtual Bse::Resampler::Resampler2*
+ create_resampler2 (BseResampler2Mode mode,
+ BseResampler2Precision precision)
+ {
+ return Bse::Resampler::Resampler2::create_impl<false> (mode, precision);
+ }
};
static BlockImpl default_block_impl;
} // Anon
Modified: trunk/bse/bseblockutils.hh
===================================================================
--- trunk/bse/bseblockutils.hh 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseblockutils.hh 2006-09-16 08:02:17 UTC (rev 3883)
@@ -1,5 +1,6 @@
/* BSE - Bedevilled Sound Engine
* Copyright (C) 2006 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -76,6 +77,7 @@
G_END_DECLS
#ifdef __cplusplus
+#include <bse/bseresampler.hh>
namespace Bse {
/* --- C++ API --- */
@@ -124,6 +126,13 @@
const float *ivalues,
float& min_value,
float& max_value) { return singleton->range_and_square_sum (n_values, ivalues, min_value, max_value); }
+
+ typedef Resampler::Resampler2 Resampler2;
+ static inline
+ Resampler2* create_resampler2 (BseResampler2Mode mode,
+ BseResampler2Precision precision) { return singleton->create_resampler2 (mode, precision); }
+
+
class Impl {
protected:
@@ -159,7 +168,10 @@
const float *ivalues,
float& min_value,
float& max_value) = 0;
- friend class Block;
+ virtual
+ Resampler2* create_resampler2 (BseResampler2Mode mode,
+ BseResampler2Precision precision) = 0;
+ friend class Block;
static void substitute (Impl *substitute_impl);
};
static Impl* default_singleton ();
Added: trunk/bse/bsedatahandle-resample.cc
===================================================================
--- trunk/bse/bsedatahandle-resample.cc 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bsedatahandle-resample.cc 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,312 @@
+/* BSE Resampling Datahandles
+ * Copyright (C) 2001-2003 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "bseresampler.hh"
+#include "gsldatahandle.h"
+#include <sfi/sficxx.hh>
+#include <vector>
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+}
+
+using std::vector;
+using Resampler::Resampler2;
+
+class DataHandleUpsample2 : public GslDataHandle,
+ public Sfi::GNewable /* 0 initialization */
+{
+ GslDataHandle *src_handle;
+ int precision_bits;
+
+public:
+ gboolean init_ok;
+ vector<Resampler2 *> upsamplers;
+ int64 pcm_frame;
+ vector<float> pcm_data;
+ int64 frame_size;
+ int64 filter_delay;
+ int64 filter_order;
+
+ DataHandleUpsample2 (GslDataHandle *src_handle, int precision_bits)
+ : src_handle (src_handle),
+ precision_bits (precision_bits),
+ init_ok (false)
+ {
+ g_return_if_fail (src_handle != NULL);
+
+ init_ok = gsl_data_handle_common_init (this, NULL);
+ if (init_ok)
+ {
+ name = g_strconcat (src_handle->name, "// #upsample2 /", NULL);
+ src_handle = gsl_data_handle_ref (src_handle);
+ }
+ }
+
+ ~DataHandleUpsample2()
+ {
+ gsl_data_handle_unref (src_handle);
+ gsl_data_handle_common_free (this);
+ }
+
+ BseErrorType
+ open (GslDataHandleSetup *setup)
+ {
+ BseErrorType error = gsl_data_handle_open (src_handle);
+ if (error != BSE_ERROR_NONE)
+ return error;
+
+ *setup = src_handle->setup; /* copies setup.xinfos by pointer */
+ setup->mix_freq *= 2.0;
+ setup->n_values *= 2;
+
+ frame_size = 1024 * setup->n_channels;
+ pcm_frame = -2;
+ pcm_data.resize (frame_size);
+
+ for (guint i = 0; i < setup->n_channels; i++)
+ {
+ BseResampler2Precision precision = static_cast<BseResampler2Precision> (precision_bits);
+ Resampler2 *resampler = Resampler2::create (BSE_RESAMPLER2_MODE_UPSAMPLE, precision);
+ g_assert (resampler); /* FIXME: better error handling */
+
+ upsamplers.push_back (resampler);
+ filter_order = resampler->order();
+
+ g_assert (filter_order % 2 == 0);
+ filter_delay = filter_order / 2 + 1;
+ }
+ return BSE_ERROR_NONE;
+ }
+
+ void
+ close()
+ {
+ for (guint i = 0; i < setup.n_channels; i++)
+ delete upsamplers[i];
+
+ upsamplers.clear();
+ pcm_data.clear();
+
+ setup.xinfos = NULL; /* cleanup pointer reference */
+ gsl_data_handle_close (src_handle);
+ }
+
+ int64
+ src_read (int64 voffset,
+ int64 n_values,
+ gfloat *values)
+ {
+ voffset += filter_delay * setup.n_channels; /* compensate filter delay */
+
+ int64 left = n_values;
+ do
+ {
+ int64 l;
+ if (voffset >= 0 && voffset < src_handle->setup.n_values)
+ {
+ l = gsl_data_handle_read (src_handle, voffset, std::min (left, src_handle->setup.n_values - voffset), values);
+ if (l < 0)
+ return l; /* pass on errors */
+ }
+ else
+ {
+ /*
+ * the resampler needs data before and after the data provided
+ * by src_handle; so we offer zero values here
+ */
+ *values = 0;
+ l = 1;
+ }
+
+ voffset += l;
+ left -= l;
+ values += l;
+ }
+ while (left > 0);
+
+ return n_values;
+ }
+
+ void
+ deinterleave (float* src, float *dest, int64 n_values)
+ {
+ const int64 n_channels = setup.n_channels;
+
+ for (int64 ch = 0; ch < n_channels; ch++)
+ for (int64 v = ch; v < n_values; v += n_channels)
+ *dest++ = src[v];
+ }
+
+ void
+ interleave (float* src, float *dest, int64 n_values)
+ {
+ const int64 n_channels = setup.n_channels;
+
+ for (int64 ch = 0; ch < n_channels; ch++)
+ for (int64 v = ch; v < n_values; v += n_channels)
+ dest[v] = *src++;
+ }
+
+ int64
+ prepare_filter_history (int64 frame)
+ {
+ const int64 n_channels = setup.n_channels;
+ const int64 n_input_samples = filter_order;
+
+ float input_interleaved[n_input_samples * n_channels];
+ float input[n_input_samples * n_channels];
+
+ int64 l = src_read (frame * frame_size / 2 - n_input_samples * n_channels,
+ n_input_samples * n_channels, input_interleaved);
+ if (l < 0)
+ return l; /* pass on errors */
+
+ deinterleave (input_interleaved, input, n_input_samples * setup.n_channels);
+
+ for (guint ch = 0; ch < setup.n_channels; ch++)
+ {
+ /* we don't need the output, this is just for filling the filter history */
+ float output[n_input_samples * 2];
+
+ upsamplers[ch]->process_block (input + ch * n_input_samples, n_input_samples, output);
+ }
+ return 1;
+ }
+
+ int64
+ read_frame (int64 frame)
+ {
+ /*
+ * if we're seeking (not reading data linearily), we need to reinitialize
+ * the filter history with new values
+ */
+ if (frame != pcm_frame + 1)
+ {
+ int64 l = prepare_filter_history (frame);
+ if (l < 0)
+ return l; /* pass on errors */
+ }
+
+ float input_interleaved[frame_size / 2];
+ float input[frame_size / 2];
+ float output[frame_size];
+
+ int64 l = src_read (frame * frame_size / 2, frame_size / 2, input_interleaved);
+ if (l < 0)
+ return l; /* pass on errors */
+
+ deinterleave (input_interleaved, input, frame_size / 2);
+ for (guint ch = 0; ch < setup.n_channels; ch++)
+ {
+ const int64 output_per_channel = frame_size / setup.n_channels;
+ const int64 input_per_channel = output_per_channel / 2;
+
+ upsamplers[ch]->process_block (input + ch * input_per_channel, input_per_channel, output + ch * output_per_channel);
+ }
+ interleave (output, &pcm_data[0], frame_size);
+
+ pcm_frame = frame;
+ return 1;
+ }
+
+ int64
+ read (int64 voffset,
+ int64 n_values,
+ gfloat *values)
+ {
+ int64 frame = voffset / pcm_data.size();
+ if (frame != pcm_frame)
+ {
+ int64 l = read_frame (frame);
+ if (l < 0)
+ return l;
+ }
+ g_assert (pcm_frame == frame);
+
+ voffset -= pcm_frame * frame_size;
+ g_assert (voffset >= 0);
+
+ n_values = std::min (n_values, frame_size - voffset);
+ for (int64 i = 0; i < n_values; i++)
+ values[i] = pcm_data[voffset + i];
+
+ return n_values;
+ }
+};
+
+} // namespace Bse
+
+using namespace Bse;
+
+static BseErrorType
+dh_upsample2_open (GslDataHandle *dhandle, GslDataHandleSetup *setup)
+{
+ return reinterpret_cast<DataHandleUpsample2 *> (dhandle)->open (setup);
+}
+
+static void
+dh_upsample2_close (GslDataHandle *dhandle)
+{
+ reinterpret_cast<DataHandleUpsample2 *> (dhandle)->close();
+}
+
+
+static void
+dh_upsample2_destroy (GslDataHandle *dhandle)
+{
+ delete reinterpret_cast<DataHandleUpsample2 *> (dhandle);
+}
+
+static int64
+dh_upsample2_read (GslDataHandle *dhandle,
+ int64 voffset,
+ int64 n_values,
+ gfloat *values)
+{
+ return reinterpret_cast<DataHandleUpsample2 *> (dhandle)->read (voffset, n_values, values);
+}
+
+GslDataHandle*
+bse_data_handle_new_upsample2 (GslDataHandle *src_handle, int precision_bits)
+{
+ static GslDataHandleFuncs dh_upsample2_vtable = {
+ dh_upsample2_open,
+ dh_upsample2_read,
+ dh_upsample2_close,
+ NULL,
+ dh_upsample2_destroy,
+ };
+ DataHandleUpsample2 *dhandle = new DataHandleUpsample2 (src_handle, precision_bits);
+ if (dhandle->init_ok)
+ {
+ dhandle->vtable = &dh_upsample2_vtable;
+ return dhandle;
+ }
+ else
+ {
+ delete dhandle;
+ return NULL;
+ }
+}
Added: trunk/bse/bseresampler.cc
===================================================================
--- trunk/bse/bseresampler.cc 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseresampler.cc 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,309 @@
+/* BseResampler - FPU and SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include "bseresampler.hh"
+#include "bseblockutils.hh"
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+/*---- Resampler2 methods ----*/
+
+Resampler2*
+Resampler2::create (BseResampler2Mode mode,
+ BseResampler2Precision precision)
+{
+ return Block::create_resampler2 (mode, precision);
+}
+
+Resampler2::~Resampler2()
+{
+}
+
+/*---- coefficient sets for Resampler2 ----*/
+/*
+ * halfband FIR filter for factor 2 resampling, created with octave
+ *
+ * design method: windowed sinc, using ultraspherical window
+ *
+ * coefficients = 32
+ * x0 = 1.01065
+ * alpha = 0.75
+ *
+ * design criteria (44100 Hz => 88200 Hz):
+ *
+ * passband = [ 0, 18000 ] 1 - 2^-16 <= H(z) <= 1+2^-16
+ * transition = [ 18000, 26100 ]
+ * stopband = [ 26100, 44100 ] | H(z) | <= -96 dB
+ *
+ * and for 48 kHz => 96 kHz:
+ *
+ * passband = [ 0, 19589 ] 1 - 2^-16 <= H(z) <= 1+2^-16
+ * transition = [ 19588, 29386 ]
+ * stopband = [ 29386, 48000 ] | H(z) | <= -96 dB
+ *
+ * in order to keep the coefficient number down to 32, the filter
+ * does only "almost" fulfill the spec, but its really really close
+ * (no stopband ripple > -95 dB)
+ */
+
+const double Resampler2::halfband_fir_96db_coeffs[32] =
+{
+ -3.48616530828033e-05,
+ 0.000112877490936198,
+ -0.000278961878372482,
+ 0.000590495306376081,
+ -0.00112566995029848,
+ 0.00198635062559427,
+ -0.00330178798332932,
+ 0.00523534239035401,
+ -0.00799905465189065,
+ 0.0118867161189188,
+ -0.0173508611368417,
+ 0.0251928452706978,
+ -0.0370909694665106,
+ 0.057408291607388,
+ -0.102239638342325,
+ 0.317002929635456,
+ /* here, a 0.5 coefficient will be used */
+ 0.317002929635456,
+ -0.102239638342325,
+ 0.0574082916073878,
+ -0.0370909694665105,
+ 0.0251928452706976,
+ -0.0173508611368415,
+ 0.0118867161189186,
+ -0.00799905465189052,
+ 0.0052353423903539,
+ -0.00330178798332923,
+ 0.00198635062559419,
+ -0.00112566995029842,
+ 0.000590495306376034,
+ -0.00027896187837245,
+ 0.000112877490936177,
+ -3.48616530827983e-05
+};
+
+/*
+ * coefficients = 16
+ * x0 = 1.013
+ * alpha = 0.2
+ */
+const double Resampler2::halfband_fir_48db_coeffs[16] =
+{
+ -0.00270578824181636,
+ 0.00566964586625895,
+ -0.0106460585587187,
+ 0.0185209590435965,
+ -0.0310433957594089,
+ 0.0525722488176905,
+ -0.0991138314110143,
+ 0.315921760444802,
+ /* here, a 0.5 coefficient will be used */
+ 0.315921760444802,
+ -0.0991138314110145,
+ 0.0525722488176907,
+ -0.031043395759409,
+ 0.0185209590435966,
+ -0.0106460585587187,
+ 0.00566964586625899,
+ -0.00270578824181638
+};
+
+/*
+ * coefficients = 24
+ * x0 = 1.0105
+ * alpha = 0.93
+ */
+const double Resampler2::halfband_fir_72db_coeffs[24] =
+{
+ -0.0002622341634289771,
+ 0.0007380549701258316,
+ -0.001634275943268986,
+ 0.00315564206632209,
+ -0.005564668530702518,
+ 0.009207977968023688,
+ -0.0145854155294611,
+ 0.02253220964143239,
+ -0.03474055058489597,
+ 0.05556350980411048,
+ -0.1010616834297558,
+ 0.316597934725021,
+ /* here, a 0.5 coefficient will be used */
+ 0.3165979347250216,
+ -0.1010616834297563,
+ 0.0555635098041109,
+ -0.03474055058489638,
+ 0.02253220964143274,
+ -0.01458541552946141,
+ 0.00920797796802395,
+ -0.005564668530702722,
+ 0.003155642066322248,
+ -0.001634275943269096,
+ 0.000738054970125897,
+ -0.0002622341634290046,
+};
+
+/*
+ * coefficients = 42
+ * x0 = 1.0106
+ * alpha = 0.8
+ */
+const double Resampler2::halfband_fir_120db_coeffs[42] = {
+ 2.359361930421347e-06,
+ -9.506281154947505e-06,
+ 2.748456705299089e-05,
+ -6.620621425709478e-05,
+ 0.0001411845354098405,
+ -0.0002752082937581387,
+ 0.0005000548069542907,
+ -0.0008581650926168509,
+ 0.001404290771748464,
+ -0.002207303823772437,
+ 0.003352696749689989,
+ -0.004946913550236211,
+ 0.007125821223639453,
+ -0.01007206140806936,
+ 0.01405163477932994,
+ -0.01949467352546547,
+ 0.02718899890919871,
+ -0.038810852733035,
+ 0.05873397010869939,
+ -0.1030762204838426,
+ 0.317288892550808,
+ /* here, a 0.5 coefficient will be used */
+ 0.3172888925508079,
+ -0.1030762204838425,
+ 0.0587339701086993,
+ -0.03881085273303492,
+ 0.02718899890919862,
+ -0.01949467352546535,
+ 0.01405163477932982,
+ -0.01007206140806923,
+ 0.007125821223639309,
+ -0.004946913550236062,
+ 0.003352696749689839,
+ -0.00220730382377229,
+ 0.001404290771748321,
+ -0.0008581650926167192,
+ 0.0005000548069541726,
+ -0.0002752082937580344,
+ 0.0001411845354097548,
+ -6.620621425702783e-05,
+ 2.748456705294319e-05,
+ -9.506281154917077e-06,
+ 2.359361930409472e-06
+};
+
+/*
+ * coefficients = 52
+ * x0 = 1.0104
+ * alpha = 0.8
+ */
+const double Resampler2::halfband_fir_144db_coeffs[52] = {
+ -1.841826652087099e-07,
+ 8.762360674826639e-07,
+ -2.867933918842901e-06,
+ 7.670965310712155e-06,
+ -1.795091436711159e-05,
+ 3.808294405088742e-05,
+ -7.483688716947913e-05,
+ 0.0001381756990743866,
+ -0.0002421379200249195,
+ 0.0004057667984715052,
+ -0.0006540521320531017,
+ 0.001018873594538604,
+ -0.001539987101083099,
+ 0.002266194978575507,
+ -0.003257014968854008,
+ 0.004585469100383752,
+ -0.006343174213238195,
+ 0.008650017657145861,
+ -0.01167305853124126,
+ 0.01566484143899151,
+ -0.02104586507283325,
+ 0.02859957136356252,
+ -0.04000402932277326,
+ 0.05964131775019404,
+ -0.1036437507243546,
+ 0.3174820359034792,
+ /* here, a 0.5 coefficient will be used */
+ 0.3174820359034791,
+ -0.1036437507243545,
+ 0.05964131775019401,
+ -0.04000402932277325,
+ 0.0285995713635625,
+ -0.02104586507283322,
+ 0.01566484143899148,
+ -0.01167305853124122,
+ 0.008650017657145822,
+ -0.006343174213238157,
+ 0.004585469100383712,
+ -0.003257014968853964,
+ 0.002266194978575464,
+ -0.00153998710108306,
+ 0.001018873594538566,
+ -0.0006540521320530672,
+ 0.0004057667984714751,
+ -0.0002421379200248905,
+ 0.0001381756990743623,
+ -7.483688716946011e-05,
+ 3.808294405087123e-05,
+ -1.795091436709889e-05,
+ 7.670965310702215e-06,
+ -2.867933918835638e-06,
+ 8.762360674786308e-07,
+ -1.841826652067372e-07,
+};
+
+} /* namespace Resampler */
+
+} /* namespace Bse */
+
+/*---- Resampler2 C API ----*/
+
+BseResampler2*
+bse_resampler2_create (BseResampler2Mode mode,
+ BseResampler2Precision precision)
+{
+ return reinterpret_cast<BseResampler2 *> (Bse::Resampler::Resampler2::create (mode, precision));
+}
+
+void
+bse_resampler2_destroy (BseResampler2 *resampler)
+{
+ delete reinterpret_cast<Bse::Resampler::Resampler2 *> (resampler);
+}
+
+void
+bse_resampler2_process_block (BseResampler2 *resampler,
+ const float *input,
+ unsigned int n_input_samples,
+ float *output)
+{
+ reinterpret_cast<Bse::Resampler::Resampler2 *> (resampler)->process_block (input, n_input_samples, output);
+}
+
+guint
+bse_resampler2_order (BseResampler2 *resampler)
+{
+ return reinterpret_cast<Bse::Resampler::Resampler2 *> (resampler)->order();
+}
Added: trunk/bse/bseresampler.hh
===================================================================
--- trunk/bse/bseresampler.hh 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseresampler.hh 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,135 @@
+/* BseResampler - FPU and SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#ifndef __BSE_RESAMPLER_HH__
+#define __BSE_RESAMPLER_HH__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct BseResampler2 BseResampler2;
+
+/* keep synchronized with corresponding factory enum */
+typedef enum /*< skip >*/
+{
+ BSE_RESAMPLER2_MODE_UPSAMPLE,
+ BSE_RESAMPLER2_MODE_DOWNSAMPLE
+} BseResampler2Mode;
+
+/* keep synchronized with corresponding factory enum */
+typedef enum /*< skip >*/
+{
+ BSE_RESAMPLER2_PREC_48DB = 8,
+ BSE_RESAMPLER2_PREC_72DB = 12,
+ BSE_RESAMPLER2_PREC_96DB = 16,
+ BSE_RESAMPLER2_PREC_120DB = 20,
+ BSE_RESAMPLER2_PREC_144DB = 24
+} BseResampler2Precision;
+
+BseResampler2* bse_resampler2_create (BseResampler2Mode mode,
+ BseResampler2Precision precision);
+void bse_resampler2_destroy (BseResampler2 *resampler);
+void bse_resampler2_process_block (BseResampler2 *resampler,
+ const float *input,
+ unsigned int n_input_samples,
+ float *output);
+guint bse_resampler2_order (BseResampler2 *resampler);
+
+G_END_DECLS
+
+#ifdef __cplusplus
+
+#include <vector>
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+/**
+ * Interface for factor 2 resampling classes
+ */
+class Resampler2 {
+public:
+ /**
+ * creates a resampler instance fulfilling a given specification
+ */
+ static Resampler2* create (BseResampler2Mode mode,
+ BseResampler2Precision precision);
+ /**
+ * virtual destructor for abstract class
+ */
+ virtual ~Resampler2();
+
+ /**
+ * resample a data block
+ */
+ virtual void process_block (const float *input, unsigned int n_input_samples, float *output) = 0;
+
+ /**
+ * return FIR filter order
+ */
+ virtual guint order() const = 0;
+
+protected:
+ static const double halfband_fir_48db_coeffs[16];
+ static const double halfband_fir_72db_coeffs[24];
+ static const double halfband_fir_96db_coeffs[32];
+ static const double halfband_fir_120db_coeffs[42];
+ static const double halfband_fir_144db_coeffs[52];
+
+ /* Creates implementation from filter coefficients and Filter implementation class
+ *
+ * Since up- and downsamplers use different (scaled) coefficients, its possible
+ * to specify a scaling factor. Usually 2 for upsampling and 1 for downsampling.
+ */
+ template<class Filter> static inline Resampler2*
+ create_impl_with_coeffs (const double *d,
+ guint order,
+ double scaling)
+ {
+ float taps[order];
+ for (guint i = 0; i < order; i++)
+ taps[i] = d[i] * scaling;
+
+ Resampler2 *filter = new Filter (taps);
+ g_assert (order == filter->order());
+ return filter;
+ }
+
+public: // FIXME: friend class Bse::Block::Impl;
+ /* creates the actual implementation; specifying USE_SSE=true will use
+ * SSE instructions, USE_SSE=false will use FPU instructions
+ *
+ * Don't use this directly - it's only public in order to be used by
+ * bseblockutils.cc's anonymous Impl classes.
+ */
+ template<bool USE_SSE> static inline Resampler2*
+ create_impl (BseResampler2Mode mode,
+ BseResampler2Precision precision);
+};
+
+} /* namespace Resampler */
+
+} /* namespace Bse */
+
+#endif /* __cplusplus */
+
+#endif /* __BSE_RESAMPLER_HH__ */
Added: trunk/bse/bseresampler.tcc
===================================================================
--- trunk/bse/bseresampler.tcc 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/bseresampler.tcc 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,546 @@
+/* BseResampler - FPU and SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#ifndef __BSE_RESAMPLER_TCC__
+#define __BSE_RESAMPLER_TCC__
+
+#include <glib.h>
+#include <vector>
+#include <bse/bseresampler.hh>
+#include <birnet/birnetutils.h>
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+namespace Bse
+{
+
+namespace Resampler
+{
+
+using std::vector;
+using std::min;
+using std::max;
+using std::copy;
+
+/* see: http://ds9a.nl/gcc-simd/ */
+union F4Vector
+{
+ __m128 v; // vector of four single floats
+ float f[4];
+};
+
+/**
+ * FIR filter routine
+ *
+ * A FIR filter has the characteristic that it has a finite impulse response,
+ * and can be computed by convolution of the input signal with that finite
+ * impulse response.
+ *
+ * Thus, we use this for computing the output of the FIR filter
+ *
+ * output = input[0] * taps[0] + input[1] * taps[1] + ... + input[N-1] * taps[N-1]
+ *
+ * where input is the input signal, taps are the filter coefficients, in
+ * other texts sometimes called h[0]..h[N-1] (impulse response) or a[0]..a[N-1]
+ * (non recursive part of a digital filter), and N is the filter order.
+ */
+template<class Accumulator> static inline Accumulator
+fir_process_one_sample (const float *input,
+ const float *taps, /* [0..order-1] */
+ const guint order)
+{
+ Accumulator out = 0;
+ for (guint i = 0; i < order; i++)
+ out += input[i] * taps[i];
+ return out;
+}
+
+/**
+ * FIR filter routine for 4 samples simultaneously
+ *
+ * This routine produces (approximately) the same result as fir_process_one_sample
+ * but computes four consecutive output values at once using vectorized SSE
+ * instructions. Note that input and sse_taps need to be 16-byte aligned here.
+ *
+ * Also note that sse_taps is not a plain impulse response here, but a special
+ * version that needs to be computed with fir_compute_sse_taps.
+ */
+static inline void
+fir_process_4samples_sse (const float *input,
+ const float *sse_taps,
+ const guint order,
+ float *out0,
+ float *out1,
+ float *out2,
+ float *out3)
+#ifdef __SSE__
+{
+ /* input and taps must be 16-byte aligned */
+ const F4Vector *input_v = reinterpret_cast<const F4Vector *> (input);
+ const F4Vector *sse_taps_v = reinterpret_cast<const F4Vector *> (sse_taps);
+ F4Vector out0_v, out1_v, out2_v, out3_v;
+
+ out0_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[0].v);
+ out1_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[1].v);
+ out2_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[2].v);
+ out3_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[3].v);
+
+ for (guint i = 1; i < (order + 6) / 4; i++)
+ {
+ out0_v.v = _mm_add_ps (out0_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 0].v));
+ out1_v.v = _mm_add_ps (out1_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 1].v));
+ out2_v.v = _mm_add_ps (out2_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 2].v));
+ out3_v.v = _mm_add_ps (out3_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 3].v));
+ }
+
+ *out0 = out0_v.f[0] + out0_v.f[1] + out0_v.f[2] + out0_v.f[3];
+ *out1 = out1_v.f[0] + out1_v.f[1] + out1_v.f[2] + out1_v.f[3];
+ *out2 = out2_v.f[0] + out2_v.f[1] + out2_v.f[2] + out2_v.f[3];
+ *out3 = out3_v.f[0] + out3_v.f[1] + out3_v.f[2] + out3_v.f[3];
+}
+#else
+{
+ g_assert_not_reached();
+}
+#endif
+
+
+/**
+ * fir_compute_sse_taps takes a normal vector of FIR taps as argument and
+ * computes a specially scrambled version of these taps, ready to be used
+ * for SSE operations (by fir_process_4samples_sse).
+ *
+ * we require a special ordering of the FIR taps, to get maximum benefit of the SSE operations
+ *
+ * example: suppose the FIR taps are [ x1 x2 x3 x4 x5 x6 x7 x8 x9 ], then the SSE taps become
+ *
+ * [ x1 x2 x3 x4 0 x1 x2 x3 0 0 x1 x2 0 0 0 x1 <- for input[0]
+ * x5 x6 x7 x8 x4 x5 x6 x7 x3 x4 x5 x6 x2 x3 x4 x5 <- for input[1]
+ * x9 0 0 0 x8 x9 0 0 x7 x8 x9 0 x6 x7 x8 x9 ] <- for input[2]
+ * \------------/\-----------/\-----------/\-----------/
+ * for out0 for out1 for out2 for out3
+ *
+ * so that we can compute out0, out1, out2 and out3 simultaneously
+ * from input[0]..input[2]
+ */
+static inline vector<float>
+fir_compute_sse_taps (const vector<float>& taps)
+{
+ const int order = taps.size();
+ vector<float> sse_taps ((order + 6) / 4 * 16);
+
+ for (int j = 0; j < 4; j++)
+ for (int i = 0; i < order; i++)
+ {
+ int k = i + j;
+ sse_taps[(k / 4) * 16 + (k % 4) + j * 4] = taps[i];
+ }
+
+ return sse_taps;
+}
+
+/* Helper class to allocate aligned memory */
+template<class T, int ALIGN>
+class AlignedMem {
+ unsigned char *unaligned_mem;
+ T *data;
+ unsigned int n_elements;
+
+ void
+ allocate_aligned_data()
+ {
+ g_assert ((ALIGN % sizeof (T)) == 0);
+ data = reinterpret_cast<T *> (birnet_malloc_aligned (n_elements * sizeof (T), ALIGN, &unaligned_mem));
+ }
+ /* no copy constructor */
+ AlignedMem (const AlignedMem&);
+ /* no assignment operator */
+ AlignedMem& operator= (const AlignedMem&);
+public:
+ AlignedMem (const vector<T>& elements)
+ : n_elements (elements.size())
+ {
+ allocate_aligned_data();
+
+ for (unsigned int i = 0; i < n_elements; i++)
+ new (data + i) T(elements[i]);
+ }
+ AlignedMem (unsigned int n_elements)
+ : n_elements (n_elements)
+ {
+ allocate_aligned_data();
+
+ for (unsigned int i = 0; i < n_elements; i++)
+ new (data + i) T();
+ }
+ ~AlignedMem()
+ {
+ /* C++ destruction order: last allocated element is deleted first */
+ while (n_elements)
+ data[--n_elements].~T();
+
+ g_free (unaligned_mem);
+ }
+ T&
+ operator[] (unsigned int pos)
+ {
+ return data[pos];
+ }
+ const T&
+ operator[] (unsigned int pos) const
+ {
+ return data[pos];
+ }
+ unsigned int
+ size()
+ {
+ return n_elements;
+ }
+};
+
+/**
+ * Factor 2 upsampling of a data stream
+ *
+ * Template arguments:
+ * ORDER number of resampling filter coefficients
+ * USE_SSE whether to use SSE (vectorized) instructions or not
+ */
+template<guint ORDER, bool USE_SSE>
+class Upsampler2 : public Resampler2 {
+ vector<float> taps;
+ AlignedMem<float,16> history;
+ AlignedMem<float,16> sse_taps;
+protected:
+ /* fast SSE optimized convolution */
+ void
+ process_4samples_aligned (const float *input /* aligned */,
+ float *output)
+ {
+ const guint H = (ORDER / 2) - 1; /* half the filter length */
+
+ output[0] = input[H];
+ output[2] = input[H + 1];
+ output[4] = input[H + 2];
+ output[6] = input[H + 3];
+
+ fir_process_4samples_sse (input, &sse_taps[0], ORDER, &output[1], &output[3], &output[5], &output[7]);
+ }
+ /* slow convolution */
+ void
+ process_sample_unaligned (const float *input,
+ float *output)
+ {
+ const guint H = (ORDER / 2) - 1; /* half the filter length */
+ output[0] = input[H];
+ output[1] = fir_process_one_sample<float> (&input[0], &taps[0], ORDER);
+ }
+ void
+ process_block_aligned (const float *input,
+ guint n_input_samples,
+ float *output)
+ {
+ unsigned int i = 0;
+ if (USE_SSE)
+ {
+ while (i + 3 < n_input_samples)
+ {
+ process_4samples_aligned (&input[i], &output[i*2]);
+ i += 4;
+ }
+ }
+ while (i < n_input_samples)
+ {
+ process_sample_unaligned (&input[i], &output[2*i]);
+ i++;
+ }
+ }
+ void
+ process_block_unaligned (const float *input,
+ guint n_input_samples,
+ float *output)
+ {
+ unsigned int i = 0;
+ if (USE_SSE)
+ {
+ while ((reinterpret_cast<ptrdiff_t> (&input[i]) & 15) && i < n_input_samples)
+ {
+ process_sample_unaligned (&input[i], &output[2 * i]);
+ i++;
+ }
+ }
+ process_block_aligned (&input[i], n_input_samples - i, &output[2 * i]);
+ }
+public:
+ /**
+ * Constructs an Upsampler2 object with a given set of filter coefficients.
+ *
+ * init_taps: coefficients for the upsampling FIR halfband filter
+ */
+ Upsampler2 (float *init_taps)
+ : taps (init_taps, init_taps + ORDER),
+ history (2 * ORDER),
+ sse_taps (fir_compute_sse_taps (taps))
+ {
+ g_assert ((ORDER & 1) == 0); /* even order filter */
+ }
+ /**
+ * The function process_block() takes a block of input samples and produces a
+ * block with twice the length, containing interpolated output samples.
+ */
+ void
+ process_block (const float *input,
+ guint n_input_samples,
+ float *output)
+ {
+ unsigned int history_todo = min (n_input_samples, ORDER);
+
+ copy (input, input + history_todo, &history[ORDER]);
+ process_block_aligned (&history[0], history_todo, output);
+ if (n_input_samples >= ORDER)
+ {
+ process_block_unaligned (input, n_input_samples - history_todo, &output [2 * history_todo]);
+
+ // build new history from new input
+ copy (input + n_input_samples - ORDER, input + n_input_samples, &history[0]);
+ }
+ else
+ {
+ // build new history from end of old history
+ // (very expensive if n_input_samples tends to be a lot smaller than ORDER often)
+ g_memmove (&history[0], &history[n_input_samples], sizeof (history[0]) * ORDER);
+ }
+ }
+ /**
+ * Returns the FIR filter order.
+ */
+ guint
+ order() const
+ {
+ return ORDER;
+ }
+};
+
+/**
+ * Factor 2 downsampling of a data stream
+ *
+ * Template arguments:
+ * ORDER number of resampling filter coefficients
+ * USE_SSE whether to use SSE (vectorized) instructions or not
+ */
+template<guint ORDER, bool USE_SSE>
+class Downsampler2 : public Resampler2 {
+ vector<float> taps;
+ AlignedMem<float,16> history_even;
+ AlignedMem<float,16> history_odd;
+ AlignedMem<float,16> sse_taps;
+ /* fast SSE optimized convolution */
+ template<int ODD_STEPPING> void
+ process_4samples_aligned (const float *input_even /* aligned */,
+ const float *input_odd,
+ float *output)
+ {
+ const guint H = (ORDER / 2) - 1; /* half the filter length */
+
+ fir_process_4samples_sse (input_even, &sse_taps[0], ORDER, &output[0], &output[1], &output[2], &output[3]);
+
+ output[0] += 0.5 * input_odd[H * ODD_STEPPING];
+ output[1] += 0.5 * input_odd[(H + 1) * ODD_STEPPING];
+ output[2] += 0.5 * input_odd[(H + 2) * ODD_STEPPING];
+ output[3] += 0.5 * input_odd[(H + 3) * ODD_STEPPING];
+ }
+ /* slow convolution */
+ template<int ODD_STEPPING> float
+ process_sample_unaligned (const float *input_even,
+ const float *input_odd)
+ {
+ const guint H = (ORDER / 2) - 1; /* half the filter length */
+
+ return fir_process_one_sample<float> (&input_even[0], &taps[0], ORDER) + 0.5 * input_odd[H * ODD_STEPPING];
+ }
+ template<int ODD_STEPPING> void
+ process_block_aligned (const float *input_even,
+ const float *input_odd,
+ float *output,
+ guint n_output_samples)
+ {
+ unsigned int i = 0;
+ if (USE_SSE)
+ {
+ while (i + 3 < n_output_samples)
+ {
+ process_4samples_aligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING], &output[i]);
+ i += 4;
+ }
+ }
+ while (i < n_output_samples)
+ {
+ output[i] = process_sample_unaligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING]);
+ i++;
+ }
+ }
+ template<int ODD_STEPPING> void
+ process_block_unaligned (const float *input_even,
+ const float *input_odd,
+ float *output,
+ guint n_output_samples)
+ {
+ unsigned int i = 0;
+ if (USE_SSE)
+ {
+ while ((reinterpret_cast<ptrdiff_t> (&input_even[i]) & 15) && i < n_output_samples)
+ {
+ output[i] = process_sample_unaligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING]);
+ i++;
+ }
+ }
+ process_block_aligned<ODD_STEPPING> (&input_even[i], &input_odd[i * ODD_STEPPING], &output[i], n_output_samples);
+ }
+ void
+ deinterleave2 (const float *data,
+ guint n_data_values,
+ float *output)
+ {
+ for (unsigned int i = 0; i < n_data_values; i += 2)
+ output[i / 2] = data[i];
+ }
+public:
+ /**
+ * Constructs a Downsampler2 class using a given set of filter coefficients.
+ *
+ * init_taps: coefficients for the downsampling FIR halfband filter
+ */
+ Downsampler2 (float *init_taps)
+ : taps (init_taps, init_taps + ORDER),
+ history_even (2 * ORDER),
+ history_odd (2 * ORDER),
+ sse_taps (fir_compute_sse_taps (taps))
+ {
+ g_assert ((ORDER & 1) == 0); /* even order filter */
+ }
+ /**
+ * The function process_block() takes a block of input samples and produces
+ * a block with half the length, containing downsampled output samples.
+ */
+ void
+ process_block (const float *input,
+ guint n_input_samples,
+ float *output)
+ {
+ g_assert ((n_input_samples & 1) == 0);
+
+ const unsigned int BLOCKSIZE = 1024;
+
+ F4Vector block[BLOCKSIZE / 4]; /* using F4Vector ensures 16-byte alignment */
+ float *input_even = &block[0].f[0];
+
+ while (n_input_samples)
+ {
+ unsigned int n_input_todo = min (n_input_samples, BLOCKSIZE * 2);
+
+ /*
+ * since the halfband filter contains zeros every other sample
+ * and since we're using SSE instructions, which expect the
+ * data to be consecutively represented in memory, we prepare
+ * a block of samples containing only even-indexed samples
+ *
+ * we keep the deinterleaved data on the stack (instead of per-class
+ * allocated memory), to ensure that even running a lot of these
+ * downsampler streams will not result in cache trashing
+ */
+ /*
+ * FIXME: this implementation is suboptimal for non-SSE, because it
+ * performs an extra deinterleaving step in any case, but deinterleaving
+ * is only required for SSE instructions
+ */
+ deinterleave2 (input, n_input_todo, input_even);
+
+ const float *input_odd = input + 1; /* we process this one with a stepping of 2 */
+
+ const unsigned int n_output_todo = n_input_todo / 2;
+ const unsigned int history_todo = min (n_output_todo, ORDER);
+
+ copy (input_even, input_even + history_todo, &history_even[ORDER]);
+ deinterleave2 (input_odd, history_todo * 2, &history_odd[ORDER]);
+
+ process_block_aligned <1> (&history_even[0], &history_odd[0], output, history_todo);
+ if (n_output_todo >= ORDER)
+ {
+ process_block_unaligned<2> (input_even, input_odd, &output[history_todo], n_output_todo - history_todo);
+
+ // build new history from new input
+ copy (input_even + n_output_todo - ORDER, input_even + n_output_todo, &history_even[0]);
+ deinterleave2 (input_odd + n_input_todo - ORDER * 2, ORDER * 2, &history_odd[0]); /* FIXME: can be optimized */
+ }
+ else
+ {
+ // build new history from end of old history
+ // (very expensive if n_output_todo tends to be a lot smaller than ORDER often)
+ g_memmove (&history_even[0], &history_even[n_output_todo], sizeof (history_even[0]) * ORDER);
+ g_memmove (&history_odd[0], &history_odd[n_output_todo], sizeof (history_odd[0]) * ORDER);
+ }
+
+ n_input_samples -= n_input_todo;
+ input += n_input_todo;
+ output += n_output_todo;
+ }
+ }
+ /**
+ * Returns the filter order.
+ */
+ guint
+ order() const
+ {
+ return ORDER;
+ }
+};
+
+template<bool USE_SSE> Resampler2*
+Resampler2::create_impl (BseResampler2Mode mode,
+ BseResampler2Precision precision)
+{
+ if (mode == BSE_RESAMPLER2_MODE_UPSAMPLE)
+ {
+ switch (precision)
+ {
+ case BSE_RESAMPLER2_PREC_48DB: return create_impl_with_coeffs <Upsampler2<16, USE_SSE> > (halfband_fir_48db_coeffs, 16, 2.0);
+ case BSE_RESAMPLER2_PREC_72DB: return create_impl_with_coeffs <Upsampler2<24, USE_SSE> > (halfband_fir_72db_coeffs, 24, 2.0);
+ case BSE_RESAMPLER2_PREC_96DB: return create_impl_with_coeffs <Upsampler2<32, USE_SSE> > (halfband_fir_96db_coeffs, 32, 2.0);
+ case BSE_RESAMPLER2_PREC_120DB: return create_impl_with_coeffs <Upsampler2<42, USE_SSE> > (halfband_fir_120db_coeffs, 42, 2.0);
+ case BSE_RESAMPLER2_PREC_144DB: return create_impl_with_coeffs <Upsampler2<52, USE_SSE> > (halfband_fir_144db_coeffs, 52, 2.0);
+ }
+ }
+ else if (mode == BSE_RESAMPLER2_MODE_DOWNSAMPLE)
+ {
+ switch (precision)
+ {
+ case BSE_RESAMPLER2_PREC_48DB: return create_impl_with_coeffs <Downsampler2<16, USE_SSE> > (halfband_fir_48db_coeffs, 16, 1.0);
+ case BSE_RESAMPLER2_PREC_72DB: return create_impl_with_coeffs <Downsampler2<24, USE_SSE> > (halfband_fir_72db_coeffs, 24, 1.0);
+ case BSE_RESAMPLER2_PREC_96DB: return create_impl_with_coeffs <Downsampler2<32, USE_SSE> > (halfband_fir_96db_coeffs, 32, 1.0);
+ case BSE_RESAMPLER2_PREC_120DB: return create_impl_with_coeffs <Downsampler2<42, USE_SSE> > (halfband_fir_120db_coeffs, 42, 1.0);
+ case BSE_RESAMPLER2_PREC_144DB: return create_impl_with_coeffs <Downsampler2<52, USE_SSE> > (halfband_fir_144db_coeffs, 52, 1.0);
+ }
+ }
+ return 0;
+}
+
+} /* namespace Resampler */
+
+} /* namespace Bse */
+
+#endif /* __BSE_RESAMPLER_TCC__ */
Modified: trunk/bse/gsldatahandle.h
===================================================================
--- trunk/bse/gsldatahandle.h 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/gsldatahandle.h 2006-09-16 08:02:17 UTC (rev 3883)
@@ -1,5 +1,6 @@
/* GSL - Generic Sound Layer
* Copyright (C) 2001-2003 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -111,6 +112,11 @@
int64 loop_first,
int64 loop_last);
+/* --- resampling datahandles with the factor 2 --- */
+GslDataHandle* bse_data_handle_new_upsample2 (GslDataHandle *src_handle, // implemented in bsedatahandle-resample.cc
+ int precision_bits);
+GslDataHandle* bse_data_handle_new_downsample2 (GslDataHandle *src_handle); // implemented in bsedatahandle-resample.cc
+
/* --- xinfo handling --- */
GslDataHandle* gsl_data_handle_new_add_xinfos (GslDataHandle *src_handle,
gchar **xinfos);
Modified: trunk/bse/tests/.cvsignore
===================================================================
--- trunk/bse/tests/.cvsignore 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/.cvsignore 2006-09-16 08:02:17 UTC (rev 3883)
@@ -8,3 +8,6 @@
subnormals
testcxx
testfft
+resamplercapi
+testresampler
+resamplehandle
Modified: trunk/bse/tests/Makefile.am
===================================================================
--- trunk/bse/tests/Makefile.am 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/Makefile.am 2006-09-16 08:02:17 UTC (rev 3883)
@@ -12,7 +12,7 @@
EXTRA_DIST += arrows.gp filter-defs.gp
-TESTS = testfft loophandle testcxx subnormals blocktests
+TESTS = testfft loophandle testcxx subnormals blocktests testresampler resamplehandle resamplercapi
noinst_PROGRAMS = $(TESTS)
progs_ldadd = $(top_builddir)/bse/libbse.la
testfft_SOURCES = testfft.c cxxdummy.cc
@@ -21,9 +21,15 @@
testcxx_LDADD = $(progs_ldadd)
subnormals_SOURCES = subnormals.cc subnormals-aux.cc
subnormals_LDADD = $(progs_ldadd)
-subnormals_CXXFLAGS = -ffast-math
+subnormals_CXXFLAGS = $(AM_CXXFLAGS) -ffast-math
loophandle_SOURCES = loophandle.c
loophandle_LDADD = $(progs_ldadd)
blocktests_SOURCES = blocktests.cc
blocktests_LDADD = $(progs_ldadd)
-
+testresampler_SOURCES = testresampler.cc
+testresampler_LDADD = $(progs_ldadd)
+resamplehandle_SOURCES = resamplehandle.cc
+resamplehandle_LDADD = $(progs_ldadd)
+resamplercapi_SOURCES = resamplercapi.c
+resamplercapi_LDADD = $(progs_ldadd)
+
Added: trunk/bse/tests/resamplehandle.cc
===================================================================
--- trunk/bse/tests/resamplehandle.cc 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/resamplehandle.cc 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,225 @@
+/* BSE Resampling Datahandles Test
+ * Copyright (C) 2001-2002 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <bse/gsldatahandle.h>
+#include <bse/bsemain.h>
+#include <bse/bsemathsignal.h>
+#include <bse/gsldatautils.h>
+#include <bse/bseblockutils.hh>
+// #define TEST_VERBOSE
+#include <birnet/birnettests.h>
+#include <vector>
+
+using std::vector;
+using std::max;
+using std::min;
+
+void
+read_through (GslDataHandle *handle)
+{
+ int64 n_values = gsl_data_handle_n_values (handle);
+ int64 offset = 0;
+
+ while (offset < n_values)
+ {
+ gfloat values[1024];
+ int64 values_read = gsl_data_handle_read (handle, offset, 1024, values);
+ g_assert (values_read > 0);
+ offset += values_read;
+ }
+
+ g_assert (offset == n_values);
+}
+
+double
+check (const vector<float>& input, const vector<double>& expected, int n_channels, int precision_bits, double max_db)
+{
+ g_return_val_if_fail (input.size() * 2 == expected.size(), 0);
+ g_return_val_if_fail (input.size() % n_channels == 0, 0);
+
+ GslDataHandle *ihandle = gsl_data_handle_new_mem (n_channels, 32, 44100, 440, input.size(), &input[0], NULL);
+ GslDataHandle *rhandle = bse_data_handle_new_upsample2 (ihandle, precision_bits);
+ gsl_data_handle_unref (ihandle);
+
+ BseErrorType error = gsl_data_handle_open (rhandle);
+ if (error)
+ exit (1);
+
+ double worst_diff, worst_diff_db;
+
+ /* read through the datahandle linearily _twice_ */
+ for (int repeat = 1; repeat <= 2; repeat++)
+ {
+ GslDataPeekBuffer peek_buffer = { +1 /* incremental direction */, 0, };
+ worst_diff = 0.0;
+ for (int64 i = 0; i < rhandle->setup.n_values; i++)
+ {
+ double resampled = gsl_data_handle_peek_value (rhandle, i, &peek_buffer);
+ worst_diff = max (resampled - expected[i], worst_diff);
+ }
+ worst_diff_db = bse_db_from_factor (worst_diff, -200);
+ TPRINT ("linear(%dst read) read worst_diff = %f (%f dB)\n", repeat, worst_diff, worst_diff_db);
+ TASSERT (worst_diff_db < max_db);
+ }
+
+ /* test seeking */
+ worst_diff = 0.0;
+ for (int i = 0; i < 1000; i++)
+ {
+ int64 start = rand() % rhandle->setup.n_values;
+ int64 len = rand() % 1024;
+
+ GslDataPeekBuffer peek_buffer = { +1 /* incremental direction */, 0, };
+ for (int64 i = start; i < std::min (start + len, rhandle->setup.n_values); i++)
+ {
+ double resampled = gsl_data_handle_peek_value (rhandle, i, &peek_buffer);
+ worst_diff = max (resampled - expected[i], worst_diff);
+ }
+ }
+ worst_diff_db = bse_db_from_factor (worst_diff, -200);
+ TPRINT ("seek worst_diff = %f (%f dB)\n", worst_diff, worst_diff_db);
+ TASSERT (worst_diff_db < max_db);
+
+ /* test speed */
+ const guint RUNS = 10;
+ GTimer *timer = g_timer_new();
+ const guint dups = TEST_CALIBRATION (50.0, read_through (rhandle));
+
+ double m = 9e300;
+ for (guint i = 0; i < RUNS; i++)
+ {
+ g_timer_start (timer);
+ for (guint j = 0; j < dups; j++)
+ read_through (rhandle);
+ g_timer_stop (timer);
+ double e = g_timer_elapsed (timer, NULL);
+ if (e < m)
+ m = e;
+ }
+ double samples_per_second = input.size() / (m / dups);
+ TPRINT (" samples / second = %f\n", samples_per_second);
+ TPRINT (" which means the resampler can process %.2f 44100 Hz streams simultaneusly\n", samples_per_second / 44100.0);
+ TPRINT (" or one 44100 Hz stream takes %f %% CPU usage\n", 100.0 / (samples_per_second / 44100.0));
+
+ gsl_data_handle_close (rhandle);
+ gsl_data_handle_unref (rhandle);
+
+ return samples_per_second / 44100.0;
+}
+
+void
+run_tests()
+{
+ struct TestParameters {
+ int bits;
+ double mono_db;
+ double stereo_db;
+ } params[] =
+ {
+ { 8, -48, -48 },
+ { 12, -72, -72 },
+ { 16, -98, -95 },
+ { 20, -120, -117 },
+ { 24, -125, -125 }, /* this is not _really_ 24 bit, because the filter is computed using floats */
+ { 0, 0, 0 }
+ };
+
+ for (int p = 0; params[p].bits; p++)
+ {
+ const int LEN = 44100*10;
+ vector<float> input;
+ vector<double> expected;
+ for (int i = 0; i < LEN; i++)
+ {
+ input.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+
+ double j = i + 0.5; /* compute perfectly interpolated result */
+ expected.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+ expected.push_back (sin (j * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (j * 2 - LEN) / LEN));
+ }
+
+ // mono test
+ {
+ char *x = g_strdup_printf ("ResampleHandle %dbits mono", params[p].bits);
+ TSTART (x);
+ g_free (x);
+
+ double streams = check (input, expected, 1, params[p].bits, params[p].mono_db);
+ TDONE();
+
+ g_printerr (" ===> speed is equivalent to %.2f simultaneous 44100 Hz streams\n", streams);
+ }
+
+ input.clear();
+ expected.clear();
+ for (int i = 0; i < LEN; i++)
+ {
+ input.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+ input.push_back (sin (i * 2 * M_PI * 1000.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+
+ double j = i + 0.5; /* compute perfectly interpolated result */
+ expected.push_back (sin (i * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+ expected.push_back (sin (i * 2 * M_PI * 1000.0 / 44100.0) * bse_window_blackman (double (i * 2 - LEN) / LEN));
+ expected.push_back (sin (j * 2 * M_PI * 440.0 / 44100.0) * bse_window_blackman (double (j * 2 - LEN) / LEN));
+ expected.push_back (sin (j * 2 * M_PI * 1000.0 / 44100.0) * bse_window_blackman (double (j * 2 - LEN) / LEN));
+ }
+
+ // stereo test
+ {
+ char *x = g_strdup_printf ("ResampleHandle %dbits stereo", params[p].bits);
+ TSTART (x);
+ g_free (x);
+
+ double streams = check (input, expected, 2, params[p].bits, params[p].stereo_db);
+ TDONE();
+
+ g_printerr (" ===> speed is equivalent to %.2f simultaneous 44100 Hz streams\n", streams);
+ }
+ }
+}
+
+int
+main (int argc,
+ char *argv[])
+{
+ birnet_init_test (&argc, &argv);
+ //g_thread_init (NULL);
+ //birnet_init_test (&argc, &argv);
+ //bse_init_intern (&argc, &argv, "TestResample", NULL);
+ //std::set_terminate (__gnu_cxx::__verbose_terminate_handler);
+
+ printf ("*** tests on FPU\n");
+ run_tests();
+
+ /* load plugins */
+ BirnetInitValue config[] = {
+ { "load-core-plugins", "1" },
+ { NULL },
+ };
+ bse_init_test (&argc, &argv, config);
+
+ /* check for possible specialization */
+ if (Bse::Block::default_singleton() == Bse::Block::current_singleton())
+ return 0; /* nothing changed */
+
+ printf ("*** tests on SSE\n");
+ run_tests();
+
+ return 0;
+}
Added: trunk/bse/tests/resamplercapi.c
===================================================================
--- trunk/bse/tests/resamplercapi.c 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/resamplercapi.c 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,85 @@
+/* BSE Resampling Datahandles Test
+ * Copyright (C) 2001-2002 Tim Janik
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <bse/bsemathsignal.h>
+#include <bse/bseresampler.hh>
+#include <bse/bsemain.h>
+// #define TEST_VERBOSE
+#include <birnet/birnettests.h>
+
+static void
+test_c_api()
+{
+ BseResampler2 *resampler = bse_resampler2_create (BSE_RESAMPLER2_MODE_UPSAMPLE, BSE_RESAMPLER2_PREC_96DB);
+ const int INPUT_SIZE = 1024, OUTPUT_SIZE = 2048;
+ float in[INPUT_SIZE];
+ float out[OUTPUT_SIZE];
+ double error = 0;
+ int i;
+
+ for (i = 0; i < INPUT_SIZE; i++)
+ in[i] = sin (i * 440 * 2 * M_PI / 44100) * bse_window_blackman ((double) (i * 2 - INPUT_SIZE) / INPUT_SIZE);
+
+ bse_resampler2_process_block (resampler, in, INPUT_SIZE, out);
+
+ int delay = bse_resampler2_order (resampler) + 2;
+ for (i = 0; i < 2048; i++)
+ {
+ double expected = sin ((i - delay) * 220 * 2 * M_PI / 44100)
+ * bse_window_blackman ((double) ((i - delay) * 2 - OUTPUT_SIZE) / OUTPUT_SIZE);
+ error = MAX (error, out[i] - expected);
+ }
+
+ double error_db = bse_db_from_factor (error, -200);
+
+ bse_resampler2_destroy (resampler);
+
+ TPRINT ("Test C API error: %f\n", error_db);
+ TASSERT (error_db < -95);
+}
+
+int
+main (int argc,
+ char *argv[])
+{
+ birnet_init_test (&argc, &argv);
+
+ TSTART ("Resampler C API (FPU)");
+ test_c_api();
+ TDONE();
+
+ /* load plugins */
+ BirnetInitValue config[] = {
+ { "load-core-plugins", "1" },
+ { NULL },
+ };
+ bse_init_test (&argc, &argv, config);
+
+#if 0 /* FIXME: C? */
+ /* check for possible specialization */
+ if (Bse::Block::default_singleton() == Bse::Block::current_singleton())
+ return 0; /* nothing changed */
+#endif
+
+ TSTART ("Resampler C API (SSE)");
+ test_c_api();
+ TDONE();
+
+ return 0;
+}
Added: trunk/bse/tests/testresampler.cc
===================================================================
--- trunk/bse/tests/testresampler.cc 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/testresampler.cc 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,408 @@
+/* SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <bse/bseresampler.hh>
+#include <bse/bseresampler.tcc>
+
+#include <stdio.h>
+#include <math.h>
+#include <vector>
+#include <sys/time.h>
+#include <time.h>
+
+using std::vector;
+using std::min;
+using std::max;
+using std::copy;
+using namespace Bse::Resampler;
+
+#include "testresamplercoeffs.h"
+
+void
+usage ()
+{
+ printf ("usage: testresampler <options> [ <block_size> ] [ <test_frequency> ]\n");
+ printf ("\n");
+ printf (" p - performance\n");
+ printf (" a - accuracy\n");
+ printf (" g - generate output for gnuplot\n");
+ printf (" i - impulse response\n");
+ printf (" F - filter implementation\n");
+ printf ("\n");
+ printf (" d - downsample\n");
+ printf (" u - upsample\n");
+ printf (" s - subsample (= downsample and upsample after that)\n");
+ printf (" o - oversample (= upsample and downsample after that)\n");
+ printf ("\n");
+ printf (" f - fast implementation (sse)\n");
+ printf ("\n");
+ printf (" block_size defaults to 128 values\n");
+ printf (" test_frequency defaults to 440 Hz\n");
+ printf ("\n");
+ printf ("examples:\n");
+ printf (" testresampler puf 256 # check performance of fast upsampling with 256 value blocks\n");
+ printf (" testresampler au # check accuracy of standard upsampling\n");
+ printf (" testresampler au 128 500 # check accuracy of standard upsampling using a 500 Hz frequency\n");
+}
+
+enum TestType
+{
+ TEST_NONE,
+ TEST_PERFORMANCE,
+ TEST_ACCURACY,
+ TEST_GNUPLOT,
+ TEST_IMPULSE,
+ TEST_FILTER_IMPL
+} test_type = TEST_NONE;
+
+enum ResampleType
+{
+ RES_NONE,
+ RES_DOWNSAMPLE,
+ RES_UPSAMPLE,
+ RES_SUBSAMPLE,
+ RES_OVERSAMPLE
+} resample_type = RES_NONE;
+
+enum ImplType
+{
+ IMPL_NORMAL,
+ IMPL_SSE
+} impl_type = IMPL_NORMAL;
+
+unsigned int block_size = 128;
+double test_frequency = 440.0;
+
+double
+gettime ()
+{
+ timeval tv;
+ gettimeofday (&tv, 0);
+
+ return tv.tv_sec + tv.tv_usec / 1000000.0;
+}
+
+int
+test_filter_impl()
+{
+ int errors = 0;
+ printf ("testing filter implementation:\n\n");
+
+ for (guint order = 0; order < 64; order++)
+ {
+ vector<float> taps (order);
+ for (guint i = 0; i < order; i++)
+ taps[i] = i + 1;
+
+ AlignedMem<float,16> sse_taps (fir_compute_sse_taps (taps));
+ for (unsigned int i = 0; i < sse_taps.size(); i++)
+ {
+ printf ("%3d", (int) (sse_taps[i] + 0.5));
+ if (i % 4 == 3)
+ printf (" |");
+ if (i % 16 == 15)
+ printf (" ||| upper bound = %d\n", (order + 6) / 4);
+ }
+ printf ("\n\n");
+
+ AlignedMem<float,16> random_mem (order + 4);
+ for (guint i = 0; i < order + 4; i++)
+ random_mem[i] = 1.0 - rand() / (0.5 * RAND_MAX);
+
+ /* FIXME: the problem with this test is that we explicitely test SSE code
+ * here, but the test case is not compiled with -msse within the BEAST tree
+ */
+ float out[4];
+ fir_process_4samples_sse (&random_mem[0], &sse_taps[0], order,
+ &out[0], &out[1], &out[2], &out[3]);
+
+ double adiff = 0.0;
+ for (int i = 0; i < 4; i++)
+ {
+ double diff = fir_process_one_sample<double> (&random_mem[i], &taps[0], order) - out[i];
+ adiff += fabs (diff);
+ }
+ if (adiff > 0.0001)
+ {
+ printf ("*** order = %d, adiff = %f\n", order, adiff);
+ errors++;
+ }
+ }
+ if (errors)
+ printf ("*** %d errors detected\n", errors);
+ else
+ printf ("filter implementation ok.\n");
+
+ return errors ? 1 : 0;
+}
+
+template <int TEST, int RESAMPLE, int IMPL> int
+perform_test()
+{
+ if (TEST == TEST_IMPULSE) /* need to have space for impulse response for all 4 tests */
+ block_size = 150;
+
+ /* initialize up- and downsampler */
+ const int T = 32;
+ const bool USE_SSE = (IMPL == IMPL_SSE);
+ float utaps[T], dtaps[T];
+ for (int i = 0; i < T; i++)
+ {
+ utaps[i] = halfband_fir_upsample2_96db_coeffs[i] * 2;
+ dtaps[i] = halfband_fir_upsample2_96db_coeffs[i];
+ }
+
+ Upsampler2<T,USE_SSE> ups (utaps);
+ Downsampler2<T,USE_SSE> downs (dtaps);
+
+ F4Vector in_v[block_size / 2 + 1], out_v[block_size / 2 + 1], out2_v[block_size / 2 + 1];
+ float *input = &in_v[0].f[0], *output = &out_v[0].f[0], *output2 = &out2_v[0].f[0]; /* ensure aligned data */
+
+ if (TEST == TEST_PERFORMANCE)
+ {
+ for (unsigned int i = 0; i < block_size; i++)
+ input[i] = sin (i * test_frequency / 44100.0 * 2 * M_PI);
+
+ double start_time = gettime();
+ long long k = 0;
+ for (int i = 0; i < 500000; i++)
+ {
+ if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+ {
+ downs.process_block (input, block_size, output);
+ if (RESAMPLE == RES_SUBSAMPLE)
+ ups.process_block (output, block_size / 2, output2);
+ }
+ if (RESAMPLE == RES_UPSAMPLE || RESAMPLE == RES_OVERSAMPLE)
+ {
+ ups.process_block (input, block_size, output);
+ if (RESAMPLE == RES_OVERSAMPLE)
+ downs.process_block (output, block_size * 2, output2);
+ }
+ k += block_size;
+ }
+ double end_time = gettime();
+ if (RESAMPLE == RES_DOWNSAMPLE)
+ {
+ printf (" (performance will be normalized to downsampler output samples)\n");
+ k /= 2;
+ }
+ else if (RESAMPLE == RES_UPSAMPLE)
+ {
+ printf (" (performance will be normalized to upsampler input samples)\n");
+ }
+ printf (" total samples processed = %lld\n", k);
+ printf (" processing_time = %f\n", end_time - start_time);
+ printf (" samples / second = %f\n", k / (end_time - start_time));
+ printf (" which means the resampler can process %.2f 44100 Hz streams simultaneusly\n",
+ k / (end_time - start_time) / 44100.0);
+ printf (" or one 44100 Hz stream takes %f %% CPU usage\n",
+ 100.0 / (k / (end_time - start_time) / 44100.0));
+ }
+ else if (TEST == TEST_ACCURACY || TEST == TEST_GNUPLOT)
+ {
+ long long k = 0;
+ double phase = 0;
+ double max_diff = 0;
+ for (int b = 0; b < 1000; b++)
+ {
+ int misalign = rand() % 4;
+ int bs = rand() % (block_size - misalign);
+
+ if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+ bs -= bs & 1;
+
+ for (int i = 0; i < bs; i++)
+ {
+ input[i+misalign] = sin (phase);
+ phase += test_frequency/44100.0 * 2 * M_PI;
+ }
+ if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+ {
+ downs.process_block (input + misalign, bs, output);
+ if (RESAMPLE == RES_SUBSAMPLE)
+ ups.process_block (output, bs / 2, output2);
+ }
+ if (RESAMPLE == RES_UPSAMPLE || RESAMPLE == RES_OVERSAMPLE)
+ {
+ ups.process_block (input + misalign, bs, output);
+ if (RESAMPLE == RES_OVERSAMPLE)
+ downs.process_block (output, bs * 2, output2);
+ }
+
+ /* validate output */
+ double sin_shift;
+ double freq_factor;
+ unsigned int out_bs;
+ float *check = output;
+
+ if (RESAMPLE == RES_UPSAMPLE)
+ {
+ sin_shift = 34;
+ freq_factor = 0.5;
+ out_bs = bs * 2;
+ }
+ else if (RESAMPLE == RES_DOWNSAMPLE)
+ {
+ sin_shift = 16.5;
+ freq_factor = 2;
+ out_bs = bs / 2;
+ }
+ else if (RESAMPLE == RES_OVERSAMPLE)
+ {
+ sin_shift = 33.5;
+ freq_factor = 1;
+ check = output2;
+ out_bs = bs;
+ }
+ else if (RESAMPLE == RES_SUBSAMPLE)
+ {
+ sin_shift = 67;
+ freq_factor = 1;
+ check = output2;
+ out_bs = bs;
+ }
+
+ for (unsigned int i = 0; i < out_bs; i++, k++)
+ if (k > 100)
+ {
+ double correct_output = sin ((k - sin_shift) * 2 * freq_factor * test_frequency / 44100.0 * M_PI);
+ if (TEST == TEST_GNUPLOT)
+ printf ("%lld %.17f %.17f\n", k, check[i], correct_output);
+ else
+ max_diff = max (max_diff, check[i] - correct_output);
+ }
+ }
+ double max_diff_db = 20 * log (max_diff) / log (10);
+ if (TEST == TEST_ACCURACY)
+ {
+ printf (" input frequency used to perform test = %.2f Hz (SR = 44100.0 Hz)\n", test_frequency);
+ printf (" max difference between correct and computed output: %f = %f dB\n", max_diff, max_diff_db);
+ }
+ }
+ else if (TEST == TEST_IMPULSE)
+ {
+ input[0] = 1;
+ for (unsigned int i = 1; i < block_size; i++)
+ {
+ input[i] = 0;
+ output[i] = 0;
+ output2[i] = 0;
+ }
+
+ if (RESAMPLE == RES_DOWNSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+ {
+ downs.process_block (input, block_size, output);
+ if (RESAMPLE == RES_SUBSAMPLE)
+ ups.process_block (output, block_size / 2, output2);
+ }
+ if (RESAMPLE == RES_UPSAMPLE || RESAMPLE == RES_OVERSAMPLE)
+ {
+ ups.process_block (input, block_size, output);
+ if (RESAMPLE == RES_OVERSAMPLE)
+ downs.process_block (output, block_size * 2, output2);
+ }
+
+ float *check = output;
+ if (RESAMPLE == RES_OVERSAMPLE || RESAMPLE == RES_SUBSAMPLE)
+ check = output2;
+
+ for (unsigned int i = 0; i < block_size; i++)
+ printf ("%.17f\n", check[i]);
+ }
+ return 0;
+}
+
+template <int TEST, int RESAMPLE> int
+perform_test()
+{
+ switch (impl_type)
+ {
+ case IMPL_SSE: printf ("using SSE instructions\n"); return perform_test<TEST, RESAMPLE, IMPL_SSE> ();
+ case IMPL_NORMAL: printf ("using FPU instructions\n"); return perform_test<TEST, RESAMPLE, IMPL_NORMAL> ();
+ }
+ g_assert_not_reached();
+ return 1;
+}
+
+template <int TEST> int
+perform_test ()
+{
+ switch (resample_type)
+ {
+ case RES_DOWNSAMPLE: printf ("for factor 2 downsampling "); return perform_test<TEST, RES_DOWNSAMPLE> ();
+ case RES_UPSAMPLE: printf ("for factor 2 upsampling "); return perform_test<TEST, RES_UPSAMPLE> ();
+ case RES_SUBSAMPLE: printf ("for factor 2 subsampling "); return perform_test<TEST, RES_SUBSAMPLE> ();
+ case RES_OVERSAMPLE: printf ("for factor 2 oversampling "); return perform_test<TEST, RES_OVERSAMPLE> ();
+ default: usage(); return 1;
+ }
+}
+
+int
+perform_test()
+{
+ switch (test_type)
+ {
+ case TEST_PERFORMANCE: printf ("performance test "); return perform_test<TEST_PERFORMANCE> ();
+ case TEST_ACCURACY: printf ("accuracy test "); return perform_test<TEST_ACCURACY> ();
+ case TEST_GNUPLOT: printf ("# gnuplot test "); return perform_test<TEST_GNUPLOT> ();
+ case TEST_IMPULSE: printf ("# impulse response test "); return perform_test<TEST_IMPULSE> ();
+ case TEST_FILTER_IMPL: return test_filter_impl();
+ default: usage(); return 1;
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ if (argc >= 2)
+ {
+ for (unsigned int i = 0; i < strlen (argv[1]); i++)
+ {
+ switch (argv[1][i])
+ {
+ case 'F': test_type = TEST_FILTER_IMPL; break;
+ case 'p': test_type = TEST_PERFORMANCE; break;
+ case 'a': test_type = TEST_ACCURACY; break;
+ case 'g': test_type = TEST_GNUPLOT; break;
+ case 'i': test_type = TEST_IMPULSE; break;
+
+ case 'd': resample_type = RES_DOWNSAMPLE; break;
+ case 'u': resample_type = RES_UPSAMPLE; break;
+ case 's': resample_type = RES_SUBSAMPLE; break;
+ case 'o': resample_type = RES_OVERSAMPLE; break;
+
+ case 'f': impl_type = IMPL_SSE; break;
+ }
+ }
+ }
+
+ if (argc >= 3)
+ block_size = atoi (argv[2]);
+
+ if (argc >= 4)
+ test_frequency = atoi (argv[3]);
+
+ if ((block_size & 1) == 1)
+ block_size++;
+
+ if (block_size < 2)
+ block_size = 2;
+
+ return perform_test();
+}
Added: trunk/bse/tests/testresamplercoeffs.h
===================================================================
--- trunk/bse/tests/testresamplercoeffs.h 2006-09-16 07:59:25 UTC (rev 3882)
+++ trunk/bse/tests/testresamplercoeffs.h 2006-09-16 08:02:17 UTC (rev 3883)
@@ -0,0 +1,80 @@
+/* SSE optimized FIR Resampling code
+ * Copyright (C) 2006 Stefan Westerfeld
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * halfband FIR filter for factor 2 resampling, created with octave
+ *
+ * design method: windowed sinc, using ultraspherical window
+ *
+ * coefficients = 32
+ * x0 = 1.01065
+ * alpha = 0.75
+ *
+ * design criteria (44100 Hz => 88200 Hz):
+ *
+ * passband = [ 0, 18000 ] 1 - 2^-16 <= H(z) <= 1+2^-16
+ * transition = [ 18000, 26100 ]
+ * stopband = [ 26100, 44100 ] | H(z) | <= -96 dB
+ *
+ * and for 48 kHz => 96 kHz:
+ *
+ * passband = [ 0, 19589 ] 1 - 2^-16 <= H(z) <= 1+2^-16
+ * transition = [ 19588, 29386 ]
+ * stopband = [ 29386, 48000 ] | H(z) | <= -96 dB
+ *
+ * in order to keep the coefficient number down to 32, the filter
+ * does only "almost" fulfill the spec, but its really really close
+ * (no stopband ripple > -95 dB)
+ */
+
+const double halfband_fir_upsample2_96db_coeffs[32] = {
+ -3.48616530828033e-05,
+ 0.000112877490936198,
+ -0.000278961878372482,
+ 0.000590495306376081,
+ -0.00112566995029848,
+ 0.00198635062559427,
+ -0.00330178798332932,
+ 0.00523534239035401,
+ -0.00799905465189065,
+ 0.0118867161189188,
+ -0.0173508611368417,
+ 0.0251928452706978,
+ -0.0370909694665106,
+ 0.057408291607388,
+ -0.102239638342325,
+ 0.317002929635456,
+ /* here, a 0.5 coefficient will be used */
+ 0.317002929635456,
+ -0.102239638342325,
+ 0.0574082916073878,
+ -0.0370909694665105,
+ 0.0251928452706976,
+ -0.0173508611368415,
+ 0.0118867161189186,
+ -0.00799905465189052,
+ 0.0052353423903539,
+ -0.00330178798332923,
+ 0.00198635062559419,
+ -0.00112566995029842,
+ 0.000590495306376034,
+ -0.00027896187837245,
+ 0.000112877490936177,
+ -3.48616530827983e-05
+};
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]