r3886 - in trunk/bse: . tests
- From: timj svn gnome org
- To: svn-commits-list gnome org
- Subject: r3886 - in trunk/bse: . tests
- Date: Sat, 16 Sep 2006 09:03:30 -0400 (EDT)
Author: timj
Date: 2006-09-16 09:03:27 -0400 (Sat, 16 Sep 2006)
New Revision: 3886
Modified:
trunk/bse/ChangeLog
trunk/bse/bseblockutils.cc
trunk/bse/bsedatahandle-resample.cc
trunk/bse/bseresampler.cc
trunk/bse/bseresampler.hh
trunk/bse/bseresampler.tcc
trunk/bse/gsldatahandle.h
trunk/bse/tests/testresampler.cc
Log:
Sat Sep 16 14:45:49 2006 Tim Janik <timj gtk org>
* bseblockutils.cc: access Resampler2::create_impl() via derivation.
* bseresampler.tcc: whitespace fixups. fixed includes. fixed
unconditional use of __m128. renamed AlignedMem to AlignedArray because
this class implements aligned array accesses instead of a memory
allocator. adapted callers.
* bsedatahandle-resample.cc:
* bseresampler.cc, gsldatahandle.h: whitespace and comment fixups.
* bse/bseresampler.hh: whitespace fixups. made create_impl() non-public,
it can be accessed by blockutil implementations via derivation.
Modified: trunk/bse/ChangeLog
===================================================================
--- trunk/bse/ChangeLog 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/ChangeLog 2006-09-16 13:03:27 UTC (rev 3886)
@@ -1,3 +1,18 @@
+Sat Sep 16 14:45:49 2006 Tim Janik <timj gtk org>
+
+ * bseblockutils.cc: access Resampler2::create_impl() via derivation.
+
+ * bseresampler.tcc: whitespace fixups. fixed includes. fixed
+ unconditional use of __m128. renamed AlignedMem to AlignedArray because
+ this class implements aligned array accesses instead of a memory
+ allocator. adapted callers.
+
+ * bsedatahandle-resample.cc:
+ * bseresampler.cc, gsldatahandle.h: whitespace and comment fixups.
+
+ * bse/bseresampler.hh: whitespace fixups. made create_impl() non-public,
+ it can be accessed by blockutil implementations via derivation.
+
Sat Sep 16 09:25:17 2006 Stefan Westerfeld <stefan space twc de>
* bseresampler.tcc bseresampler.cc bseresampler.hh: Added factor 2
Modified: trunk/bse/bseblockutils.cc
===================================================================
--- trunk/bse/bseblockutils.cc 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/bseblockutils.cc 2006-09-16 13:03:27 UTC (rev 3886)
@@ -150,7 +150,15 @@
create_resampler2 (BseResampler2Mode mode,
BseResampler2Precision precision)
{
- return Bse::Resampler::Resampler2::create_impl<false> (mode, precision);
+ struct FPUResampler2 : public Bse::Resampler::Resampler2 {
+ static inline Resampler2*
+ create_resampler (BseResampler2Mode mode,
+ BseResampler2Precision precision)
+ {
+ return create_impl<false> (mode, precision);
+ }
+ };
+ return FPUResampler2::create_resampler (mode, precision);
}
};
static BlockImpl default_block_impl;
Modified: trunk/bse/bsedatahandle-resample.cc
===================================================================
--- trunk/bse/bsedatahandle-resample.cc 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/bsedatahandle-resample.cc 2006-09-16 13:03:27 UTC (rev 3886)
@@ -17,42 +17,34 @@
* Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*/
-
#include "bseresampler.hh"
#include "gsldatahandle.h"
#include <sfi/sficxx.hh>
#include <vector>
-namespace Bse
-{
-
-namespace Resampler
-{
-
-}
-
-using std::vector;
+namespace Bse {
using Resampler::Resampler2;
+using std::vector;
class DataHandleUpsample2 : public GslDataHandle,
public Sfi::GNewable /* 0 initialization */
{
GslDataHandle *src_handle;
int precision_bits;
-
-public:
- gboolean init_ok;
vector<Resampler2 *> upsamplers;
int64 pcm_frame;
vector<float> pcm_data;
int64 frame_size;
int64 filter_delay;
int64 filter_order;
+public:
+ bool init_ok;
- DataHandleUpsample2 (GslDataHandle *src_handle, int precision_bits)
- : src_handle (src_handle),
- precision_bits (precision_bits),
- init_ok (false)
+ DataHandleUpsample2 (GslDataHandle *src_handle,
+ int precision_bits) :
+ src_handle (src_handle),
+ precision_bits (precision_bits),
+ init_ok (false)
{
g_return_if_fail (src_handle != NULL);
@@ -63,13 +55,11 @@
src_handle = gsl_data_handle_ref (src_handle);
}
}
-
~DataHandleUpsample2()
{
gsl_data_handle_unref (src_handle);
gsl_data_handle_common_free (this);
}
-
BseErrorType
open (GslDataHandleSetup *setup)
{
@@ -99,7 +89,6 @@
}
return BSE_ERROR_NONE;
}
-
void
close()
{
@@ -112,7 +101,6 @@
setup.xinfos = NULL; /* cleanup pointer reference */
gsl_data_handle_close (src_handle);
}
-
int64
src_read (int64 voffset,
int64 n_values,
@@ -148,9 +136,10 @@
return n_values;
}
-
void
- deinterleave (float* src, float *dest, int64 n_values)
+ deinterleave (float *src,
+ float *dest,
+ int64 n_values)
{
const int64 n_channels = setup.n_channels;
@@ -158,9 +147,10 @@
for (int64 v = ch; v < n_values; v += n_channels)
*dest++ = src[v];
}
-
void
- interleave (float* src, float *dest, int64 n_values)
+ interleave (float *src,
+ float *dest,
+ int64 n_values)
{
const int64 n_channels = setup.n_channels;
@@ -168,7 +158,6 @@
for (int64 v = ch; v < n_values; v += n_channels)
dest[v] = *src++;
}
-
int64
prepare_filter_history (int64 frame)
{
@@ -194,7 +183,6 @@
}
return 1;
}
-
int64
read_frame (int64 frame)
{
@@ -230,11 +218,10 @@
pcm_frame = frame;
return 1;
}
-
int64
- read (int64 voffset,
- int64 n_values,
- gfloat *values)
+ read (int64 voffset,
+ int64 n_values,
+ float *values)
{
int64 frame = voffset / pcm_data.size();
if (frame != pcm_frame)
@@ -256,7 +243,7 @@
}
};
-} // namespace Bse
+} // Bse
using namespace Bse;
Modified: trunk/bse/bseresampler.cc
===================================================================
--- trunk/bse/bseresampler.cc 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/bseresampler.cc 2006-09-16 13:03:27 UTC (rev 3886)
@@ -1,4 +1,4 @@
-/* BseResampler - FPU and SSE optimized FIR Resampling code
+/* BseResampler - common FIR Resampling code
* Copyright (C) 2006 Stefan Westerfeld
*
* This library is free software; you can redistribute it and/or
@@ -19,14 +19,10 @@
#include "bseresampler.hh"
#include "bseblockutils.hh"
-namespace Bse
-{
+namespace Bse {
+namespace Resampler {
-namespace Resampler
-{
-
-/*---- Resampler2 methods ----*/
-
+/* --- Resampler2 methods --- */
Resampler2*
Resampler2::create (BseResampler2Mode mode,
BseResampler2Precision precision)
@@ -35,12 +31,10 @@
}
Resampler2::~Resampler2()
-{
-}
+{}
-/*---- coefficient sets for Resampler2 ----*/
-/*
- * halfband FIR filter for factor 2 resampling, created with octave
+/* --- coefficient sets for Resampler2 --- */
+/* halfband FIR filter for factor 2 resampling, created with octave
*
* design method: windowed sinc, using ultraspherical window
*
@@ -68,218 +62,212 @@
const double Resampler2::halfband_fir_96db_coeffs[32] =
{
-3.48616530828033e-05,
- 0.000112877490936198,
+ 0.000112877490936198,
-0.000278961878372482,
- 0.000590495306376081,
+ 0.000590495306376081,
-0.00112566995029848,
- 0.00198635062559427,
+ 0.00198635062559427,
-0.00330178798332932,
- 0.00523534239035401,
+ 0.00523534239035401,
-0.00799905465189065,
- 0.0118867161189188,
+ 0.0118867161189188,
-0.0173508611368417,
- 0.0251928452706978,
+ 0.0251928452706978,
-0.0370909694665106,
- 0.057408291607388,
+ 0.057408291607388,
-0.102239638342325,
- 0.317002929635456,
- /* here, a 0.5 coefficient will be used */
- 0.317002929635456,
+ 0.317002929635456,
+ /* here, a 0.5 coefficient will be used */
+ 0.317002929635456,
-0.102239638342325,
- 0.0574082916073878,
+ 0.0574082916073878,
-0.0370909694665105,
- 0.0251928452706976,
+ 0.0251928452706976,
-0.0173508611368415,
- 0.0118867161189186,
+ 0.0118867161189186,
-0.00799905465189052,
- 0.0052353423903539,
+ 0.0052353423903539,
-0.00330178798332923,
- 0.00198635062559419,
+ 0.00198635062559419,
-0.00112566995029842,
- 0.000590495306376034,
+ 0.000590495306376034,
-0.00027896187837245,
- 0.000112877490936177,
+ 0.000112877490936177,
-3.48616530827983e-05
};
-/*
- * coefficients = 16
+/* coefficients = 16
* x0 = 1.013
* alpha = 0.2
*/
const double Resampler2::halfband_fir_48db_coeffs[16] =
{
-0.00270578824181636,
- 0.00566964586625895,
+ 0.00566964586625895,
-0.0106460585587187,
- 0.0185209590435965,
+ 0.0185209590435965,
-0.0310433957594089,
- 0.0525722488176905,
+ 0.0525722488176905,
-0.0991138314110143,
- 0.315921760444802,
- /* here, a 0.5 coefficient will be used */
- 0.315921760444802,
+ 0.315921760444802,
+ /* here, a 0.5 coefficient will be used */
+ 0.315921760444802,
-0.0991138314110145,
- 0.0525722488176907,
+ 0.0525722488176907,
-0.031043395759409,
- 0.0185209590435966,
+ 0.0185209590435966,
-0.0106460585587187,
- 0.00566964586625899,
+ 0.00566964586625899,
-0.00270578824181638
};
-/*
- * coefficients = 24
+/* coefficients = 24
* x0 = 1.0105
* alpha = 0.93
*/
const double Resampler2::halfband_fir_72db_coeffs[24] =
{
-0.0002622341634289771,
- 0.0007380549701258316,
+ 0.0007380549701258316,
-0.001634275943268986,
- 0.00315564206632209,
+ 0.00315564206632209,
-0.005564668530702518,
- 0.009207977968023688,
+ 0.009207977968023688,
-0.0145854155294611,
- 0.02253220964143239,
+ 0.02253220964143239,
-0.03474055058489597,
- 0.05556350980411048,
+ 0.05556350980411048,
-0.1010616834297558,
- 0.316597934725021,
- /* here, a 0.5 coefficient will be used */
- 0.3165979347250216,
+ 0.316597934725021,
+ /* here, a 0.5 coefficient will be used */
+ 0.3165979347250216,
-0.1010616834297563,
- 0.0555635098041109,
+ 0.0555635098041109,
-0.03474055058489638,
- 0.02253220964143274,
+ 0.02253220964143274,
-0.01458541552946141,
- 0.00920797796802395,
+ 0.00920797796802395,
-0.005564668530702722,
- 0.003155642066322248,
+ 0.003155642066322248,
-0.001634275943269096,
- 0.000738054970125897,
+ 0.000738054970125897,
-0.0002622341634290046,
};
-/*
- * coefficients = 42
+/* coefficients = 42
* x0 = 1.0106
* alpha = 0.8
*/
const double Resampler2::halfband_fir_120db_coeffs[42] = {
- 2.359361930421347e-06,
+ 2.359361930421347e-06,
-9.506281154947505e-06,
- 2.748456705299089e-05,
+ 2.748456705299089e-05,
-6.620621425709478e-05,
- 0.0001411845354098405,
+ 0.0001411845354098405,
-0.0002752082937581387,
- 0.0005000548069542907,
+ 0.0005000548069542907,
-0.0008581650926168509,
- 0.001404290771748464,
+ 0.001404290771748464,
-0.002207303823772437,
- 0.003352696749689989,
+ 0.003352696749689989,
-0.004946913550236211,
- 0.007125821223639453,
+ 0.007125821223639453,
-0.01007206140806936,
- 0.01405163477932994,
+ 0.01405163477932994,
-0.01949467352546547,
- 0.02718899890919871,
+ 0.02718899890919871,
-0.038810852733035,
- 0.05873397010869939,
+ 0.05873397010869939,
-0.1030762204838426,
- 0.317288892550808,
- /* here, a 0.5 coefficient will be used */
- 0.3172888925508079,
+ 0.317288892550808,
+ /* here, a 0.5 coefficient will be used */
+ 0.3172888925508079,
-0.1030762204838425,
- 0.0587339701086993,
+ 0.0587339701086993,
-0.03881085273303492,
- 0.02718899890919862,
+ 0.02718899890919862,
-0.01949467352546535,
- 0.01405163477932982,
+ 0.01405163477932982,
-0.01007206140806923,
- 0.007125821223639309,
+ 0.007125821223639309,
-0.004946913550236062,
- 0.003352696749689839,
+ 0.003352696749689839,
-0.00220730382377229,
- 0.001404290771748321,
+ 0.001404290771748321,
-0.0008581650926167192,
- 0.0005000548069541726,
+ 0.0005000548069541726,
-0.0002752082937580344,
- 0.0001411845354097548,
+ 0.0001411845354097548,
-6.620621425702783e-05,
- 2.748456705294319e-05,
+ 2.748456705294319e-05,
-9.506281154917077e-06,
- 2.359361930409472e-06
+ 2.359361930409472e-06
};
-/*
- * coefficients = 52
+/* coefficients = 52
* x0 = 1.0104
* alpha = 0.8
*/
const double Resampler2::halfband_fir_144db_coeffs[52] = {
-1.841826652087099e-07,
- 8.762360674826639e-07,
+ 8.762360674826639e-07,
-2.867933918842901e-06,
- 7.670965310712155e-06,
+ 7.670965310712155e-06,
-1.795091436711159e-05,
- 3.808294405088742e-05,
+ 3.808294405088742e-05,
-7.483688716947913e-05,
- 0.0001381756990743866,
+ 0.0001381756990743866,
-0.0002421379200249195,
- 0.0004057667984715052,
+ 0.0004057667984715052,
-0.0006540521320531017,
- 0.001018873594538604,
+ 0.001018873594538604,
-0.001539987101083099,
- 0.002266194978575507,
+ 0.002266194978575507,
-0.003257014968854008,
- 0.004585469100383752,
+ 0.004585469100383752,
-0.006343174213238195,
- 0.008650017657145861,
+ 0.008650017657145861,
-0.01167305853124126,
- 0.01566484143899151,
+ 0.01566484143899151,
-0.02104586507283325,
- 0.02859957136356252,
+ 0.02859957136356252,
-0.04000402932277326,
- 0.05964131775019404,
+ 0.05964131775019404,
-0.1036437507243546,
- 0.3174820359034792,
- /* here, a 0.5 coefficient will be used */
- 0.3174820359034791,
+ 0.3174820359034792,
+ /* here, a 0.5 coefficient will be used */
+ 0.3174820359034791,
-0.1036437507243545,
- 0.05964131775019401,
+ 0.05964131775019401,
-0.04000402932277325,
- 0.0285995713635625,
+ 0.0285995713635625,
-0.02104586507283322,
- 0.01566484143899148,
+ 0.01566484143899148,
-0.01167305853124122,
- 0.008650017657145822,
+ 0.008650017657145822,
-0.006343174213238157,
- 0.004585469100383712,
+ 0.004585469100383712,
-0.003257014968853964,
- 0.002266194978575464,
+ 0.002266194978575464,
-0.00153998710108306,
- 0.001018873594538566,
+ 0.001018873594538566,
-0.0006540521320530672,
- 0.0004057667984714751,
+ 0.0004057667984714751,
-0.0002421379200248905,
- 0.0001381756990743623,
+ 0.0001381756990743623,
-7.483688716946011e-05,
- 3.808294405087123e-05,
+ 3.808294405087123e-05,
-1.795091436709889e-05,
- 7.670965310702215e-06,
+ 7.670965310702215e-06,
-2.867933918835638e-06,
- 8.762360674786308e-07,
+ 8.762360674786308e-07,
-1.841826652067372e-07,
};
-} /* namespace Resampler */
+} // Resampler
+} // Bse
-} /* namespace Bse */
-
-/*---- Resampler2 C API ----*/
-
+/* --- Resampler2 C API --- */
BseResampler2*
bse_resampler2_create (BseResampler2Mode mode,
BseResampler2Precision precision)
Modified: trunk/bse/bseresampler.hh
===================================================================
--- trunk/bse/bseresampler.hh 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/bseresampler.hh 2006-09-16 13:03:27 UTC (rev 3886)
@@ -25,15 +25,13 @@
typedef struct BseResampler2 BseResampler2;
-/* keep synchronized with corresponding factory enum */
-typedef enum /*< skip >*/
+typedef enum /*< skip >*/
{
BSE_RESAMPLER2_MODE_UPSAMPLE,
BSE_RESAMPLER2_MODE_DOWNSAMPLE
} BseResampler2Mode;
-/* keep synchronized with corresponding factory enum */
-typedef enum /*< skip >*/
+typedef enum /*< skip >*/
{
BSE_RESAMPLER2_PREC_48DB = 8,
BSE_RESAMPLER2_PREC_72DB = 12,
@@ -54,15 +52,11 @@
G_END_DECLS
#ifdef __cplusplus
-
#include <vector>
-namespace Bse
-{
+namespace Bse {
+namespace Resampler {
-namespace Resampler
-{
-
/**
* Interface for factor 2 resampling classes
*/
@@ -77,24 +71,21 @@
* virtual destructor for abstract class
*/
virtual ~Resampler2();
-
/**
* resample a data block
*/
virtual void process_block (const float *input, unsigned int n_input_samples, float *output) = 0;
-
/**
* return FIR filter order
*/
virtual guint order() const = 0;
-
protected:
static const double halfband_fir_48db_coeffs[16];
static const double halfband_fir_72db_coeffs[24];
static const double halfband_fir_96db_coeffs[32];
static const double halfband_fir_120db_coeffs[42];
static const double halfband_fir_144db_coeffs[52];
-
+
/* Creates implementation from filter coefficients and Filter implementation class
*
* Since up- and downsamplers use different (scaled) coefficients, its possible
@@ -108,17 +99,15 @@
float taps[order];
for (guint i = 0; i < order; i++)
taps[i] = d[i] * scaling;
-
+
Resampler2 *filter = new Filter (taps);
g_assert (order == filter->order());
return filter;
}
-
-public: // FIXME: friend class Bse::Block::Impl;
/* creates the actual implementation; specifying USE_SSE=true will use
* SSE instructions, USE_SSE=false will use FPU instructions
*
- * Don't use this directly - it's only public in order to be used by
+ * Don't use this directly - it's only to be used by
* bseblockutils.cc's anonymous Impl classes.
*/
template<bool USE_SSE> static inline Resampler2*
Modified: trunk/bse/bseresampler.tcc
===================================================================
--- trunk/bse/bseresampler.tcc 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/bseresampler.tcc 2006-09-16 13:03:27 UTC (rev 3886)
@@ -19,20 +19,15 @@
#ifndef __BSE_RESAMPLER_TCC__
#define __BSE_RESAMPLER_TCC__
-#include <glib.h>
#include <vector>
#include <bse/bseresampler.hh>
-#include <birnet/birnetutils.h>
+#include <birnet/birnet.h>
#ifdef __SSE__
#include <xmmintrin.h>
#endif
-namespace Bse
-{
-
-namespace Resampler
-{
-
+namespace Bse {
+namespace Resampler {
using std::vector;
using std::min;
using std::max;
@@ -41,8 +36,10 @@
/* see: http://ds9a.nl/gcc-simd/ */
union F4Vector
{
- __m128 v; // vector of four single floats
float f[4];
+#ifdef __SSE__
+ __m128 v; // vector of four single floats
+#endif
};
/**
@@ -89,18 +86,18 @@
float *out1,
float *out2,
float *out3)
-#ifdef __SSE__
{
+#ifdef __SSE__
/* input and taps must be 16-byte aligned */
const F4Vector *input_v = reinterpret_cast<const F4Vector *> (input);
const F4Vector *sse_taps_v = reinterpret_cast<const F4Vector *> (sse_taps);
F4Vector out0_v, out1_v, out2_v, out3_v;
-
+
out0_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[0].v);
out1_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[1].v);
out2_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[2].v);
out3_v.v = _mm_mul_ps (input_v[0].v, sse_taps_v[3].v);
-
+
for (guint i = 1; i < (order + 6) / 4; i++)
{
out0_v.v = _mm_add_ps (out0_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 0].v));
@@ -108,17 +105,15 @@
out2_v.v = _mm_add_ps (out2_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 2].v));
out3_v.v = _mm_add_ps (out3_v.v, _mm_mul_ps (input_v[i].v, sse_taps_v[i * 4 + 3].v));
}
-
+
*out0 = out0_v.f[0] + out0_v.f[1] + out0_v.f[2] + out0_v.f[3];
*out1 = out1_v.f[0] + out1_v.f[1] + out1_v.f[2] + out1_v.f[3];
*out2 = out2_v.f[0] + out2_v.f[1] + out2_v.f[2] + out2_v.f[3];
*out3 = out3_v.f[0] + out3_v.f[1] + out3_v.f[2] + out3_v.f[3];
-}
#else
-{
g_assert_not_reached();
-}
#endif
+}
/**
@@ -144,57 +139,55 @@
{
const int order = taps.size();
vector<float> sse_taps ((order + 6) / 4 * 16);
-
+
for (int j = 0; j < 4; j++)
for (int i = 0; i < order; i++)
{
int k = i + j;
sse_taps[(k / 4) * 16 + (k % 4) + j * 4] = taps[i];
}
-
+
return sse_taps;
}
/* Helper class to allocate aligned memory */
template<class T, int ALIGN>
-class AlignedMem {
+class AlignedArray {
unsigned char *unaligned_mem;
T *data;
unsigned int n_elements;
-
+
void
allocate_aligned_data()
{
g_assert ((ALIGN % sizeof (T)) == 0);
data = reinterpret_cast<T *> (birnet_malloc_aligned (n_elements * sizeof (T), ALIGN, &unaligned_mem));
}
- /* no copy constructor */
- AlignedMem (const AlignedMem&);
- /* no assignment operator */
- AlignedMem& operator= (const AlignedMem&);
+ /* no copy constructor and no assignment operator */
+ BIRNET_PRIVATE_CLASS_COPY (AlignedArray);
public:
- AlignedMem (const vector<T>& elements)
- : n_elements (elements.size())
+ AlignedArray (const vector<T>& elements) :
+ n_elements (elements.size())
{
allocate_aligned_data();
-
+
for (unsigned int i = 0; i < n_elements; i++)
new (data + i) T(elements[i]);
}
- AlignedMem (unsigned int n_elements)
- : n_elements (n_elements)
+ AlignedArray (unsigned int n_elements) :
+ n_elements (n_elements)
{
allocate_aligned_data();
-
+
for (unsigned int i = 0; i < n_elements; i++)
new (data + i) T();
}
- ~AlignedMem()
+ ~AlignedArray()
{
/* C++ destruction order: last allocated element is deleted first */
while (n_elements)
data[--n_elements].~T();
-
+
g_free (unaligned_mem);
}
T&
@@ -223,9 +216,9 @@
*/
template<guint ORDER, bool USE_SSE>
class Upsampler2 : public Resampler2 {
- vector<float> taps;
- AlignedMem<float,16> history;
- AlignedMem<float,16> sse_taps;
+ vector<float> taps;
+ AlignedArray<float,16> history;
+ AlignedArray<float,16> sse_taps;
protected:
/* fast SSE optimized convolution */
void
@@ -233,12 +226,12 @@
float *output)
{
const guint H = (ORDER / 2) - 1; /* half the filter length */
-
+
output[0] = input[H];
output[2] = input[H + 1];
output[4] = input[H + 2];
output[6] = input[H + 3];
-
+
fir_process_4samples_sse (input, &sse_taps[0], ORDER, &output[1], &output[3], &output[5], &output[7]);
}
/* slow convolution */
@@ -292,10 +285,10 @@
*
* init_taps: coefficients for the upsampling FIR halfband filter
*/
- Upsampler2 (float *init_taps)
- : taps (init_taps, init_taps + ORDER),
- history (2 * ORDER),
- sse_taps (fir_compute_sse_taps (taps))
+ Upsampler2 (float *init_taps) :
+ taps (init_taps, init_taps + ORDER),
+ history (2 * ORDER),
+ sse_taps (fir_compute_sse_taps (taps))
{
g_assert ((ORDER & 1) == 0); /* even order filter */
}
@@ -309,13 +302,13 @@
float *output)
{
unsigned int history_todo = min (n_input_samples, ORDER);
-
+
copy (input, input + history_todo, &history[ORDER]);
process_block_aligned (&history[0], history_todo, output);
if (n_input_samples >= ORDER)
{
process_block_unaligned (input, n_input_samples - history_todo, &output [2 * history_todo]);
-
+
// build new history from new input
copy (input + n_input_samples - ORDER, input + n_input_samples, &history[0]);
}
@@ -346,9 +339,9 @@
template<guint ORDER, bool USE_SSE>
class Downsampler2 : public Resampler2 {
vector<float> taps;
- AlignedMem<float,16> history_even;
- AlignedMem<float,16> history_odd;
- AlignedMem<float,16> sse_taps;
+ AlignedArray<float,16> history_even;
+ AlignedArray<float,16> history_odd;
+ AlignedArray<float,16> sse_taps;
/* fast SSE optimized convolution */
template<int ODD_STEPPING> void
process_4samples_aligned (const float *input_even /* aligned */,
@@ -356,9 +349,9 @@
float *output)
{
const guint H = (ORDER / 2) - 1; /* half the filter length */
-
+
fir_process_4samples_sse (input_even, &sse_taps[0], ORDER, &output[0], &output[1], &output[2], &output[3]);
-
+
output[0] += 0.5 * input_odd[H * ODD_STEPPING];
output[1] += 0.5 * input_odd[(H + 1) * ODD_STEPPING];
output[2] += 0.5 * input_odd[(H + 2) * ODD_STEPPING];
@@ -370,7 +363,7 @@
const float *input_odd)
{
const guint H = (ORDER / 2) - 1; /* half the filter length */
-
+
return fir_process_one_sample<float> (&input_even[0], &taps[0], ORDER) + 0.5 * input_odd[H * ODD_STEPPING];
}
template<int ODD_STEPPING> void
@@ -425,11 +418,11 @@
*
* init_taps: coefficients for the downsampling FIR halfband filter
*/
- Downsampler2 (float *init_taps)
- : taps (init_taps, init_taps + ORDER),
- history_even (2 * ORDER),
- history_odd (2 * ORDER),
- sse_taps (fir_compute_sse_taps (taps))
+ Downsampler2 (float *init_taps) :
+ taps (init_taps, init_taps + ORDER),
+ history_even (2 * ORDER),
+ history_odd (2 * ORDER),
+ sse_taps (fir_compute_sse_taps (taps))
{
g_assert ((ORDER & 1) == 0); /* even order filter */
}
@@ -443,18 +436,17 @@
float *output)
{
g_assert ((n_input_samples & 1) == 0);
-
+
const unsigned int BLOCKSIZE = 1024;
-
+
F4Vector block[BLOCKSIZE / 4]; /* using F4Vector ensures 16-byte alignment */
float *input_even = &block[0].f[0];
-
+
while (n_input_samples)
{
unsigned int n_input_todo = min (n_input_samples, BLOCKSIZE * 2);
-
- /*
- * since the halfband filter contains zeros every other sample
+
+ /* since the halfband filter contains zeros every other sample
* and since we're using SSE instructions, which expect the
* data to be consecutively represented in memory, we prepare
* a block of samples containing only even-indexed samples
@@ -462,27 +454,26 @@
* we keep the deinterleaved data on the stack (instead of per-class
* allocated memory), to ensure that even running a lot of these
* downsampler streams will not result in cache trashing
- */
- /*
- * FIXME: this implementation is suboptimal for non-SSE, because it
+ *
+ * FIXME: this implementation is suboptimal for non-SSE, because it
* performs an extra deinterleaving step in any case, but deinterleaving
* is only required for SSE instructions
*/
deinterleave2 (input, n_input_todo, input_even);
-
+
const float *input_odd = input + 1; /* we process this one with a stepping of 2 */
-
+
const unsigned int n_output_todo = n_input_todo / 2;
const unsigned int history_todo = min (n_output_todo, ORDER);
-
+
copy (input_even, input_even + history_todo, &history_even[ORDER]);
deinterleave2 (input_odd, history_todo * 2, &history_odd[ORDER]);
-
+
process_block_aligned <1> (&history_even[0], &history_odd[0], output, history_todo);
if (n_output_todo >= ORDER)
{
process_block_unaligned<2> (input_even, input_odd, &output[history_todo], n_output_todo - history_todo);
-
+
// build new history from new input
copy (input_even + n_output_todo - ORDER, input_even + n_output_todo, &history_even[0]);
deinterleave2 (input_odd + n_input_todo - ORDER * 2, ORDER * 2, &history_odd[0]); /* FIXME: can be optimized */
@@ -494,7 +485,7 @@
g_memmove (&history_even[0], &history_even[n_output_todo], sizeof (history_even[0]) * ORDER);
g_memmove (&history_odd[0], &history_odd[n_output_todo], sizeof (history_odd[0]) * ORDER);
}
-
+
n_input_samples -= n_input_todo;
input += n_input_todo;
output += n_output_todo;
@@ -539,8 +530,7 @@
return 0;
}
-} /* namespace Resampler */
+} // Resampler
+} // Bse
-} /* namespace Bse */
-
#endif /* __BSE_RESAMPLER_TCC__ */
Modified: trunk/bse/gsldatahandle.h
===================================================================
--- trunk/bse/gsldatahandle.h 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/gsldatahandle.h 2006-09-16 13:03:27 UTC (rev 3886)
@@ -112,10 +112,10 @@
int64 loop_first,
int64 loop_last);
-/* --- resampling datahandles with the factor 2 --- */
-GslDataHandle* bse_data_handle_new_upsample2 (GslDataHandle *src_handle, // implemented in bsedatahandle-resample.cc
- int precision_bits);
-GslDataHandle* bse_data_handle_new_downsample2 (GslDataHandle *src_handle); // implemented in bsedatahandle-resample.cc
+/* --- factor 2 resampling datahandles --- */
+GslDataHandle* bse_data_handle_new_upsample2 (GslDataHandle *src_handle, // implemented in bsedatahandle-resample.cc
+ int precision_bits);
+GslDataHandle* bse_data_handle_new_downsample2 (GslDataHandle *src_handle); // implemented in bsedatahandle-resample.cc
/* --- xinfo handling --- */
GslDataHandle* gsl_data_handle_new_add_xinfos (GslDataHandle *src_handle,
Modified: trunk/bse/tests/testresampler.cc
===================================================================
--- trunk/bse/tests/testresampler.cc 2006-09-16 08:09:13 UTC (rev 3885)
+++ trunk/bse/tests/testresampler.cc 2006-09-16 13:03:27 UTC (rev 3886)
@@ -109,7 +109,7 @@
for (guint i = 0; i < order; i++)
taps[i] = i + 1;
- AlignedMem<float,16> sse_taps (fir_compute_sse_taps (taps));
+ AlignedArray<float,16> sse_taps (fir_compute_sse_taps (taps));
for (unsigned int i = 0; i < sse_taps.size(); i++)
{
printf ("%3d", (int) (sse_taps[i] + 0.5));
@@ -120,7 +120,7 @@
}
printf ("\n\n");
- AlignedMem<float,16> random_mem (order + 4);
+ AlignedArray<float,16> random_mem (order + 4);
for (guint i = 0; i < order + 4; i++)
random_mem[i] = 1.0 - rand() / (0.5 * RAND_MAX);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]