[beast/devel: 1/6] BSE: Make gslfft results compatible with fftw.

From: Tim Janik <timj src gnome org>
To: commits-list gnome org
Cc:
Subject: [beast/devel: 1/6] BSE: Make gslfft results compatible with fftw.
Date: Sun, 16 Dec 2012 01:59:25 +0000 (UTC)
commit 6e1d04dc25f6f9d30b6b0c0159a14c27fff6b4dc
Author: Stefan Westerfeld <stefan space twc de>
Date:   Wed Oct 13 11:20:35 2010 +0200

    BSE: Make gslfft results compatible with fftw.
    
    The Numerical Recipies FFT and FFTW behave differently because a minus
    sign in the formula is different (so Numerical Recipies analysis performs
    the same computation like FFTW synthesis and vice versa). This change
    makes gslfft and FFTW results equivalent. Also, while FFTW never scales
    results, gslfft used to scale results during backward transform. To get
    the same results, scaling is now only performed optionally in gslfft.
    
    Finally, real variants (like fftar/fftsr) have been changed to produce the
    same results like real FFTW.

 bse/gsl-fftconf.sh |  150 ++++++++++++++++++++++++++++++++++++++--------------
 bse/gsl-fftgen.pl  |  104 +++++++++++++++++++++++++++---------
 bse/gslfft.h       |   85 ++++++++++++++++++++++++------
 3 files changed, 257 insertions(+), 82 deletions(-)
---
diff --git a/bse/gsl-fftconf.sh b/bse/gsl-fftconf.sh
index 109c66e..f4f97bf 100755
--- a/bse/gsl-fftconf.sh
+++ b/bse/gsl-fftconf.sh
@@ -29,7 +29,7 @@ $MKFFT $OPTIONS 0
 #
 # generate small fft sizes, seperating stage 2
 #
-for dir in --analysis --synthesis ; do
+for dir in --analysis --synthesis --synthesis-scale ; do
   DOPT="$OPTIONS --skip-macros $dir"
   echo "Generating FFT functions: $dir" >&2
   $MKFFT $DOPT	   2 F				# standalone fft2
@@ -68,31 +68,44 @@ cat <<__EOF
 
 /* public FFT frontends and generic handling of huge fft sizes */
 
+typedef enum {
+  BIG_ANALYSIS = 1,
+  BIG_SYNTHESIS = 2,
+  BIG_SYNTHESIS_SCALE = 3
+} FFTMode;
 
 static void
 gsl_power2_fftc_big (const unsigned int n_values,
 		     const $IEEE_TYPE  *rivalues_in,
 		     $IEEE_TYPE        *rivalues,
-                     const int          esign)
+                     FFTMode            mode)
 {
+  const int esign = (mode == BIG_ANALYSIS) ? -1 : 1;
   const unsigned int n_values2 = n_values << 1;
   double theta = esign < 0 ? -3.1415926535897932384626433832795029 : 3.1415926535897932384626433832795029;
   unsigned int i, block_size = 8192 << 1;
   double last_sin;
 
-  if (esign > 0)
+  if (mode == BIG_SYNTHESIS)
     {
       if (rivalues_in)
-        bitreverse_fft2analysis (n_values, rivalues_in, rivalues);
+        bitreverse_fft2synthesis (n_values, rivalues_in, rivalues);
       for (i = 0; i < n_values; i += 8192)
-        gsl_power2_fft8192analysis_skip2 (rivalues + (i << 1), rivalues + (i << 1));
+        gsl_power2_fft8192synthesis_skip2 (rivalues + (i << 1), rivalues + (i << 1));
     }
-  else
+  else if (mode == BIG_SYNTHESIS_SCALE)
     {
       if (rivalues_in)
-        bitreverse_fft2synthesis (n_values, rivalues_in, rivalues);
+        bitreverse_fft2synthesis_scale (n_values, rivalues_in, rivalues);
       for (i = 0; i < n_values; i += 8192)
-        gsl_power2_fft8192synthesis_skip2 (rivalues + (i << 1), rivalues + (i << 1));
+        gsl_power2_fft8192synthesis_scale_skip2 (rivalues + (i << 1), rivalues + (i << 1));
+    }
+  else if (mode == BIG_ANALYSIS)
+    {
+      if (rivalues_in)
+        bitreverse_fft2analysis (n_values, rivalues_in, rivalues);
+      for (i = 0; i < n_values; i += 8192)
+        gsl_power2_fft8192analysis_skip2 (rivalues + (i << 1), rivalues + (i << 1));
     }
   theta *= (double) 1.0 / 8192.;
   last_sin = sin (theta);
@@ -231,7 +244,7 @@ gsl_power2_fftac (const unsigned int n_values,
       case 2048: gsl_power2_fft2048analysis (rivalues_in, rivalues_out);	break;
       case 4096: gsl_power2_fft4096analysis (rivalues_in, rivalues_out);	break;
       case 8192: gsl_power2_fft8192analysis (rivalues_in, rivalues_out);	break;
-      default:	 gsl_power2_fftc_big (n_values, rivalues_in, rivalues_out, +1);
+      default:	 gsl_power2_fftc_big (n_values, rivalues_in, rivalues_out, BIG_ANALYSIS);
     }
 }
 void
@@ -257,7 +270,34 @@ gsl_power2_fftsc (const unsigned int n_values,
       case 2048: gsl_power2_fft2048synthesis (rivalues_in, rivalues_out);	break;
       case 4096: gsl_power2_fft4096synthesis (rivalues_in, rivalues_out);	break;
       case 8192: gsl_power2_fft8192synthesis (rivalues_in, rivalues_out);	break;
-      default:	 gsl_power2_fftc_big (n_values, rivalues_in, rivalues_out, -1);
+      default:	 gsl_power2_fftc_big (n_values, rivalues_in, rivalues_out, BIG_SYNTHESIS);
+    }
+  /* { unsigned int i; for (i = 0; i < n_values << 1; i++) rivalues_out[i] *= (double) n_values; } */
+}
+void
+gsl_power2_fftsc_scale (const unsigned int n_values,
+                        const $IEEE_TYPE  *rivalues_in,
+                        $IEEE_TYPE        *rivalues_out)
+{
+  g_return_if_fail ((n_values & (n_values - 1)) == 0 && n_values >= 1);
+
+  switch (n_values)
+    {
+      case    1: rivalues_out[0] = rivalues_in[0], rivalues_out[1] = rivalues_in[1]; break;
+      case    2: gsl_power2_fft2synthesis_scale (rivalues_in, rivalues_out);	break;
+      case    4: gsl_power2_fft4synthesis_scale (rivalues_in, rivalues_out);	break;
+      case    8: gsl_power2_fft8synthesis_scale (rivalues_in, rivalues_out);	break;
+      case   16: gsl_power2_fft16synthesis_scale (rivalues_in, rivalues_out);	break;
+      case   32: gsl_power2_fft32synthesis_scale (rivalues_in, rivalues_out);	break;
+      case   64: gsl_power2_fft64synthesis_scale (rivalues_in, rivalues_out);	break;
+      case  128: gsl_power2_fft128synthesis_scale (rivalues_in, rivalues_out);	break;
+      case  256: gsl_power2_fft256synthesis_scale (rivalues_in, rivalues_out);	break;
+      case  512: gsl_power2_fft512synthesis_scale (rivalues_in, rivalues_out);	break;
+      case 1024: gsl_power2_fft1024synthesis_scale (rivalues_in, rivalues_out);	break;
+      case 2048: gsl_power2_fft2048synthesis_scale (rivalues_in, rivalues_out);	break;
+      case 4096: gsl_power2_fft4096synthesis_scale (rivalues_in, rivalues_out);	break;
+      case 8192: gsl_power2_fft8192synthesis_scale (rivalues_in, rivalues_out);	break;
+      default:	 gsl_power2_fftc_big (n_values, rivalues_in, rivalues_out, BIG_SYNTHESIS_SCALE);
     }
   /* { unsigned int i; for (i = 0; i < n_values << 1; i++) rivalues_out[i] *= (double) n_values; } */
 }
@@ -280,7 +320,7 @@ gsl_power2_fftar (const unsigned int n_values,
 
   g_return_if_fail ((n_values & (n_values - 1)) == 0 && n_values >= 2);
 
-  gsl_power2_fftac (n_cvalues, r_values_in, rivalues_out);
+  gsl_power2_fftsc (n_cvalues, r_values_in, rivalues_out);
   theta = 3.1415926535897932384626433832795029;
   theta /= (double) n_cvalues;
 
@@ -309,12 +349,12 @@ gsl_power2_fftar (const unsigned int n_values,
       H1im = F2im + F1re;
       H1re = F1im - F2re;
       H2re = F2re - F1im;
-      H2im = H1im - FEim;
+      H2im = FEim - H1im;
       H1re += FEre;
       H2re += FEre;
       H1im += FEim;
       rivalues_out[i] = H1re;
-      rivalues_out[i + 1] = H1im;
+      rivalues_out[i + 1] = -H1im;
       rivalues_out[r] = H2re;
       rivalues_out[r + 1] = H2im;
       WMULTIPLY (Wre, Wim, Dre, Dim);
@@ -322,11 +362,14 @@ gsl_power2_fftar (const unsigned int n_values,
   Dre = rivalues_out[0];
   rivalues_out[0] = Dre + rivalues_out[1];
   rivalues_out[1] = Dre - rivalues_out[1];
+  rivalues_out[n_cvalues + 1] = -rivalues_out[n_cvalues + 1];
 }
-void
-gsl_power2_fftsr (const unsigned int n_values,
-                  const double      *rivalues_in,
-                  double            *r_values_out)
+
+static inline void
+gsl_power2_fftsr_impl (const unsigned int n_values,
+                       const double      *rivalues_in,
+                       double            *r_values_out,
+                       gboolean           need_scale)
 {
   unsigned int n_cvalues = n_values >> 1;
   double Dre, Dim, Wre, Wim, theta, scale;
@@ -360,9 +403,8 @@ gsl_power2_fftsr (const unsigned int n_values,
       ri |= j;
 
       FOre = -FOre;
-      FOim = -FOim;
       FEre *= 0.5;
-      FEim *= 0.5;
+      FEim *= -0.5;
       F2re = FOre * Wim;
       F2im = FOim * Wim;
       F1re = FOre * Wre;
@@ -391,9 +433,16 @@ gsl_power2_fftsr (const unsigned int n_values,
   if (n_values < 4)
     return;
   r_values_out[2] = rivalues_in[i];
-  r_values_out[2 + 1] = rivalues_in[i + 1];
-  scale = n_cvalues;
-  scale = 1.0 / scale;
+  r_values_out[2 + 1] = -rivalues_in[i + 1];
+  if (need_scale)
+    {
+      scale = n_cvalues;
+      scale = 1 / scale;
+    }
+  else
+    {
+      scale = 2;
+    }
   for (i = 0; i < n_values; i += 4)
     BUTTERFLY_10scale (r_values_out[i], r_values_out[i + 1],
                        r_values_out[i + 2], r_values_out[i + 3],
@@ -403,21 +452,39 @@ gsl_power2_fftsr (const unsigned int n_values,
   switch (n_cvalues)
     {
       case    2: break;
-      case    4: gsl_power2_fft4synthesis_skip2 (NULL, r_values_out);	 break;
-      case    8: gsl_power2_fft8synthesis_skip2 (NULL, r_values_out);	 break;
-      case   16: gsl_power2_fft16synthesis_skip2 (NULL, r_values_out);	 break;
-      case   32: gsl_power2_fft32synthesis_skip2 (NULL, r_values_out);	 break;
-      case   64: gsl_power2_fft64synthesis_skip2 (NULL, r_values_out);	 break;
-      case  128: gsl_power2_fft128synthesis_skip2 (NULL, r_values_out);	 break;
-      case  256: gsl_power2_fft256synthesis_skip2 (NULL, r_values_out);	 break;
-      case  512: gsl_power2_fft512synthesis_skip2 (NULL, r_values_out);	 break;
-      case 1024: gsl_power2_fft1024synthesis_skip2 (NULL, r_values_out); break;
-      case 2048: gsl_power2_fft2048synthesis_skip2 (NULL, r_values_out); break;
-      case 4096: gsl_power2_fft4096synthesis_skip2 (NULL, r_values_out); break;
-      case 8192: gsl_power2_fft8192synthesis_skip2 (NULL, r_values_out); break;
-      default:	 gsl_power2_fftc_big (n_cvalues, NULL, r_values_out, -1);
+      case    4: gsl_power2_fft4analysis_skip2 (NULL, r_values_out);	 break;
+      case    8: gsl_power2_fft8analysis_skip2 (NULL, r_values_out);	 break;
+      case   16: gsl_power2_fft16analysis_skip2 (NULL, r_values_out);	 break;
+      case   32: gsl_power2_fft32analysis_skip2 (NULL, r_values_out);	 break;
+      case   64: gsl_power2_fft64analysis_skip2 (NULL, r_values_out);	 break;
+      case  128: gsl_power2_fft128analysis_skip2 (NULL, r_values_out);	 break;
+      case  256: gsl_power2_fft256analysis_skip2 (NULL, r_values_out);	 break;
+      case  512: gsl_power2_fft512analysis_skip2 (NULL, r_values_out);	 break;
+      case 1024: gsl_power2_fft1024analysis_skip2 (NULL, r_values_out); break;
+      case 2048: gsl_power2_fft2048analysis_skip2 (NULL, r_values_out); break;
+      case 4096: gsl_power2_fft4096analysis_skip2 (NULL, r_values_out); break;
+      case 8192: gsl_power2_fft8192analysis_skip2 (NULL, r_values_out); break;
+      default:	 gsl_power2_fftc_big (n_cvalues, NULL, r_values_out, BIG_ANALYSIS);
     }
 }
+
+void
+gsl_power2_fftsr (const unsigned int n_values,
+                  const double      *rivalues_in,
+                  double            *r_values_out)
+{
+  gsl_power2_fftsr_impl (n_values, rivalues_in, r_values_out, FALSE);
+}
+
+void
+gsl_power2_fftsr_scale (const unsigned int n_values,
+                       const double      *rivalues_in,
+                       double            *r_values_out)
+{
+  gsl_power2_fftsr_impl (n_values, rivalues_in, r_values_out, TRUE);
+}
+
+
 void
 gsl_power2_fftar_simple (const unsigned int n_values,
                          const float       *real_values,
@@ -442,10 +509,14 @@ gsl_power2_fftar_simple (const unsigned int n_values,
   complex_values[n_values + 1] = 0.0;
   g_free (rv);
 }
+__EOF
+for FUNC_NAME in fftsr fftsr_scale
+do
+cat << __EOF
 void
-gsl_power2_fftsr_simple (const unsigned int n_values,
-                         const float       *complex_values,
-                         float             *real_values)
+gsl_power2_${FUNC_NAME}_simple (const unsigned int n_values,
+                                const float       *complex_values,
+                                float             *real_values)
 {
   double *cv, *rv;
   guint i;
@@ -458,10 +529,11 @@ gsl_power2_fftsr_simple (const unsigned int n_values,
   while (i--)
     cv[i] = complex_values[i];
   cv[1] = complex_values[n_values];
-  gsl_power2_fftsr (n_values, cv, rv);
+  gsl_power2_$FUNC_NAME (n_values, cv, rv);
   i = n_values;
   while (i--)
     real_values[i] = rv[i];
   g_free (cv);
 }
 __EOF
+done
diff --git a/bse/gsl-fftgen.pl b/bse/gsl-fftgen.pl
index 46a8bd6..ef9a198 100755
--- a/bse/gsl-fftgen.pl
+++ b/bse/gsl-fftgen.pl
@@ -32,7 +32,9 @@ my $min_split = 2048;
 my $min_compress = 512;
 my $fft2loop = 0;
 my $single_stage = 0;
-my $negate_sign = 0;
+my $negate_sign = 1;
+my $func_name = "analysis";
+my $scale = 0;
 my $Wtest = 0;
 
 #
@@ -42,8 +44,9 @@ while ($_ = $ARGV[0], defined $_ && /^-/) {
     shift;
     last if (/^--$/);
     if (/^--fft2loop$/) { $fft2loop = 1 }
-    elsif (/^--analysis$/) { $negate_sign = 0 }
-    elsif (/^--synthesis$/) { $negate_sign = 1 }
+    elsif (/^--analysis$/) { $negate_sign = 1; $func_name = "analysis"; $scale = 0; }
+    elsif (/^--synthesis$/) { $negate_sign = 0; $func_name = "synthesis"; $scale = 0; }
+    elsif (/^--synthesis-scale$/) { $negate_sign = 0; $func_name = "synthesis_scale"; $scale = 1; }
     elsif (/^--Wtest$/) { $Wtest = 1 }
     elsif (/^--double$/) { $ieee_type = "double" }
     elsif (/^--skip-macros$/) { $gen_macros = 0 }
@@ -52,6 +55,7 @@ while ($_ = $ARGV[0], defined $_ && /^-/) {
     elsif (/^--min-compress$/) { $min_compress = shift }
     elsif (/^--single-stage$/) { $single_stage = shift }
 }
+
 # parse arguments
 my @arguments = 0;
 if (defined $ARGV[0]) {
@@ -217,18 +221,18 @@ sub unroll_stage {
 		my $offset2 = $offset1 + $n_points;
 		my $offset1r = bitreverse ($fft_size, $offset1 >> 1) << 1;
 		my $offset2r = bitreverse ($fft_size, $offset2 >> 1) << 1;
-		my $scale = !$negate_sign ? "__1, __0" : sprintf "1.0 / (%s) %u", $tmp_ieee_type, $fft_size;
+		my $scale_args = ! $scale ? "__1, __0" : sprintf "1.0 / (%s) %u", $tmp_ieee_type, $fft_size;
 		printf($indent."BUTTERFLY_10%s (X[%s], X[%s + 1],\n".
 		       $indent."                X[%s], X[%s + 1],\n".
 		       $indent."                Y[%s], Y[%s + 1],\n".
 		       $indent."                Y[%s], Y[%s + 1],\n".
 		       $indent."                %s);\n",
-		       $negate_sign ? "scale" : "",
+		       $scale ? "scale" : "",
 		       $offset1r, $offset1r,
 		       $offset2r, $offset2r,
 		       $offset1, $offset1,
 		       $offset2, $offset2,
-		       $scale);
+		       $scale_args);
 	    }
 	}
     } elsif ($unroll_outer) {
@@ -325,7 +329,7 @@ sub bitreverse_fft2 {
     # mul_result = gsl_complex (c1.re * c2.re - c1.im * c2.im, c1.re * c2.im + c1.im * c2.re);
     printf "
 static inline void
-bitreverse_fft2analysis (const uint         n,
+bitreverse_fft2synthesis (const unsigned int n,
                          const %-6s        *X,
                          %-6s              *Y)
 {
@@ -370,25 +374,26 @@ bitreverse_fft2analysis (const uint         n,
     }
 }
 static inline void
-bitreverse_fft2synthesis (const uint         n,
-                          const %-6s        *X,
-                          %-6s              *Y)
+bitreverse_fft2synthesis_scale (const unsigned int n,
+                               const %-6s        *X,
+                               %-6s              *Y)
 {
   const uint n2 = n >> 1, n1 = n + n2, max = n >> 2;
   uint i, r;
   %s scale = n;
 
   scale = 1.0 / scale;
+
   BUTTERFLY_10scale (X[0], X[1],
 		     X[n], X[n + 1],
 		     Y[0], Y[1],
 		     Y[2], Y[3],
 		     scale);
   BUTTERFLY_10scale (X[n2], X[n2 + 1],
-		     X[n1], X[n1 + 1],
-		     Y[4], Y[5],
-		     Y[6], Y[7],
-		     scale);
+		X[n1], X[n1 + 1],
+		Y[4], Y[5],
+		Y[6], Y[7],
+		scale);
   for (i = 1, r = 0; i < max; i++)
     {
       uint k, j = n >> 1;
@@ -403,20 +408,65 @@ bitreverse_fft2synthesis (const uint         n,
       k = r >> 1;
       j = i << 3;
       BUTTERFLY_10scale (X[k], X[k + 1],
-			 X[k + n], X[k + n + 1],
-			 Y[j], Y[j + 1],
-			 Y[j + 2], Y[j + 3],
-			 scale);
+		         X[k + n], X[k + n + 1],
+		         Y[j], Y[j + 1],
+		         Y[j + 2], Y[j + 3],
+		         scale);
       k += n2;
       j += 4;
       BUTTERFLY_10scale (X[k], X[k + 1],
-			 X[k + n], X[k + n + 1],
-			 Y[j], Y[j + 1],
-			 Y[j + 2], Y[j + 3],
-			 scale);
+		         X[k + n], X[k + n + 1],
+		         Y[j], Y[j + 1],
+		         Y[j + 2], Y[j + 3],
+		         scale);
+    }
+}
+static inline void
+bitreverse_fft2analysis (const unsigned int n,
+                          const %-6s        *X,
+                          %-6s              *Y)
+{
+  const unsigned int n2 = n >> 1, n1 = n + n2, max = n >> 2;
+  unsigned int i, r;
+
+  BUTTERFLY_10 (X[0], X[1],
+		X[n], X[n + 1],
+		Y[0], Y[1],
+		Y[2], Y[3],
+		__0, __1);
+  BUTTERFLY_10 (X[n2], X[n2 + 1],
+		X[n1], X[n1 + 1],
+		Y[4], Y[5],
+		Y[6], Y[7],
+		__0, __1);
+  for (i = 1, r = 0; i < max; i++)
+    {
+      unsigned int k, j = n >> 1;
+
+      while (r >= j)
+	{
+	  r -= j;
+	  j >>= 1;
+	}
+      r |= j;
+
+      k = r >> 1;
+      j = i << 3;
+      BUTTERFLY_10 (X[k], X[k + 1],
+		    X[k + n], X[k + n + 1],
+		    Y[j], Y[j + 1],
+		    Y[j + 2], Y[j + 3],
+		    __0, __1);
+      k += n2;
+      j += 4;
+      BUTTERFLY_10 (X[k], X[k + 1],
+		    X[k + n], X[k + n + 1],
+		    Y[j], Y[j + 1],
+		    Y[j + 2], Y[j + 3],
+		    __0, __1);
     }
 }
-", $ieee_type, $ieee_type, $ieee_type, $ieee_type, $tmp_ieee_type;
+", $ieee_type, $ieee_type, $ieee_type, $ieee_type, $tmp_ieee_type, $ieee_type, $ieee_type, $tmp_ieee_type;
 
     # testing:
     # define BUTTERFLY_10(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim,T1re,T1im,T2re,T2im) \
@@ -698,7 +748,7 @@ sub gen_stage {
     if ($n_points == 2) {   # input stage needs special handling
 	if ($kind eq 'L') {
 	    printf "\n%s/* perform fft2 and bitreverse input */\n", $indent;
-	    printf "%sbitreverse_fft2%s (%u, X, Y);\n", $indent, $negate_sign ? "synthesis" : "analysis", $fft_size;
+	    printf "%sbitreverse_fft2%s (%u, X, Y);\n", $indent, $func_name, $fft_size;
 	} elsif ($kind eq 'F') {
 	    printf "\n%s/* perform %u times fft2 */\n", $indent, $times;
 	    unroll_stage ($fft_size, $n_points, $times, 1);
@@ -723,10 +773,10 @@ sub gen_stage {
 	    for (my $i = 0; $i < $times; $i++) {
 		if ($i) {
 		    printf($indent."gsl_power2_fft%u%s_skip2 (X + %u, Y + %u);\n",
-			   $n_points, $negate_sign ? "synthesis" : "analysis", $n_points * $i << 1, $n_points * $i << 1);
+			   $n_points, $func_name, $n_points * $i << 1, $n_points * $i << 1);
 		} else {
 		    printf($indent."gsl_power2_fft%u%s_skip2 (X, Y);\n",
-			   $n_points, $negate_sign ? "synthesis" : "analysis");
+			   $n_points, $func_name);
 		}
 	    }
 	} else {
@@ -786,7 +836,7 @@ print " */\n";
     
     printf "static void\n";
     printf("gsl_power2_fft%u%s%s (const %s *X, %s *Y)\n{\n",
-	   $fft_size, $negate_sign ? "synthesis" : "analysis",
+	   $fft_size, $func_name,
 	   $skip2 ? "_skip2" : "",
 	   $ieee_type, $ieee_type);
     printf "%sregister uint butterfly, block, offset;\n", $indent;
diff --git a/bse/gslfft.h b/bse/gslfft.h
index f65873e..7f9436f 100644
--- a/bse/gslfft.h
+++ b/bse/gslfft.h
@@ -42,15 +42,18 @@ extern "C" {
  * equivalent).
  *
  * In general for the gsl_power2_fft*() family of functions, normalization is
- * only performed during backward transform. However, a popular mathematical
- * strategy of defining the FFT and IFFT in a way that the formulas are
- * symmetric is normalizing both, the forward and backward transform with
- * 1/sqrt(N) - where N is the number of complex values (n_values).
+ * only performed during backward transform if the gsl_power2_fftsc_scale()
+ * is used. No normalization is performed if gsl_power2_fftsc() is used.
+ *
+ * However, a popular mathematical strategy of defining the FFT and IFFT in a
+ * way that the formulas are symmetric is normalizing both, the forward and
+ * backward transform with 1/sqrt(N) - where N is the number of complex values
+ * (n_values).
  *
  * Compared to the above definition, in this implementation, the analyzed
  * values produced by gsl_power2_fftac()/gsl_power2_fftar() will be too large
  * by a factor of sqrt(N), which however are cancelled out on the backward
- * transform.
+ * transform (for _scale variants).
  *
  * Note that the transformation is performed out of place, the input
  * array is not modified, and may not overlap with the output array.
@@ -69,11 +72,10 @@ void	gsl_power2_fftac (const uint         n_values,
  * represents the counterpart to gsl_power2_fftac(), that is, a value
  * array which is transformed into the frequency domain with
  * gsl_power2_fftac() can be reconstructed by issuing gsl_power2_fftsc()
- * on the transform.
+ * on the transform. This function does not perform scaling, so calling
+ * gsl_power2_fftac() and gsl_power2_fftsc() will scale the data with a factor
+ * of n_values.  See also gsl_power2_fftsc_scale().
  *
- * This function also scales the time domain coefficients by a
- * factor of 1.0/n_values which is required for perfect reconstruction
- * of time domain data formerly transformed via gsl_power2_fftac().
  * More details on normalization can be found in the documentation of
  * gsl_power2_fftac().
  *
@@ -85,10 +87,34 @@ void	gsl_power2_fftsc (const uint         n_values,
 			  double            *ri_values_out);
 
 /**
- * @param n_values      Number of real sample values
- * @param r_values_in   Real sample values [0..n_values-1]
- * @param ri_values_out Complex frequency values [0..n_values-1]
+ * gsl_power2_fftsc_scale
+ * @n_values:      Number of complex values
+ * @ri_values_in:  Complex frequency values [0..n_values*2-1]
+ * @ri_values_out: Complex sample values [0..n_values*2-1]
+ * This function performs a decimation in time fourier transformation
+ * in backwards direction with normalization. As such, this function
+ * represents the counterpart to gsl_power2_fftac(), that is, a value
+ * array which is transformed into the frequency domain with
+ * gsl_power2_fftac() can be reconstructed by issuing gsl_power2_fftsc()
+ * on the transform.
  *
+ * This function also scales the time domain coefficients by a
+ * factor of 1.0/n_values which is required for perfect reconstruction
+ * of time domain data formerly transformed via gsl_power2_fftac().
+ * More details on normalization can be found in the documentation of
+ * gsl_power2_fftac().
+ *
+ * Note that the transformation is performed out of place, the input
+ * array is not modified, and may not overlap with the output array.
+ */
+void    gsl_power2_fftsc_scale (const unsigned int n_values,
+			        const double      *ri_values_in,
+			        double            *ri_values_out);
+/**
+ * gsl_power2_fftar
+ * @n_values:      Number of real sample values
+ * @r_values_in:   Real sample values [0..n_values-1]
+ * @ri_values_out: Complex frequency values [0..n_values-1]
  * Real valued variant of gsl_power2_fftac(), the input array contains
  * real valued equidistant sampled data [0..n_values-1], and the output
  * array contains the positive frequency half of the complex valued
@@ -124,6 +150,30 @@ void	gsl_power2_fftar (const uint         n_values,
  * A real valued data set transformed into the frequency domain
  * with gsl_power2_fftar() can be reconstructed using this function.
  *
+ * This function does not perform normalization, so data that is transformed
+ * back from gsl_power2_fftar() will be scaled by a factor of n_values. See
+ * also gsl_power2_fftsr_scale().
+ *
+ * More details on normalization can be found in the documentation of
+ * gsl_power2_fftac().
+ *
+ * Note that the transformation is performed out of place, the input
+ * array is not modified, and may not overlap with the output array.
+ */
+void	gsl_power2_fftsr (const unsigned int n_values,
+			  const double      *ri_values_in,
+			  double            *r_values_out);
+
+/**
+ * gsl_power2_fftsr_scale
+ * @n_values:     Number of real sample values
+ * @ri_values_in: Complex frequency values [0..n_values-1]
+ * @r_values_out: Real sample values [0..n_values-1]
+ * Real valued variant of gsl_power2_fftsc(), counterpart to
+ * gsl_power2_fftar(), using the same frequency storage format.
+ * A real valued data set transformed into the frequency domain
+ * with gsl_power2_fftar() can be reconstructed using this function.
+ *
  * This function also scales the time domain coefficients by a
  * factor of 1.0/(n_values/2) which is required for perfect
  * reconstruction of time domain data formerly transformed via
@@ -134,9 +184,9 @@ void	gsl_power2_fftar (const uint         n_values,
  * Note that the transformation is performed out of place, the input
  * array is not modified, and may not overlap with the output array.
  */
-void	gsl_power2_fftsr (const uint         n_values,
-			  const double      *ri_values_in,
-			  double            *r_values_out);
+void	gsl_power2_fftsr_scale (const unsigned int n_values,
+			        const double      *ri_values_in,
+			        double            *r_values_out);
 
 
 /* --- convenience wrappers --- */
@@ -146,7 +196,10 @@ void	gsl_power2_fftar_simple	(const uint         n_values,
 void	gsl_power2_fftsr_simple	(const uint         n_values,
 				 const float	   *complex_values,
 				 float             *real_values);
-     
+void	gsl_power2_fftsr_scale_simple (const unsigned int n_values,
+				       const float	   *complex_values,
+				       float             *real_values);
+
      
      
 #ifdef __cplusplus
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]