*** regression.c	Fri Aug 16 16:33:30 2002
--- new_regression.c	Mon Sep  9 18:44:00 2002
***************
*** 47,51 ****
  
  /* ------------------------------------------------------------------------- */
- 
  /* Returns in res the solution to the equation L * U * res = P * b.
  
--- 47,50 ----
***************
*** 321,324 ****
--- 320,489 ----
  
  /* ------------------------------------------------------------------------- */
+ #define LOGFIT_LARGE_C_FACTOR 2000
+ #define LOGFIT_C_ACCURACY 5
+ 
+ typedef struct {
+   gnum_float min_x;
+   gnum_float max_x;
+   gnum_float min_y;
+   gnum_float max_y;
+   gnum_float mean_y;
+ } point_cloud_measure_type;
+ 
+ /* Takes the current 'sign' (res[0]) and 'c' (res[3]) from the calling
+  * function, transforms xs to log(sign*(x-c)), performs a simple
+  * linear regression to find the best fitting 'a' (res[1]) and 'b'
+  * (res[2]) for ys and transformed xs, and computes the sum of squared
+  * residuals.
+  * Needs 'sign' (i.e. +1 or -1) and 'c' so adjusted that (sign*(x-c)) is
+  * positive for all xs. n must be > 0. These conditions are trusted to be
+  * checked by the calling functions.
+  * Is called often, so do not make it to slow.
+  */
+ 
+ static int
+ transform_x_and_linear_regression_log_fitting (gnum_float *xs,
+ 						gnum_float *transf_xs,
+ 						const gnum_float *ys, int n,
+ 						gnum_float *res,
+ 						point_cloud_measure_type *point_cloud)
+ {
+         int i;
+ 	int result = 0;
+ 	gnum_float mean_transf_x, diff_x, resid_y;
+ 	gnum_float sum1 = 0;
+ 	gnum_float sum2 = 0;
+ 
+ 	for (i=0; i<n; i++)
+ 	        transf_xs[i] = log(res[0]*(xs[i]-res[3]));/* log(always > 0) */
+ 	range_average (transf_xs, n, &mean_transf_x);
+ 	for (i=0; i<n; i++) {
+ 	        diff_x = transf_xs[i] - mean_transf_x;
+ 		sum1 += diff_x * (ys[i] - point_cloud->mean_y);
+ 		sum2 += diff_x * diff_x;
+ 	}
+ 	res[2] = sum1 / sum2;
+ 	res[1] = point_cloud->mean_y - (res[2] * mean_transf_x);
+ 	res[4] = 0;
+ 	for (i=0; i<n; i++) {
+ 	        resid_y = res[1] + (res[2] * transf_xs[i]) - ys[i];
+ 		res[4] += resid_y * resid_y;
+ 	}
+ 	return result; /* not used for error checking for the sake of speed */
+ }
+ 
+ static int
+ log_fitting (gnum_float *xs, const gnum_float *ys, int n,
+ 		  gnum_float *res, point_cloud_measure_type *point_cloud)
+ {
+         int result = 0;
+ 	int i;
+ 	int sign_plus_ok = 1;
+ 	int sign_minus_ok = 1;
+ 	gnum_float step_c, step_int, c_offset;
+ 	gnum_float *temp_res;
+         gnum_float *transf_xs;
+ 
+ 	temp_res = g_new (gnum_float, 5);
+ 	/* Not needed here, but allocate it once for all subfunction calls */
+ 	transf_xs = g_new (gnum_float, n);
+ 	/* Choose initial iteration step-width for c (arbitrarily) */
+ 	step_c = (point_cloud->max_x) - (point_cloud->min_x);
+ 	step_c = log10 (step_c);
+ 	if (step_c < 0)
+ 	        if (modf (step_c, &step_int) != 0)
+ 		        step_c--;
+ 	modf (step_c, &step_int);
+ 	step_c = (gnum_float) step_int;
+ 	step_c = pow (10, step_c);
+ 	step_c *= 0.05;
+ 
+ 	/* Determine sign. Take a c which is ``much to small'' since the part
+ 	 * of the curve cutting the point cloud is almost not bent.
+ 	 * If making c still smaller does not make things still worse,
+ 	 * assume that we have to change the direction of curve bending
+ 	 * by changing sign.
+ 	 */
+ 	res[0] = 1; /* sign */
+ 	res[3] = point_cloud->min_x - LOGFIT_LARGE_C_FACTOR * step_c;
+ 	temp_res[0] = 1;
+ 	temp_res[3] = res[3] - step_c;
+ 	transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n,
+ 							res, point_cloud);
+ 	transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n,
+ 							temp_res, point_cloud);
+ 	if (temp_res[4] <= res[4]) 
+ 	        sign_plus_ok = 0;
+         /* check again with new sign */
+ 	res[0] = -1; /* sign */
+ 	res[3] = point_cloud->max_x + LOGFIT_LARGE_C_FACTOR * step_c;
+ 	temp_res[0] = -1;
+ 	temp_res[3] = res[3] + step_c;
+ 	transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n,
+ 						       res, point_cloud);
+ 	transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n,
+ 						       temp_res, point_cloud);
+ 	if (temp_res[4] <= res[4]) 
+ 	        sign_minus_ok = 0;
+ 	/* If not exactly one of plus or minus works, give up. */
+ 	/* This also means that LOGFIT_LARGE_C_FACTOR determines the ``end''
+ 	 * of the fitted c-range. Any local minimum of squared residuals
+ 	 * outside this border is not found. This happens in point clouds
+ 	 * which are very weakly bent. */
+ 	if (sign_plus_ok && !sign_minus_ok)
+ 	        res[0] = 1;
+ 	else if (sign_minus_ok && !sign_plus_ok)
+ 	        res[0] = -1;
+ 	else {
+ 	        result = 1; 
+ 		goto out;
+ 	  }
+ 	
+ 	/* Now the adapting of c starts. Find a local minimum of sum
+ 	 * of squared residuals */
+ 	c_offset = (res[0] == 1) ? point_cloud->min_x
+                                  : point_cloud->max_x;
+ 	res[3] = c_offset - res[0] * step_c;
+ 	for (i=1; i<=LOGFIT_C_ACCURACY; i++) {
+ 	        do {
+ 		        memcpy (temp_res, res, 5 * sizeof (gnum_float));
+ 			transform_x_and_linear_regression_log_fitting (xs,
+ 								 transf_xs,
+ 								 ys, n,
+ 								 temp_res,
+ 								 point_cloud);
+ 			res[3] = res[3] - res[0] * step_c;
+ 			transform_x_and_linear_regression_log_fitting (xs,
+ 								 transf_xs,
+ 								 ys, n, res,
+ 								 point_cloud);
+ 		} while (res[4] < temp_res[4]);
+ 		memcpy (res, temp_res, 5 * sizeof (gnum_float));
+ 		if (i == LOGFIT_C_ACCURACY)
+ 		        break;
+ 		/* Go a step ``back'', increase accuracy, and go on again. */
+ 		res[3] = res[3] + res[0] * step_c;
+ 		step_c /= 10;
+ 		res[3] = res[3] - res[0] * step_c;
+ 	}
+ 	/* If c is still at the ``beginning'' of the fitted c-range,
+ 	 * then this is probably no local minimum.
+ 	 * Point cloud is probably not shaped ``logarithmical''.
+ 	 */
+ 	if (res[3] == (c_offset - res[0] * step_c)) {
+ 	        result = 1;
+ 		goto out;
+ 	}
+ 	if (fabs (res[3]) < (step_c / 2)) /* The difference to 0 might be
+ 					   * caused by accumulated floating
+ 					   * point rounding errors. */
+ 	        res[3] = 0;
+  out:
+ 	g_free (transf_xs);
+ 	g_free (temp_res);
+ 	return result;
+ }
+ 
+ /* ------------------------------------------------------------------------- */
  /* Please refer to description in regression.h.  */
  
***************
*** 393,396 ****
--- 558,667 ----
   out:
  	g_free (log_ys);
+ 	return result;
+ }
+ 
+ /* ------------------------------------------------------------------------- */
+ /* Please refer to description in regression.h.  */
+ 
+ int
+ logarithmic_regression (gnum_float **xss, int dim,
+ 			const gnum_float *ys, int n,
+ 			int affine,
+ 			gnum_float *res,
+ 			regression_stat_t *extra_stat)
+ {
+         gnum_float **log_xss;
+ 	int result;
+ 	int i, j;
+ 
+ 	ALLOC_MATRIX (log_xss, dim, n);
+ 	for (i = 0; i < dim; i++)
+ 	        for (j = 0; j < n; j++)
+ 		        if (xss[i][j] > 0)
+ 		                log_xss[i][j] = log (xss[i][j]);
+ 			else {
+ 			        result = 1; /* Bad data.  */
+ 				goto out;
+ 			}
+ 	
+ 
+ 	if (affine) {
+ 		gnum_float **log_xss2;
+ 		log_xss2 = g_new (gnum_float *, dim + 1);
+ 		log_xss2[0] = NULL;  /* Substitute for 1-vector.  */
+ 		memcpy (log_xss2 + 1, log_xss, dim * sizeof (gnum_float *));
+ 
+ 		result = general_linear_regression (log_xss2, dim + 1, ys, n,
+ 						    res, extra_stat, affine);
+ 		g_free (log_xss2);
+ 	} else {
+ 		res[0] = 0;
+ 		result = general_linear_regression (log_xss, dim, ys, n,
+ 						    res + 1, extra_stat,
+ 						    affine);
+ 	}
+ 
+  out:
+ 	FREE_MATRIX (log_xss, dim, n);
+ 	return result;
+ }
+ 
+ /* ------------------------------------------------------------------------- */
+ /* Please refer to description in regression.h.  */
+ 
+ int
+ logarithmic_fit (gnum_float *xs,
+ 			     const gnum_float *ys, int n,
+ 			     gnum_float *res)
+ {
+         point_cloud_measure_type point_cloud_measures;
+ 	int i, result;
+ 	int less_3_y = 1, less_3_x = 1;
+ 
+ 	if (n == 0) {
+ 	        result = 1;
+ 		goto out;
+ 	}
+ 	/* store useful measures for using them here and in subfunctions */
+ 	result = range_min (xs, n, &(point_cloud_measures.min_x));
+ 	result = range_max (xs, n, &(point_cloud_measures.max_x));
+ 	result = range_min (ys, n, &(point_cloud_measures.min_y));
+ 	result = range_max (ys, n, &(point_cloud_measures.max_y));
+ 	result = range_average (ys, n, &(point_cloud_measures.mean_y));
+ 	/* Checking of error conditions. */
+ 	/* less than 2 different ys or less than 2 different xs */
+ 	if (point_cloud_measures.min_y == point_cloud_measures.max_y ||
+ 	    point_cloud_measures.min_x == point_cloud_measures.max_x) {
+ 	        result = 1;
+ 		goto out;
+ 	}
+ 
+ 	for (i=0; i<n; i++) {
+ 	        if ((ys[i] != point_cloud_measures.min_y) &&
+ 		    (ys[i] != point_cloud_measures.max_y)) {
+ 		        less_3_y = 0;
+ 			break;
+ 		}
+ 	}
+ 	if (less_3_y) {
+ 	        result = 1; /* less than 3 different ys */
+ 		goto out;
+ 	}
+ 
+ 	for (i=0; i<n; i++) {
+ 	        if ((xs[i] != point_cloud_measures.min_x) &&
+ 		    (xs[i] != point_cloud_measures.max_x)) {
+ 		        less_3_x = 0;
+ 			break;
+ 		}
+ 	}
+ 	if (less_3_x) {
+ 	        result = 1; /* less than 3 different xs */
+ 		goto out;
+ 	}
+ 
+ 	/* no errors */
+ 	result = log_fitting (xs, ys, n, res, &point_cloud_measures);
+  out:
  	return result;
  }