*** regression.c Fri Aug 16 16:33:30 2002 --- new_regression.c Mon Sep 9 18:44:00 2002 *************** *** 47,51 **** /* ------------------------------------------------------------------------- */ - /* Returns in res the solution to the equation L * U * res = P * b. --- 47,50 ---- *************** *** 321,324 **** --- 320,489 ---- /* ------------------------------------------------------------------------- */ + #define LOGFIT_LARGE_C_FACTOR 2000 + #define LOGFIT_C_ACCURACY 5 + + typedef struct { + gnum_float min_x; + gnum_float max_x; + gnum_float min_y; + gnum_float max_y; + gnum_float mean_y; + } point_cloud_measure_type; + + /* Takes the current 'sign' (res[0]) and 'c' (res[3]) from the calling + * function, transforms xs to log(sign*(x-c)), performs a simple + * linear regression to find the best fitting 'a' (res[1]) and 'b' + * (res[2]) for ys and transformed xs, and computes the sum of squared + * residuals. + * Needs 'sign' (i.e. +1 or -1) and 'c' so adjusted that (sign*(x-c)) is + * positive for all xs. n must be > 0. These conditions are trusted to be + * checked by the calling functions. + * Is called often, so do not make it to slow. + */ + + static int + transform_x_and_linear_regression_log_fitting (gnum_float *xs, + gnum_float *transf_xs, + const gnum_float *ys, int n, + gnum_float *res, + point_cloud_measure_type *point_cloud) + { + int i; + int result = 0; + gnum_float mean_transf_x, diff_x, resid_y; + gnum_float sum1 = 0; + gnum_float sum2 = 0; + + for (i=0; i 0) */ + range_average (transf_xs, n, &mean_transf_x); + for (i=0; imean_y); + sum2 += diff_x * diff_x; + } + res[2] = sum1 / sum2; + res[1] = point_cloud->mean_y - (res[2] * mean_transf_x); + res[4] = 0; + for (i=0; imax_x) - (point_cloud->min_x); + step_c = log10 (step_c); + if (step_c < 0) + if (modf (step_c, &step_int) != 0) + step_c--; + modf (step_c, &step_int); + step_c = (gnum_float) step_int; + step_c = pow (10, step_c); + step_c *= 0.05; + + /* Determine sign. Take a c which is ``much to small'' since the part + * of the curve cutting the point cloud is almost not bent. + * If making c still smaller does not make things still worse, + * assume that we have to change the direction of curve bending + * by changing sign. + */ + res[0] = 1; /* sign */ + res[3] = point_cloud->min_x - LOGFIT_LARGE_C_FACTOR * step_c; + temp_res[0] = 1; + temp_res[3] = res[3] - step_c; + transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n, + res, point_cloud); + transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n, + temp_res, point_cloud); + if (temp_res[4] <= res[4]) + sign_plus_ok = 0; + /* check again with new sign */ + res[0] = -1; /* sign */ + res[3] = point_cloud->max_x + LOGFIT_LARGE_C_FACTOR * step_c; + temp_res[0] = -1; + temp_res[3] = res[3] + step_c; + transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n, + res, point_cloud); + transform_x_and_linear_regression_log_fitting (xs, transf_xs, ys, n, + temp_res, point_cloud); + if (temp_res[4] <= res[4]) + sign_minus_ok = 0; + /* If not exactly one of plus or minus works, give up. */ + /* This also means that LOGFIT_LARGE_C_FACTOR determines the ``end'' + * of the fitted c-range. Any local minimum of squared residuals + * outside this border is not found. This happens in point clouds + * which are very weakly bent. */ + if (sign_plus_ok && !sign_minus_ok) + res[0] = 1; + else if (sign_minus_ok && !sign_plus_ok) + res[0] = -1; + else { + result = 1; + goto out; + } + + /* Now the adapting of c starts. Find a local minimum of sum + * of squared residuals */ + c_offset = (res[0] == 1) ? point_cloud->min_x + : point_cloud->max_x; + res[3] = c_offset - res[0] * step_c; + for (i=1; i<=LOGFIT_C_ACCURACY; i++) { + do { + memcpy (temp_res, res, 5 * sizeof (gnum_float)); + transform_x_and_linear_regression_log_fitting (xs, + transf_xs, + ys, n, + temp_res, + point_cloud); + res[3] = res[3] - res[0] * step_c; + transform_x_and_linear_regression_log_fitting (xs, + transf_xs, + ys, n, res, + point_cloud); + } while (res[4] < temp_res[4]); + memcpy (res, temp_res, 5 * sizeof (gnum_float)); + if (i == LOGFIT_C_ACCURACY) + break; + /* Go a step ``back'', increase accuracy, and go on again. */ + res[3] = res[3] + res[0] * step_c; + step_c /= 10; + res[3] = res[3] - res[0] * step_c; + } + /* If c is still at the ``beginning'' of the fitted c-range, + * then this is probably no local minimum. + * Point cloud is probably not shaped ``logarithmical''. + */ + if (res[3] == (c_offset - res[0] * step_c)) { + result = 1; + goto out; + } + if (fabs (res[3]) < (step_c / 2)) /* The difference to 0 might be + * caused by accumulated floating + * point rounding errors. */ + res[3] = 0; + out: + g_free (transf_xs); + g_free (temp_res); + return result; + } + + /* ------------------------------------------------------------------------- */ /* Please refer to description in regression.h. */ *************** *** 393,396 **** --- 558,667 ---- out: g_free (log_ys); + return result; + } + + /* ------------------------------------------------------------------------- */ + /* Please refer to description in regression.h. */ + + int + logarithmic_regression (gnum_float **xss, int dim, + const gnum_float *ys, int n, + int affine, + gnum_float *res, + regression_stat_t *extra_stat) + { + gnum_float **log_xss; + int result; + int i, j; + + ALLOC_MATRIX (log_xss, dim, n); + for (i = 0; i < dim; i++) + for (j = 0; j < n; j++) + if (xss[i][j] > 0) + log_xss[i][j] = log (xss[i][j]); + else { + result = 1; /* Bad data. */ + goto out; + } + + + if (affine) { + gnum_float **log_xss2; + log_xss2 = g_new (gnum_float *, dim + 1); + log_xss2[0] = NULL; /* Substitute for 1-vector. */ + memcpy (log_xss2 + 1, log_xss, dim * sizeof (gnum_float *)); + + result = general_linear_regression (log_xss2, dim + 1, ys, n, + res, extra_stat, affine); + g_free (log_xss2); + } else { + res[0] = 0; + result = general_linear_regression (log_xss, dim, ys, n, + res + 1, extra_stat, + affine); + } + + out: + FREE_MATRIX (log_xss, dim, n); + return result; + } + + /* ------------------------------------------------------------------------- */ + /* Please refer to description in regression.h. */ + + int + logarithmic_fit (gnum_float *xs, + const gnum_float *ys, int n, + gnum_float *res) + { + point_cloud_measure_type point_cloud_measures; + int i, result; + int less_3_y = 1, less_3_x = 1; + + if (n == 0) { + result = 1; + goto out; + } + /* store useful measures for using them here and in subfunctions */ + result = range_min (xs, n, &(point_cloud_measures.min_x)); + result = range_max (xs, n, &(point_cloud_measures.max_x)); + result = range_min (ys, n, &(point_cloud_measures.min_y)); + result = range_max (ys, n, &(point_cloud_measures.max_y)); + result = range_average (ys, n, &(point_cloud_measures.mean_y)); + /* Checking of error conditions. */ + /* less than 2 different ys or less than 2 different xs */ + if (point_cloud_measures.min_y == point_cloud_measures.max_y || + point_cloud_measures.min_x == point_cloud_measures.max_x) { + result = 1; + goto out; + } + + for (i=0; i