For anyone that is willing to take a look at my work...
the attached file contains some corrections to my original submission.
Once again, help is much appreciated.
------------------------------------------------------------------------
/* Non-linear Regression
*
* Daniel Carrera (dcarrera math toronto edu)
*
* Suggested additions to regression.h and regression.c
* Version 0.0.1
*
* These additions add a non-linear regression function to Gnumeric
* using the well-established Levenberg-Marquardt method.
*
* STATUS:
* =======
* I prototyped and tested the method under Python first. There should
* not be major problems with the algorithm. However, the C implementation
* has received NO TESTING whatsoever.
*
* TODO:
* =====
* 1.- Testing.
* 2.- Integration with Gnumeric.
*
*/
/* ---------------------------- regression.h ------------------------------- */
/* How do I make a RegressionFunction?
* ===================================
*
* SYNOPSIS:
* RegressionResult f(gnum_float * , gnum_float * , gnum_float * )
*
* USAGE:
* result = f(x, par, &y);
*
* DETAILS:
* x -> independent variables.
* par -> parameters.
*
* The function must:
* - Be smooth on the parameters.
* - Store the resulting value in 'y'.
*
* EXAMPLE:
RegressionResult
f (gnum_float * X, gnum_float * s, gnum_float *y)
{
gnum_float x0 = X[0];
gnum_float x1 = X[1];
*y = s[0] * sin( s[1]*x0 + s[2]*x1 ) + s[3];
return REG_ok;
}
*/
/* ---------------------------- regression.h ------------------------------ */
#define DELTA 1e-6
typedef RegressionResult
(*RegressionFunction) (gnum_float * x, gnum_float * params, gnum_float *f);
/* ---------------------------- regression.c ------------------------------ */
/*
* SYNOPSIS:
* result = derivative( f, &df, x, par, i)
*
* Approximates the partial derivative of a given function at (x; par) with
* respect to the i-th parameter. The result is stored in 'df'.
*
* See the header file for more information.
*/
static RegressionResult
derivative(
RegressionFunction f,
gnum_float * df,
gnum_float * x, /* Only one point, not the whole data set. */
gnum_float * par,
gint index
)
{
gnum_float y1, y2;
RegressionResult result;
par[index] -= DELTA;
status = (*f)(x,par, &y1);
if (status != REGRESSION_OK)
return status;
params[index] += 2*DELTA;
status = (*f)(x,par, &y2);
if (status != REGRESSION_OK)
return status;
#ifdef DEBUG
printf("y1 = %lf\n",y1);
printf("y2 = %lf\n",y2);
printf("DELTA = %lf\n",DELTA);
#endif
*df = (y2 - y1) / (2*DELTA);
return REG_ok;
}
/*
* SYNOPSIS:
* result = chi_derivative( f, &dchi, xvals, par, i, yvals, sigmas)
*
* This is a simple adaptation of the derivative() function specific to
* the Chi Squared.
*/
static RegressionResult
chi_derivative(
RegressionFunction f,
gnum_float * dchi,
gnum_float * xvals, /* The entire data set, not just one point. */
gnum_float * par,
gint index,
gnum_float * yvals, /* Ditto. */
gnum_float * sigmas /* Ditto. */
)
{
gnum_float y1, y2;
RegressionResult result;
par[index] -= DELTA;
result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &y1);
if (result != REG_ok)
return result;
params[index] += 2*DELTA;
result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &y2);
if (result != REG_ok)
return result;
#ifdef DEBUG
printf("y1 = %lf\n",y1);
printf("y2 = %lf\n",y2);
printf("DELTA = %lf\n",DELTA);
#endif
*dchi = (y2 - y1) / (2*DELTA);
return REG_ok;
}
/*
* SYNOPSIS:
* result = coefficient_matrix(A, f, xvals, par, yvals, sigmas, x_dim, p_dim, r)
*
* RETURNS:
* The coefficient matrix of the LM method.
*
* DETAILS:
* The coefficient matrix is defined by
*
* N 1 df df
* A = Sum ( ------- -- -- ( i == j? 1 + r : 1 ) )
* ij k=1 sigma^2 dp dp
* k i j
*
* A -> p_dim X p_dim coefficient matrix. MUST ALREADY BE ALLOCATED.
*
* sigmas -> Measurement errors in the dataset (along the y-axis).
* NULL means "no errors available", so they are all set to 1.
*
* x_dim -> Number of data points.
*
* p_dim -> Number of parameters.
*
* r -> Positive constant. Its value is altered during the LM procedure.
*
*/
static RegressionResult
coefficient_matrix(
gnum_float ** A, /* Location where the matrix will be stored. */
RegressionFunction f,
gnum_float ** xvals, /* The entire data set, not just one point. */
gnum_float * par,
gnum_float * yvals, /* Ditto. */
gnum_float * sigmas, /* Ditto. */
gint x_dim, /* Number of data points. */
gint p_dim, /* Number of parameters. */
gnum_float r
)
{
gint i, j, k;
RegressionResult result;
gnum_float df_i, df_j;
gnum_float sum, sigma;
/* Notice that the matrix is symetric. */
for (i = 0; i < p_dim; i++) {
for (j = 0; j <= i; j++) {
sum = 0;
for (k = 0; k < x_dim; x++) {
result = derivative( f, &df_i, xvals[k], par, i);
if (result != REG_ok)
return result;
result = derivative( f, &df_i, xvals[k], par, j);
if (result != REG_ok)
return result;
sigma = (sigmas == NULL? 1 : sigmas[k]);
sum = sum + 1 / (sigma*sigma) * df_i*df_j * (i == j? 1 + r : 1) ;
}
A[i][j] = A[j][i] = sum;
}
}
return REG_ok;
}
/*
* SYNOPSIS:
* result = parameter_errors(f, xvals, par, yvals, sigmas, x_dim, p_dim, errors)
*
* Returns the errors associated with the parameters.
* If an error is infinite, it is set to -1.
*
* sigmas -> Measurement errors in the dataset (along the y-axis).
* NULL means "no errors available", so they are all set to 1.
*
* x_dim -> Number of data points.
*
* p_dim -> Number of parameters.
*
* errors -> MUST ALREADY BE ALLOCATED.
*/
/* FIXME: I am not happy with the behaviour with infinite errors. */
static RegressionResult
parameter_errors(
RegressionFunction f,
gnum_float ** xvals, /* The entire data set, not just one point. */
gnum_float * par,
gnum_float * yvals, /* Ditto. */
gnum_float * sigmas, /* Ditto. */
gint x_dim, /* Number of data points. */
gint p_dim, /* Number of parameters. */
gnum_float * errors
)
{
RegressionResult result;
gnum_float ** A;
gint i;
ALLOC_MATRIX(A, p_dim, p_dim)
result = coefficient_matrix(A, f, x, par, y, sigmas, x_dim, p_dim, 0)
if (result != REG_ok)
return result;
for (i = 0; i < p_dim; i++)
errors[i] = (A[i][j] != 0? 1/sqrt(A[i][j]) : -1 );
FREE_MATRIX(A, p_dim, p_dim);
return REG_ok;
}
/*
* SYNOPSIS:
* result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &chisq)
*
* / y - f(x ; par) \ 2
* 2 | i i |
* Chi == Sum ( | ------------------ | )
* \ sigma /
* i
*
* sigmas -> Measurement errors in the dataset (along the y-axis).
* NULL means "no errors available", so they are all set to 1.
*
* x_dim -> Number of data points.
*
* This value is not very meaningful without the sigmas. However, it is still useful
* for the fit.
*/
static RegressionResult
chi_squared(
RegressionFunction f,
gnum_float ** xvals, /* The entire data set, not just one point. */
gnum_float * par,
gnum_float * yvals, /* Ditto. */
gnum_float * sigmas, /* Ditto. */
gint x_dim, /* Number of data points. */
gnum_float * chisq /* Chi Squared */
)
{
gint i;
RegressionResult result;
gnum_float tmp, y;
*chisq = 0;
for (i = 0; i < x_dim; i++) {
result = f(xvals[i], par, &y);
if (result != REG_ok)
return result;
tmp = (yvals[i] - y ) / (sigmas == NULL ? 1 : sigmas[i] );
*chisq += tmp*tmp;
}
return REG_ok;
}
/*
* SYNOPSIS:
* result = non_linear_regression(f, xvals, par, yvals, sigmas, x_dim, p_dim, &chi, errors)
*
* Returns the results of the non-linear regression from the given initial values.
* The resulting parameters are placed back into 'par'.
*
* PARAMETERS:
*
* sigmas -> Measurement errors in the dataset (along the y-axis).
* NULL means "no errors available", so they are all set to 1.
*
* x_dim -> Number of data points.
*
* p_dim -> Number of parameters.
*
* errors -> MUST ALREADY BE ALLOCATED. These are the approximated standard
* deviation for each parameter.
*
* chi -> Chi Squared of the final result. This value is not very meaningful
* without the sigmas.
*
*/
static RegressionResult
non_linear_regression(
RegressionFunction f,
gnum_float ** xvals, /* The entire data set, not just one point. */
gnum_float * par,
gnum_float * yvals, /* Ditto. */
gnum_float * sigmas, /* Ditto. */
gint x_dim, /* Number of data points. */
gint p_dim, /* Number of parameters. */
gnum_float * chi,
gnum_float * errors
)
{
gnum_float r = 0.001; /* Pick a conservative initial value. */
gnum_float * b;
gnum_float * dpar;
gnum_float * tmp_par;
gnum_float chi_pre, chi_pro, dchi;
RegressionResult result;
gint i;
result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &chi_pos);
if (result != REG_ok)
return result;
ALLOC_MATRIX(A, p_dim, p_dim);
dpar = g_new(gnum_float, p_dim);
tmp_par = g_new(gnum_float, p_dim);
b = g_new(gnum_float, p_dim);
#ifdef DEBUG
printf("Chi Squared : %lf",chi_pre);
#endif
while (1) {
for (i = 0; i < p_dim; i++) {
/*
* d Chi
* b == -----
* k d p
* k
*/
result = chi_derivative( f, &dchi, xvals, par, i, yvals, sigmas);
if (result != REG_ok)
return result;
b[i] = - dchi;
}
result = coefficient_matrix(A, f, xvals, par, yvals, sigmas, x_dim, p_dim, r);
if (result != REG_ok)
return result;
result = linear_solve(A, b, p_dim, dpar);
if (result != REG_ok)
return result;
for(i = 0; i < p_dim; i++)
tmp_par[i] = par[i] + dpar[i];
result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &chi_pos);
if (result != REG_ok)
return result;
#ifdef DEBUG
printf("Chi Squared : %lf",chi_pre);
printf("Chi Squared : &lf",chi_pos);
printf("r : %lf",r);
#endif
if (chi_pos <= chi_pre) { /* There is improvement */
r /= 10;
par = tmp_par;
if (abs(chi_pos - chi_pre) < DELTA)
break;
chi_pre = chi_pos;
} else {
r *= 10;
}
}
result = parameter_errors(f, xvals, par, yvals, sigmas, x_dim, p_dim, errors);
if (result != REG_ok)
return result;
*chi = chi_pos;
FREE_MATRIX(A, p_dim, p_dim);
g_free(dpar);
g_free(tmp_par);
g_free(b);
return REG_ok;
}