Re: non-linear regression update.



How do you _guarantee_ termination of your while(1) loop ? While I can see that normally you will terminate I don't see a reason why a well crafted example could not run unterminated (it probably doesn't help my understanding that I don't seem to see a use of r in non_linear_regression.

Andreas

Daniel Carrera wrote:

For anyone that is willing to take a look at my work...
the attached file contains some corrections to my original submission.

Once again, help is much appreciated.


------------------------------------------------------------------------

/* Non-linear Regression
 *
 * Daniel Carrera (dcarrera math toronto edu)
 *
 * Suggested additions to regression.h and regression.c
 * Version 0.0.1
 *
 * These additions add a non-linear regrssion function to Gnumeric
 * using the well-stablished Levenberg-Marquardt method.
 *
 * STATUS:
 * =======
 *    I prototyped and tested the method under Python first.  There should
 * not be major problems with the algorithm.  However, the C implementation
 * has received NO TESTING whatsoever.
 *
 * TODO:
 * =====
 * 1.-  Testing.
 * 2.-  Integration with Gnumeric.
 *
 */

/* ---------------------------- regression.h ------------------------------- */

/* How do I make a RegressionFunction?
 * ===================================
 *
 * SYNOPSIS:    
 *              RegressionResult f(gnum_float * , gnum_float * , gnum_float * )
 *
 * USAGE:
 *              result = f(x, par, &y);
 *
 * DETAILS:
 * x    -> independent variables.
 * par  -> parameters.
 *
 * The function must:
 *              -       Be smooth on the parameters.
 *              -       Store the resulting value in 'y'.
 *
 * EXAMPLE:
        
RegressionResult f (gnum_float * X, gnum_float * s, gnum_float *y)
{       
        gnum_float x = X[0];
        gnum_float y = X[1];

        *y = s[0] * sin( s[1]*x + s[2]*y ) + s[3];
        
        return REG_ok;
}
*/

/* ----------------------------  regression.h ------------------------------ */

#define DELTA 1e-6

typedef RegressionResult (*RegressionFunction) (gnum_float * x, gnum_float * params, gnum_float *f);


/* ----------------------------  regression.c ------------------------------ */

/*
 * SYNOPSIS:
 *      result = derivative( f, &df, x, par, i)
 *
 * Approximates the partial derivative of a given function, at (x;params) with respect
 * to the parameter indicated by ith parameter.  The resulst is stored in 'df'.
 *
 * See the header file for more information.
 */
static RegressionResult derivative(
        RegressionFunction f,
        gnum_float * df,
        gnum_float * x,                 /* Only one point, not the whole data set. */
        gnum_float * par,
        gint index
)
{
        gnum_float y1, y2;
        RegressionResult result;
        
        par[index] -= DELTA;
        status = (*f)(x,par, &y1);
        if (status != REGRESSION_OK)
                return status;
        
        params[index] += 2*DELTA;
        status = (*f)(x,par, &y2);
        if (status != REGRESSION_OK)
                return status;
        
#ifdef DEBUG
        printf("y1 = %lf\n",y1);
        printf("y2 = %lf\n",y2);
        printf("DELTA = %lf\n",DELTA);
#endif
        
        *df = (y2 - y1) / (2*DELTA);
        return REG_ok;
}

/*
 * SYNOPSIS:
 *      result = chi_derivative( f, &dchi, xvals, par, i, yvals, sigmas)
 *
* This is a simple adaptation of the derivative() function specific to * the Chi Squared. */ static RegressionResult
chi_derivative(
        RegressionFunction f,
        gnum_float * dchi,
        gnum_float * xvals,             /* The entire data set, not just one point. */
        gnum_float * par,
        gint index,
        gnum_float * yvals,             /* Ditto. */
        gnum_float * sigmas             /* Ditto. */
)
{
        gnum_float y1, y2;
        RegressionResult result;
        
        par[index] -= DELTA;
        result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &y1);
        if (result != REG_ok)
                return result;
        
        params[index] += 2*DELTA;
        result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &y2);
        if (result != REG_ok)
                return result;
        
#ifdef DEBUG
        printf("y1 = %lf\n",y1);
        printf("y2 = %lf\n",y2);
        printf("DELTA = %lf\n",DELTA);
#endif
        
        *dchi = (y2 - y1) / (2*DELTA);
        return REG_ok;
}


/*
 * SYNOPSIS:
 *              result = coefficient_matrix(A, f, xvals, par, yvals, sigmas, x_dim, p_dim, r)
 *
 * RETURNS:
 *              The coefficient matrix of the LM method.
 *      
 * DETAIS:
 *              The coefficient matrix matrix is defined by
* * N 1 df df
 *     A   = Sum  ( -------   --  --  ( i == j? 1 + r : 1 )  )
 *      ij   k=1    sigma^2   dp  dp
 *                       k      i   j
 *
 * A            ->   p_dim X p_dim coefficient matrix.  MUST ALREADY BE ALLOCATED.
 *
 * sigmas       ->   Measurement errors in the dataset (along the y-axis).
 *                              NULL means "no errors available", so they are all set to 1.
 *
 * x_dim        ->   Number of data points.
 *
 * p_dim        ->   Number of parameters.
 *
* r -> Positive constant. It's value is altered during the LM procedure. * */ static RegressionResult coefficient_matrix(
        gnum_float ** A,                /* Location where the matrix will be stored. */
        RegressionFunction f,
        gnum_float ** xvals,    /* The entire data set, not just one point. */
        gnum_float  * par,
        gnum_float  * yvals,    /* Ditto. */
        gnum_float  * sigmas,   /* Ditto. */
        gint              x_dim,        /* Number of data points. */
        gint              p_dim,        /* Number of parameters.  */
        gnum_float    r
)
{
        gint i, j, k;
        RegressionResult result;
        gnum_float df_i, df_j;
        gnum_float sum, sigma;
        
        /* Notice that the matrix is symetric.  */
        for (i = 0; i < p_dim; i++) {
                for (j = 0; j <= i; j++) {
                        sum = 0;
                        for (k = 0; k < x_dim; x++) {
                                result = derivative( f, &df_i, xvals[k], par, i);
if (result != REG_ok) return result;
                                
                                result = derivative( f, &df_i, xvals[k], par, j);
if (result != REG_ok) return result;
                                
                                sigma = (sigmas == NULL? 1 : sigmas[k]);
                                
                                sum = sum +   1 / (sigma*sigma) * df_i*df_j * (i == j? 1 + r : 1) ;
                        }
                        A[i][j] = A[j][i] = sum;
                }
        }
                
        return REG_ok;
}

/*
 * SYNOPSIS:
 *              result = parameter_errors(f, xvals, par, yvals, sigmas, x_dim, p_dim, errors)
* * Returns the errors associated with the parameters.
 * If an error is infinite, it is set to -1.
 *
 * sigmas       ->   Measurement errors in the dataset (along the y-axis).
 *                              NULL means "no errors available", so they are all set to 1.
 *
 * x_dim        ->   Number of data points.
 *
 * p_dim        ->   Number of parameters.
 *
 * errors       ->  MUST ALREADY BE ALLOCATED.
 */

/* FIXME:  I am not happy with the behaviour with infinite errors.  */
static RegressionResult
parameter_errors(
        RegressionFunction f,
        gnum_float ** xvals,    /* The entire data set, not just one point. */
        gnum_float  * par,
        gnum_float  * yvals,    /* Ditto. */
        gnum_float  * sigmas,   /* Ditto. */
        gint              x_dim,        /* Number of data points. */
        gint              p_dim,        /* Number of parameters.  */
        gnum_float  * errors
)
{
        RegressionResult result;
        gnum_float ** A;
        gint i;
        
        ALLOC_MATRIX(A, p_dim, p_dim)
        
        result = coefficient_matrix(A, f, x, par, y, sigmas, x_dim, p_dim, 0)
        if (result != REG_ok)
                return result;
        
        for (i = 0; i < p_dim; i++)
                errors[i] = (A[i][j] != 0? 1/sqrt(A[i][j]) : -1 );
        
        FREE_MATRIX(A, p_dim, p_dim);
        return REG_ok;
}

/*
 * SYNOPSIS:
 * result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &chisq)
* * / y - f(x ; par) \ 2
 *            2              |    i      i        |
 *         Chi   ==   Sum (  | ------------------ |   )
 *                            \     sigma        /
 *                                       i
* * sigmas -> Measurement errors in the dataset (along the y-axis).
 *                              NULL means "no errors available", so they are all set to 1.
 *
 * x_dim        ->   Number of data points.
 *
 * This value is not very meaningful without the sigmas.  However, it is still useful
 * for the fit.
 */
static RegressionResult
chi_squared(
        RegressionFunction f,
        gnum_float ** xvals,    /* The entire data set, not just one point. */
        gnum_float  * par,
        gnum_float  * yvals,    /* Ditto. */
        gnum_float  * sigmas,   /* Ditto. */
        gint              x_dim,        /* Number of data points. */
        gnum_float  * chisq     /* Chi Squared */
)
{
        gint i;
        RegressionResult result;
        gnum_float tmp, y;
        *chisq = 0;
        
        for (i = 0; i < x_dim; i++) {
                result = f(xvals[i], par, &y);
                if (result != REG_ok)
                        return result;
                
                tmp = (yvals[i] - y ) / (sigmas == NULL ? 1 : sigmas[i] );
                
                *chisq += tmp*tmp;
        }
        
        return REG_ok;
}

/*
 * SYNOPSIS:
 *              result = non_linear_regression(f, xvals, par, yvals, sigmas, x_dim, p_dim, &chi, errors)
* * Returns the results of the non-linear regression from the given intial values.
 * The resulting parameters are placed back into 'par'.
 *              
 * PARAMETERS:
 *
 * sigmas       ->   Measurement errors in the dataset (along the y-axis).
 *                              NULL means "no errors available", so they are all set to 1.
 *
 * x_dim        ->   Number of data points.
 *
 * p_dim        ->   Number of parameters.
 *
 * errors       ->  MUST ALREADY BE ALLOCATED.  These are the approximated standard
 *                              deviation for each parameter.
 *
 * chi          ->   Chi Squared of the final result.  This value is not very meaningful
 *                              without the sigmas.
 *              
 */
static RegressionResult
non_linear_regression( RegressionFunction f,
        gnum_float ** xvals,    /* The entire data set, not just one point. */
        gnum_float  * par,
        gnum_float  * yvals,    /* Ditto. */
        gnum_float  * sigmas,   /* Ditto. */
        gint              x_dim,        /* Number of data points. */
        gint              p_dim,        /* Number of parameters.  */
        gnum_float  * chi,
        gnum_float  * errors
)
{
        gnum_float   r = 0.001; /* Pick a conservative initial value. */
        gnum_float * b;
        gnum_float * dpar;
        gnum_float * tmp_par;
        gnum_float chi_pre, chi_pro, dchi;
        RegressionResult result;
        gint i;
        
        result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &chi_pos);
        if (result != REG_ok)
                return result;
        
        
        ALLOC_MATRIX(A, p_dim, p_dim);
        dpar    = g_new(gnum_float, p_dim);
        tmp_par = g_new(gnum_float, p_dim);
        b       = g_new(gnum_float, p_dim);
#ifdef DEBUG
        printf("Chi Squared : %lf",chi_pre);
#endif
        
        while (1) {
                for (i = 0; i < p_dim; i++) {
                        /*
                         *          d Chi
                         *  b   ==  -----
                         *   k       d p
                         *              k
                         */
                        result = chi_derivative( f, &dchi, xvals, par, i, yvals, sigmas);
                        if (result != REG_ok)
                                return result;
                        
                        b[i] = - dchi;
                }
                
                result = coefficient_matrix(A, f, xvals, par, yvals, sigmas, x_dim, p_dim, r);
                if (result != REG_ok)
                        return result;
                
                result = linear_solve(A, b, p_dim, dpar);
                if (result != REG_ok)
                        return result;
                
                for(i = 0; i < p_dim; i++)
                        tmp_par[i] = par[i] + dpar[i];
        
                result = chi_squared( f, xvals, par, yvals, sigmas, x_dim, &chi_pos);
                if (result != REG_ok)
                        return result;
                        
#ifdef DEBUG
                printf("Chi Squared : %lf",chi_pre);
                printf("Chi Squared : &lf",chi_pos);
                printf("r  :  %lf",r);
#endif
                
                if (chi_pos <= chi_pre) { /* There is improvement */
                        r /= 10;
                        par = tmp_par;
                        
                        if (abs(chi_pos - chi_pre) < DELTA)
                                break;
                        
                        chi_pre = chi_pos;
                } else {
                        r *= 10;
                }
        }
        
        result = parameter_errors(f, xvals, par, yvals, sigmas, x_dim, p_dim, errors);
        if (result != REG_ok)
                return result;

        *chi = chi_pos;
        
        FREE_MATRIX(A, p_dim, p_dim);
        g_free(dpar);
        g_free(tmp_par);
        g_free(b);
        
        return REG_ok;
}



--
Prof. Dr. Andreas J. Guelzow
http://www.math.concordia.ab.ca/aguelzow




[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]