/* Non-linear Regression * * Daniel Carrera (dcarrera math toronto edu) * * Suggested additions to regression.h and regression.c * Version 0.0.0 * * TODO: * ----- * There is a lot left to do. This file should be taken as the rough draft * that it is. Some of the things left to do include: * * - I have only managed to test the derivative() function. * EVERYTHING HERE NEEDS TESTING. * * - The function non_linear_regression() only performs one iteration. * We still need a function to call it until we reach the desired * accuracy. * * - I tried to follow the Gnumeric standards, but there are probably many * things that I could have done better. * * - We need to get Gnumeric to use this for something useful. */ /* ---------------------------- regression.h ------------------------------- */ #define DELTA 1e-6 typedef enum { REGRESSION_OK, REGRESSION_ERROR } RegressionStatus; /* x == independent variables. * par == parameters. * * Ex: f(x,y) = a*sin(b*x + c*y + d) + e * x == (x,y) * par == (a,b,c,d,e) * * Ex: status = f(x, par, &y); */ typedef RegressionStatus (*RegressionFunction) (gnum_float * x, gnum_float * params, gnum_float *f); /* ---------------------------- regression.c ------------------------------ */ /* Usage: * status = derivative(func, &df, x, par, index) * * Approximate the partial derivative of a given function, at the * point 'params', with respect to the parameter indicated by 'index'. * * See the 'STANDALONE' section for an example. */ NonLinearFitStatus derivative( NonLinearFitStatus (*f) (gnum_float *, gnum_float *, gnum_float *), gnum_float *df, gnum_float * x, gnum_float * par, gint index ) { gnum_float y1,y2; RegressionStatus status; par[index] -= DELTA; status = (*f)(x,par, &y1); if (status != NON_LINEAR_FIT_OK) return status; params[index] += 2*DELTA; status = (*f)(x,par, &y2); if (status != NON_LINEAR_FIT_OK) return status; #ifdef DEBUG printf("y1 = %lf\n",y1); printf("y2 = %lf\n",y2); printf("DELTA = %lf\n",DELTA); #endif *df = (y2 - y1) / (2*DELTA); return REGRESSION_OK; } /* USAGE: * status = non_linear_regression(xs,y,m,f,par,p) * * */ RegressionStatus non_linear_regression( const gnum_float **xs, /* x-vectors (independent data) */ const gnum_float *y, /* y-vector (dependent data) */ const int m, /* Number of data points. */ RegressionStatus /* */ (*f) /* Function to fit. */ (gnum_float *, gnum_float *, gnum_float *), /* */ gnum_float *par, /* Initial guess for the parameters. */ /* Will be replaced by their final values. */ const int p, /* Number of parameters. */ gnum_float *sum_res /* Sum of the squares of the residuals. */ ) { int i,j; gnum_float df, ds; RegressionStatus status; /* Original system: A*ds = b * Final system: (ATA)*ds = AT*b * * Where s == parameters */ gnum_float **A, **ATA; /* AT == A transpose. ATA == AT*A */ gnum_float *ATb, *b; gnum_float tmp; RegressionStatus status; /* Calculate A using the derivative() function. */ ALLOC_MATRIX (A, m, p); for (i =0; i < m; i++) for (j=0; j < p; l++) { /* df * A = --- (x ; s) * ij ds i * j * */ status = derivative(f,&tmp,xs[i],par,j); if (status != REGRESSION_OK) return status; A[i][j] = tmp; } /* Calculate b -- having it before hand saves computations * b == y - f(x,par) * */ b = g_new(gnum_float, m); sum_res = 0; for (i = 0; i < m; i++ ) { status = (*f)(xs[i], par, &tmp); if (status != REGRESSION_OK) return status; b[i] = y[i] - tmp; *sum_res += b[i]*b[i]; } /* Find ATb */ ATb = g_new (gnum_float, p); for (j = 0; j < p; j++) { register gnum_float dot_product = 0; /* Compute the dot_product of the jth col of A (jth row of AT) * and b. * */ for (i = 0; i < m; i++) dot_product += A[i][j] * b[i]; ATb[j] = dot_product; } /* Find ATA */ ALLOC_MATRIX (ATA, p, p); for (j = 0; j < p; j++) { register gnum_float dot_product; for (i = 0; i <= j; i++) { int k; dot_product = 0; /* Compute the dot_product of the ith row of AT * (ith col of A) and the jth col of A * */ for (k = 0; k < m; k++) dot_product += A[k][i]*A[k][j]; ATA[i][j] = ATA[j][i] = dot_product; } } /* Find ds */ ds = g_new (gnum_float, p); status = linear_solve(ATA, ATb, p, ds); /* FIXME linear_solve() does not * return a RegressionStatus */ if (status != 0) return REGRESSION_ERROR; /* Update the parameters */ for (i = 0; i < p; i++) par[i] += ds[i]; return REGRESSION_OK; } /* * EXAMPLE * This function implements 'a * sin(a*x + b*y + c ) + e'. * RegressionStatus f (gnum_float * X, gnum_float * s, gnum_float *y) { gnum_float x = X[0]; gnum_float y = X[1]; gnum_float a = s[0]; gnum_float b = s[1]; gnum_float c = s[2]; gnum_float d = s[3]; gnum_float e = s[4]; *y = a * sin(a*x + b*y + c ) + e; return NON_LINEAR_FIT_OK; } */