*** fn-stat.c	Tue Sep 10 13:50:42 2002
--- new_fn-stat.c	Tue Sep 10 13:50:31 2002
***************
*** 3819,3822 ****
--- 3819,4191 ----
  /***************************************************************************/
  
+ static char *help_logreg = {	/* Help text for LOGREG; passed to function_add_args () below. */
+ 	N_("@FUNCTION=LOGREG\n"
+ 	   "@SYNTAX=LOGREG(known_y's[,known_x's[,const[,stat]]])\n"
+ 	   /* Sections are introduced by @FUNCTION, @SYNTAX, @DESCRIPTION, @EXAMPLES and @SEEALSO. */
+ 	   "@DESCRIPTION="
+ 	   "The LOGREG function transforms your x's to z=log(x) and "
+ 	   "applies the ``least squares'' method to fit the linear equation\n"
+            "y = m * z + b \n"
+ 	   "to your y's and z's --- equivalent to fitting the equation\n"
+ 	   "y = m * log(x) + b \n"
+ 	   "to y's and x's. \n"
+ 	   "\n"
+ 	   "If @known_x's is omitted, an array {1, 2, 3, ...} is used. "
+            "LOGREG returns an array having two columns and one row. "
+            "m is given in the first column and b in the second. \n"
+ 	   "\n"
+ 	   "If @known_y's and @known_x's have unequal number of data points, "
+ 	   "LOGREG returns #NUM! error."
+ 	   "\n"
+ 	   "If @const is FALSE, the line will be forced to go through [1; 0], "
+ 	   "i.e., b will be zero. The default is TRUE."
+ 	   "\n"
+ 	   "If @stat is TRUE, extra statistical information will be returned "
+ 	   "which applies to the state AFTER transformation to z, "
+ 	   "assuming that the z = log(x) are normally distributed. "
+ 	   "Extra statistical information is written below m and b in the "
+ 	   "result array.  Extra statistical information consists of four "
+ 	   "rows of data.  In the first row the standard error values for the "
+ 	   "coefficients m, b are represented.  The second row "
+ 	   "contains the square of R and the standard error for the y "
+ 	   "estimate. The third row contains the F-observed value and the "
+ 	   "degrees of freedom.  The last row contains the regression sum "
+ 	   "of squares and the residual sum of squares."
+ 	   "\n"
+ 	   "The default of @stat is FALSE.\n"
+ 	   "@EXAMPLES=\n"
+ 	   "\n"
+ 	   "@SEEALSO=LOGFIT,LINEST,LOGEST")
+ };
+ 
+ /* LOGREG: copy of "gnumeric_linest" (Gnumeric 1.0.9) with
+  * "linear_regression" replaced by "logarithmic_regression", which transforms
+  * the x's logarithmically before calling "general_linear_regression".  In
+  * Excel this is only available as a "trend curve" within graphs.
+  *
+  * argv[0] holds the known y's, followed by optional known x's, const and
+  * stat.  Returns an array of dim + 1 columns and 1 row (5 rows if stat), or
+  * an error value.  Array-valued y's are not implemented and yield #VALUE!.
+  *
+  * I do not know if in statistical practice logarithmically transformed
+  * x-data is useful for *multidimensional* regression, and also if extra
+  * statistical data is useful in this case, but since
+  * "general_linear_regression" written by others provides it I have passed
+  * this functionality to the user.  But see comment to "gnumeric_linest" for
+  * problem with reading more than one x-range.
+  */
+ 
+ static Value *
+ gnumeric_logreg (FunctionEvalInfo *ei, Value *argv[])
+ {
+ 	gnum_float        **xss = NULL, *ys = NULL;
+ 	Value             *result = NULL;
+ 	int               nx = 0, ny = 0, dim = 0, i;
+ 	int               xarg = 0; /* 1 when argv[1] was consumed as known x's */
+ 	gnum_float        *logreg_res = NULL;
+ 	gboolean          affine, stat, err;
+ 	enum {
+ 		ARRAY      = 1,
+ 		SINGLE_COL = 2,
+ 		SINGLE_ROW = 3,
+ 		OTHER      = 4
+ 	}                 ytype;
+ 	regression_stat_t extra_stat;
+ 
+ 	memset (&extra_stat, 0, sizeof (extra_stat));
+ 
+ 	if (argv[0] == NULL || (argv[0]->type != VALUE_ARRAY && argv[0]->type != VALUE_CELLRANGE)){
+ 		result = value_new_error (ei->pos, gnumeric_err_VALUE); /* Not a valid input for ys */
+ 		goto out;
+ 	}
+ 
+ 	if (argv[0]->type == VALUE_ARRAY)
+ 		ytype = ARRAY;
+ 	else if (argv[0]->v_range.cell.a.col == argv[0]->v_range.cell.b.col)
+ 		ytype = SINGLE_COL;
+ 	else if (argv[0]->v_range.cell.a.row == argv[0]->v_range.cell.b.row)
+ 		ytype = SINGLE_ROW;
+ 	else ytype = OTHER;
+ 
+ 	if (argv[0]->type == VALUE_CELLRANGE)
+ 		ys = collect_floats_value (argv[0], ei->pos,
+ 					   COLLECT_IGNORE_STRINGS |
+ 					   COLLECT_IGNORE_BOOLS,
+ 					   &ny, &result);
+ 	else {
+ 		/* Array-valued ys are not implemented: report an error
+ 		 * rather than continue with ys == NULL and ny unset. */
+ 		result = value_new_error (ei->pos, gnumeric_err_VALUE);
+ 	}
+ 
+ 	if (result)
+ 		goto out;
+ 
+ 	/* TODO Better error-checking in next statement */
+ 
+ 	if (argv[1] == NULL || (ytype == ARRAY && argv[1]->type != VALUE_ARRAY) ||
+ 	    (ytype != ARRAY && argv[1]->type != VALUE_CELLRANGE)){
+ 		dim = 1; /* no usable x's: fall back to 1, 2, 3, ... */
+ 		xss = g_new (gnum_float *, 1);
+ 		xss[0] = g_new (gnum_float, ny);
+ 		for (nx = 0; nx < ny; nx++)
+ 			xss[0][nx] = nx + 1;
+ 	}
+ 	else if (ytype == ARRAY){
+ 			xarg = 1;
+ 			/* Not reached at present: array ys are rejected above. */
+ 	}
+ 	else if (ytype == SINGLE_COL){
+ 		int firstcol, lastcol;
+ 		Value *copy;
+ 		xarg = 1;
+ 		firstcol = argv[1]->v_range.cell.a.col;
+ 		lastcol  = argv[1]->v_range.cell.b.col;
+ 
+ 		if (firstcol > lastcol) { /* normalise so firstcol <= lastcol */
+ 			int tmp = firstcol;
+ 			firstcol = lastcol;
+ 			lastcol = tmp;
+ 		}
+ 
+ 		dim = lastcol - firstcol + 1;
+ 		copy = value_duplicate (argv[1]);
+ 		xss = g_new (gnum_float *, dim);
+ 		for (i = firstcol; i <= lastcol; i++){ /* one x vector per column */
+ 			copy->v_range.cell.a.col = i;
+ 			copy->v_range.cell.b.col = i;
+ 			xss[i - firstcol] = collect_floats_value (copy, ei->pos,
+ 						       COLLECT_IGNORE_STRINGS |
+ 						       COLLECT_IGNORE_BOOLS,
+ 						       &nx, &result);
+ 			if (result){
+ 				value_release (copy);
+ 				dim = i - firstcol; /*How many got allocated before failure*/
+ 				goto out;
+ 			}
+ 			if (nx != ny){
+ 				value_release (copy);
+ 				dim = i - firstcol + 1;
+ 				result = value_new_error (ei->pos, gnumeric_err_NUM);
+ 				goto out;
+ 			}
+ 		}
+ 		value_release (copy);
+ 	}
+ 	else if (ytype == SINGLE_ROW){
+ 		int firstrow, lastrow;
+ 		Value *copy;
+ 		xarg = 1;
+ 		firstrow = argv[1]->v_range.cell.a.row;
+ 		lastrow  = argv[1]->v_range.cell.b.row;
+ 
+ 		if (firstrow > lastrow) { /* normalise so firstrow <= lastrow */
+ 			int tmp = firstrow;
+ 			firstrow = lastrow;
+ 			lastrow = tmp;
+ 		}
+ 
+ 		dim = lastrow - firstrow + 1;
+ 		copy = value_duplicate (argv[1]);
+ 		xss = g_new (gnum_float *, dim);
+ 		for (i = firstrow; i <= lastrow; i++){ /* one x vector per row */
+ 			copy->v_range.cell.a.row = i;
+ 			copy->v_range.cell.b.row = i;
+ 			xss[i - firstrow] = collect_floats_value (copy, ei->pos,
+ 						       COLLECT_IGNORE_STRINGS |
+ 						       COLLECT_IGNORE_BOOLS,
+ 						       &nx, &result);
+ 			if (result){
+ 				value_release (copy);
+ 				dim = i - firstrow; /*How many got allocated before failure*/
+ 				goto out;
+ 			}
+ 			if (nx != ny){
+ 				value_release (copy);
+ 				dim = i - firstrow + 1;
+ 				result = value_new_error (ei->pos, gnumeric_err_NUM);
+ 				goto out;
+ 			}
+ 		}
+ 		value_release (copy);
+ 	}
+ 	else { /*Y is none of the above */
+ 		xarg = 1;
+ 		dim = 1;
+ 		xss = g_new (gnum_float *, dim);
+ 		xss[0] = collect_floats_value (argv[1], ei->pos,
+ 					       COLLECT_IGNORE_STRINGS |
+ 					       COLLECT_IGNORE_BOOLS,
+ 					       &nx, &result);
+ 		if (result){
+ 			dim = 0;
+ 			goto out;
+ 		}
+ 		if (nx != ny){
+ 			dim = 1;
+ 			result = value_new_error (ei->pos, gnumeric_err_NUM);
+ 			goto out;
+ 		}
+ 	}
+ 
+ 	if (argv[1 + xarg]) {
+ 		affine = value_get_as_bool (argv[1 + xarg], &err);
+ 		if (err) {
+ 			result = value_new_error (ei->pos, gnumeric_err_VALUE);
+ 			goto out;
+ 		}
+ 	} else
+ 		affine = TRUE;
+ 
+ 	if (argv[2 + xarg]) {
+ 		stat = value_get_as_bool (argv[2 + xarg], &err);
+ 		if (err) {
+ 			result = value_new_error (ei->pos,
+ 						  gnumeric_err_VALUE);
+ 			goto out;
+ 		}
+ 	} else
+ 		stat = FALSE;
+ 
+ 	logreg_res = g_new (gnum_float, dim + 1);
+ 
+ 	if (logarithmic_regression (xss, dim, ys, nx, affine,
+ 			       logreg_res, &extra_stat)) {
+ 		result = value_new_error (ei->pos, gnumeric_err_NUM);
+ 		goto out;
+ 	}
+ 
+ 	if (stat) {
+ 		result = value_new_array (dim + 1, 5);
+ 
+ 		value_array_set (result, 0, 2,
+ 				 value_new_float (extra_stat.sqr_r));
+ 		value_array_set (result, 1, 2,
+ 				 value_new_float (sqrt (extra_stat.var)));
+ 		value_array_set (result, 0, 3,
+ 				 value_new_float (extra_stat.F));
+ 		value_array_set (result, 1, 3,
+ 				 value_new_float (extra_stat.df));
+ 		value_array_set (result, 0, 4,
+ 				 value_new_float (extra_stat.ss_reg));
+ 		value_array_set (result, 1, 4,
+ 				 value_new_float (extra_stat.ss_resid));
+ 		for (i = 0; i < dim; i++)
+ 			value_array_set (result, dim - i - 1, 1,
+ 					 value_new_float (extra_stat.se[i+affine]));
+ 		value_array_set (result, dim, 1,
+ 				 value_new_float (extra_stat.se[0]));
+ 	} else
+ 		result = value_new_array (dim + 1, 1);
+ 
+ 	value_array_set (result, dim, 0, value_new_float (logreg_res[0]));
+ 	for (i = 0; i < dim; i++)
+ 		value_array_set (result, dim - i - 1, 0, value_new_float (logreg_res[i + 1]));
+ 
+  out:
+ 	for (i = 0; i < dim; i++) /* dim counts the x vectors actually allocated */
+ 		g_free (xss[i]);
+ 	g_free (xss);
+ 	g_free (ys);
+ 	g_free (logreg_res);
+ 	g_free (extra_stat.se);
+ 	g_free (extra_stat.xbar);
+ 	g_free (extra_stat.t);
+ 	return result;
+ }
+ 
+ /***************************************************************************/
+ 
+ static char *help_logfit = {	/* Help text for LOGFIT; passed to function_add_args () below. */
+ 	N_("@FUNCTION=LOGFIT\n"
+ 	   "@SYNTAX=LOGFIT(known_y's,known_x's)\n"
+ 	   /* Sections are introduced by @FUNCTION, @SYNTAX, @DESCRIPTION, @EXAMPLES and @SEEALSO. */
+ 	   "@DESCRIPTION="
+ 	   "The LOGFIT function applies the ``least squares'' method to fit "
+ 	   "the logarithmic equation\n"
+ 	   "y = a + b * log(sign * (x - c)) ,   sign = +1 or -1 \n"
+ 	   "to your data. The graph of the equation is a logarithmic curve "
+ 	   "moved horizontally by -c and possibly mirrored across the y-axis "
+ 	   "(if sign = -1).\n"
+ 	   "\n"
+ 	   "LOGFIT returns an array having five columns and one row. "
+ 	   "`Sign' is given in the first column, `a', `b', and `c' are "
+ 	   "given in columns 2 to 4. Column 5 holds the sum of squared "
+ 	   "residuals.\n"
+ 	   "\n"
+ 	   "An error is returned when there are fewer than 3 different x's "
+ 	   "or y's, or when the shape of the point cloud is too different "
+ 	   "from a ``logarithmic'' one.\n"
+ 	   "\n"
+ 	   "You can use the above formula \n"
+ 	   "= a + b * log(sign * (x - c)) \n"
+ 	   "or rearrange it to \n"
+ 	   "= (exp((y - a) / b)) / sign + c \n"
+ 	   "to compute unknown y's or x's, respectively. \n"
+ 	   "\n"
+ 	   "Technically, this is non-linear fitting by trial-and-error. "
+ 	   "The accuracy of `c' is: width of x-range -> rounded to the "
+ 	   "next smaller (10^integer), times 0.000005. There might be cases "
+ 	   "in which the returned fit is not the best possible.\n"
+ 	   "@EXAMPLES=\n"
+ 	   "\n"
+ 	   "@SEEALSO=LOGREG,LINEST,LOGEST")
+ };
+ 
+ /* LOGFIT: fits y = a + b * log(sign * (x - c)); not available in Excel.
+  * It is intended for calculation of unknowns from a calibration curve and
+  * adapts well to some types of scientific data.  It does not do
+  * multidimensional regression or extra statistics.  argv[0]/argv[1] are the
+  * known y's/x's (cell ranges); returns a 5-column, 1-row array or an error.
+  * One could do this kind of non-linear fitting with a general solver, too,
+  * but the success depends on the choosing of suitable starting values.
+  * Also, determination of `sign' would be complicated.
+  */
+ static Value *
+ gnumeric_logfit (FunctionEvalInfo *ei, Value *argv[])
+ {
+ 	gnum_float         *xs = NULL, *ys = NULL;
+ 	Value              *result = NULL;
+ 	int                nx, ny, i;
+ 	gnum_float         *logfit_res = NULL;
+ 
+ 	if (argv[0] == NULL || argv[0]->type != VALUE_CELLRANGE ||
+ 	    argv[1] == NULL || argv[1]->type != VALUE_CELLRANGE) {
+ 		result = value_new_error (ei->pos, gnumeric_err_VALUE); /* never return a bare NULL */
+ 		goto out;
+ 	}
+ 	ys = collect_floats_value (argv[0], ei->pos,
+ 				   COLLECT_IGNORE_BLANKS, &ny, &result);
+ 	if (result)
+ 		goto out;
+ 	xs = collect_floats_value (argv[1], ei->pos,
+ 				   COLLECT_IGNORE_BLANKS, &nx, &result);
+ 	if (result)
+ 		goto out;
+ 	if (nx != ny || nx < 3) { /* need matching x/y counts and at least 3 points */
+ 		result = value_new_error (ei->pos, gnumeric_err_VALUE);
+ 		goto out;
+ 	}
+ 
+ 	logfit_res = g_new (gnum_float, 5);
+ 
+ 	if (logarithmic_fit (xs, ys, nx, logfit_res)) {
+ 		result = value_new_error (ei->pos, gnumeric_err_NUM);
+ 		goto out;
+ 	}
+ 
+ 	result = value_new_array (5, 1);
+ 	for (i = 0; i < 5; i++) /* copy the five fit results into row 0 */
+ 		value_array_set (result, i, 0, value_new_float (logfit_res[i]));
+ 
+  out:
+ 	g_free (xs);
+ 	g_free (ys);
+ 	g_free (logfit_res);
+ 	return result;
+ }
+ 
+ /***************************************************************************/
+ 
  static char *help_trend = {
  	N_("@FUNCTION=TREND\n"
***************
*** 4592,4595 ****
--- 4961,4972 ----
  				  "known_y's[,known_x's,const,stat]",
  				  &help_linest, gnumeric_linest);
+ 
+ 	def = function_add_args  (cat, "logreg",   "A|Abb",
+ 				  "known_y's[,known_x's,const,stat]",
+ 				  &help_logreg, gnumeric_logreg);
+ 
+ 	def = function_add_args  (cat, "logfit", "rr",
+ 				  "known_y's,known_x's",
+ 				  &help_logfit, gnumeric_logfit);
  
  	def = function_add_args  (cat, "logest",  "A|Abb",