[gcalctool/gcalctool-new-parser] First work on replacing old parser

From: Robert Ancell <rancell src gnome org>
To: commits-list gnome org
Cc:
Subject: [gcalctool/gcalctool-new-parser] First work on replacing old parser
Date: Mon, 21 Mar 2011 04:26:02 +0000 (UTC)
commit d6dcf6ccc659c4220f1011383459fb38008c467e
Author: Robert Ancell <robert ancell canonical com>
Date:   Mon Mar 21 15:25:46 2011 +1100

    First work on replacing old parser

 configure.ac              |   19 -
 src/Makefile.am           |   33 +--
 src/mp-equation-lexer.l   |  111 -----
 src/mp-equation-parser.y  |  258 ----------
 src/mp-equation-private.h |   56 ---
 src/mp-equation.c         | 1174 ++++++++++++++++++++++++++++++++++++++++-----
 6 files changed, 1050 insertions(+), 601 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 7162b92..a4a34b4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -59,25 +59,6 @@ AC_SUBST(GLIB_MKENUMS)
 AC_CHECK_LIB(m, log)
 
 dnl ###########################################################################
-dnl Determine if a usable lex is available on this system
-dnl ###########################################################################
-
-AM_PROG_LEX
-if [[ "$LEX" != "flex" ]]; then
-	AC_MSG_ERROR(flex is required to create the gcalctool scanners)
-fi
-
-dnl ###########################################################################
-dnl Determine if a usable yacc is available on this system
-dnl ###########################################################################
-
-AC_PROG_YACC
-AC_CHECK_PROG(HAVE_YACC, $YACC, yes, no)
-if [[ "$HAVE_YACC" = "no" ]]; then
-	AC_MSG_ERROR($YACC is not usable as yacc - consider using bison)
-fi
-
-dnl ###########################################################################
 dnl Internationalization
 dnl ###########################################################################
 
diff --git a/src/Makefile.am b/src/Makefile.am
index 3a23f65..e958931 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -41,12 +41,6 @@ gcalctool_SOURCES = \
 	mp-enums.h \
 	mp-equation.c \
 	mp-equation.h \
-	mp-equation-private.h \
-	mp-equation-lexer.c \
-	mp-equation-lexer.h \
-	mp-equation-parser.c \
-	mp-equation-parser.h \
-	mp-private.h \
 	mp-serializer.c \
 	mp-serializer.h \
 	mp-trigonometric.c \
@@ -60,7 +54,7 @@ gcalctool_SOURCES = \
 	unit-manager.h
 
 gcalctool_LDADD = \
-	$(GCALCTOOL_LIBS)        
+	$(GCALCTOOL_LIBS)
 
 gcalccmd_SOURCES = \
 	gcalccmd.c \
@@ -74,8 +68,6 @@ gcalccmd_SOURCES = \
 	mp-enums.c \
 	mp-enums.h \
 	mp-equation.c \
-	mp-equation-parser.c \
-	mp-equation-lexer.c \
 	mp-serializer.c \
 	mp-serializer.h\
 	mp-trigonometric.c \
@@ -117,8 +109,6 @@ test_mp_equation_SOURCES = \
 	mp-enums.c \
 	mp-enums.h \
 	mp-equation.c \
-	mp-equation-parser.c \
-	mp-equation-lexer.c \
 	mp-serializer.c \
 	mp-serializer.h \
 	mp-trigonometric.c \
@@ -135,24 +125,7 @@ test_mp_equation_LDADD = \
 
 CLEANFILES = \
 	mp-enums.c \
-	mp-enums.h \
-	mp-equation-parser.h \
-	mp-equation-parser.c \
-	mp-equation-lexer.c \
-	mp-equation-lexer.h
-
-# Generate parser files
-mp-equation-parser.c mp-equation-parser.h: mp-equation-parser.y mp-equation-lexer.h
-	$(AM_V_GEN)$(YACC) -d -o mp-equation-parser.c $(srcdir)/mp-equation-parser.y
-
-# Generate lexer files
-mp-equation-lexer.c mp-equation-lexer.h: mp-equation-lexer.l
-	$(AM_V_GEN)$(LEX) $(srcdir)/mp-equation-lexer.l
-
-# Rebuild parser when source files change
-mp-equation-parser.o: mp-equation-lexer.h
-mp-equation-lexer.o: mp-equation-parser.h
-mp-equation.c: mp-equation-lexer.h mp-equation-parser.h
+	mp-enums.h
 
 # Generate enum types
 mp-enums.h: mp-enums.h.template mp-serializer.h
@@ -176,8 +149,6 @@ uninstall-local:
 	&& rm -f "$(DESTDIR)$(bindir)/gnome-calculator"
 
 EXTRA_DIST = \
-	mp-equation-parser.y \
-	mp-equation-lexer.l \
 	mp-enums.c.template \
 	mp-enums.h.template
 
diff --git a/src/mp-equation.c b/src/mp-equation.c
index dc1eff7..48cb0a3 100644
--- a/src/mp-equation.c
+++ b/src/mp-equation.c
@@ -9,29 +9,26 @@
  * license.
  */
 
+#include <stdlib.h>
 #include <ctype.h>
+#include <string.h>
 
-#include "mp-equation-private.h"
-#include "mp-equation-parser.h"
-#include "mp-equation-lexer.h"
-
-extern int _mp_equation_parse(yyscan_t yyscanner);
-
+#include "mp-equation.h"
 
+/* Report whether NAME is a known variable.
+ *
+ * Three built-in names are always defined: "e", "i" and a third literal that
+ * is garbled in this copy (presumably pi - TODO confirm against the real
+ * file).  Any other name is passed to the optional
+ * options->variable_is_defined callback together with
+ * options->callback_data; 0 is returned when no callback is installed.
+ */
 static int
-variable_is_defined(MPEquationParserState *state, const char *name)
+variable_is_defined(MPEquationOptions *options, const char *name)
 {
     /* FIXME: Make more generic */
     if (strcmp(name, "e") == 0 || strcmp(name, "i") == 0 || strcmp(name, "Ï?") == 0)
         return 1;
-    if (state->options->variable_is_defined)
-        return state->options->variable_is_defined(name, state->options->callback_data);
+    if (options->variable_is_defined)
+        return options->variable_is_defined(name, options->callback_data);
     return 0;
 }
 
 
 static int
-get_variable(MPEquationParserState *state, const char *name, MPNumber *z)
+get_variable(MPEquationOptions *options, const char *name, MPNumber *z)
 {
     int result = 1;
 
@@ -41,73 +38,29 @@ get_variable(MPEquationParserState *state, const char *name, MPNumber *z)
         mp_get_i(z);
     else if (strcmp(name, "Ï?") == 0)
         mp_get_pi(z);
-    else if (state->options->get_variable)
-        result = state->options->get_variable(name, z, state->options->callback_data);
+    else if (options->get_variable)
+        result = options->get_variable(name, z, options->callback_data);
     else
         result = 0;
 
     return result;
 }
 
+
 static void
-set_variable(MPEquationParserState *state, const char *name, const MPNumber *x)
+set_variable(MPEquationOptions *options, const char *name, const MPNumber *x)
 {
     // Reserved words, e, Ï?, mod, and, or, xor, not, abs, log, ln, sqrt, int, frac, sin, cos, ...
     if (strcmp(name, "e") == 0 || strcmp(name, "i") == 0 || strcmp(name, "Ï?") == 0)
         return; // FALSE
 
-    if (state->options->set_variable)
-        state->options->set_variable(name, x, state->options->callback_data);
-}
-
-// FIXME: Accept "2sin" not "2 sin", i.e. let the tokenizer collect the multiple
-// Parser then distinguishes between "sin"="s*i*n" or "sin5" = "sin 5" = "sin(5)"
-// i.e. numbers+letters = variable or function depending on following arg
-// letters+numbers = numbers+letters+numbers = function
-
-
-int
-sub_atoi(const char *data)
-{
-    int i, value = 0;
-    const char *digits[] = {"â??", "â??", "â??", "â??", "â??", "â??", "â??", "â??", "â??", "â??", NULL};
-
-    do {
-        for(i = 0; digits[i] != NULL && strncmp(data, digits[i], strlen(digits[i])) != 0; i++);
-        if(digits[i] == NULL)
-            return -1;
-        data += strlen(digits[i]);
-        value = value * 10 + i;
-    } while(*data != '\0');
-
-    return value;
-}
-
-int
-super_atoi(const char *data)
-{
-   int i, sign = 1, value = 0;
-   const char *digits[11] = {"â?°", "Â¹", "Â²", "Â³", "â?´", "â?µ", "â?¶", "â?·", "â?¸", "â?¹", NULL};
-
-   if(strncmp(data, "â?»", strlen("â?»")) == 0) {
-      sign = -1;
-      data += strlen("â?»");
-   }
-
-   do {
-      for(i = 0; digits[i] != NULL && strncmp(data, digits[i], strlen(digits[i])) != 0; i++);
-      if(digits[i] == NULL)
-         return 0;
-      value = value * 10 + i;
-      data += strlen(digits[i]);
-   } while(*data != '\0');
-
-   return sign * value;
+    if (options->set_variable)
+        options->set_variable(name, x, options->callback_data);
 }
 
 
 static int
-function_is_defined(MPEquationParserState *state, const char *name)
+function_is_defined(MPEquationOptions *options, const char *name)
 {
     char *c, *lower_name;
 
@@ -117,7 +70,6 @@ function_is_defined(MPEquationParserState *state, const char *name)
 
     /* FIXME: Make more generic */
     if (strcmp(lower_name, "log") == 0 ||
-        (strncmp(lower_name, "log", 3) == 0 && sub_atoi(lower_name + 3) >= 0) ||
         strcmp(lower_name, "ln") == 0 ||
         strcmp(lower_name, "sqrt") == 0 ||
         strcmp(lower_name, "abs") == 0 ||
@@ -143,14 +95,14 @@ function_is_defined(MPEquationParserState *state, const char *name)
     }
     g_free (lower_name);
 
-    if (state->options->function_is_defined)
-        return state->options->function_is_defined(name, state->options->callback_data);
+    if (options->function_is_defined)
+        return options->function_is_defined(name, options->callback_data);
     return 0;
 }
 
 
 static int
-get_function(MPEquationParserState *state, const char *name, const MPNumber *x, MPNumber *z)
+get_function(MPEquationOptions *options, const char *name, const MPNumber *x, MPNumber *z)
 {
     char *c, *lower_name;
     int result = 1;
@@ -163,15 +115,6 @@ get_function(MPEquationParserState *state, const char *name, const MPNumber *x,
 
     if (strcmp(lower_name, "log") == 0)
         mp_logarithm(10, x, z); // FIXME: Default to ln
-    else if (strncmp(lower_name, "log", 3) == 0) {
-        int base;
-
-        base = sub_atoi(lower_name + 3);
-        if (base < 0)
-            result = 0;
-        else
-            mp_logarithm(base, x, z);
-    }
     else if (strcmp(lower_name, "ln") == 0)
         mp_ln(x, z);
     else if (strcmp(lower_name, "sqrt") == 0) // â??x
@@ -181,7 +124,7 @@ get_function(MPEquationParserState *state, const char *name, const MPNumber *x,
     else if (strcmp(lower_name, "sgn") == 0)
         mp_sgn(x, z);
     else if (strcmp(lower_name, "arg") == 0)
-        mp_arg(x, state->options->angle_units, z);
+        mp_arg(x, options->angle_units, z);
     else if (strcmp(lower_name, "conj") == 0)
         mp_conjugate(x, z);
     else if (strcmp(lower_name, "int") == 0)
@@ -199,17 +142,17 @@ get_function(MPEquationParserState *state, const char *name, const MPNumber *x,
     else if (strcmp(lower_name, "im") == 0)
         mp_imaginary_component(x, z);
     else if (strcmp(lower_name, "sin") == 0)
-        mp_sin(x, state->options->angle_units, z);
+        mp_sin(x, options->angle_units, z);
     else if (strcmp(lower_name, "cos") == 0)
-        mp_cos(x, state->options->angle_units, z);
+        mp_cos(x, options->angle_units, z);
     else if (strcmp(lower_name, "tan") == 0)
-        mp_tan(x, state->options->angle_units, z);
+        mp_tan(x, options->angle_units, z);
     else if (strcmp(lower_name, "sinâ?»Â¹") == 0 || strcmp(lower_name, "asin") == 0)
-        mp_asin(x, state->options->angle_units, z);
+        mp_asin(x, options->angle_units, z);
     else if (strcmp(lower_name, "cosâ?»Â¹") == 0 || strcmp(lower_name, "acos") == 0)
-        mp_acos(x, state->options->angle_units, z);
+        mp_acos(x, options->angle_units, z);
     else if (strcmp(lower_name, "tanâ?»Â¹") == 0 || strcmp(lower_name, "atan") == 0)
-        mp_atan(x, state->options->angle_units, z);
+        mp_atan(x, options->angle_units, z);
     else if (strcmp(lower_name, "sinh") == 0)
         mp_sinh(x, z);
     else if (strcmp(lower_name, "cosh") == 0)
@@ -223,11 +166,11 @@ get_function(MPEquationParserState *state, const char *name, const MPNumber *x,
     else if (strcmp(lower_name, "tanhâ?»Â¹") == 0 || strcmp(lower_name, "atanh") == 0)
         mp_atanh(x, z);
     else if (strcmp(lower_name, "ones") == 0)
-        mp_ones_complement(x, state->options->wordlen, z);
+        mp_ones_complement(x, options->wordlen, z);
     else if (strcmp(lower_name, "twos") == 0)
-        mp_twos_complement(x, state->options->wordlen, z);
-    else if (state->options->get_function)
-        result = state->options->get_function(name, x, z, state->options->callback_data);
+        mp_twos_complement(x, options->wordlen, z);
+    else if (options->get_function)
+        result = options->get_function(name, x, z, options->callback_data);
     else
         result = 0;
 
@@ -238,61 +181,1046 @@ get_function(MPEquationParserState *state, const char *name, const MPNumber *x,
 
 
+/* Convert X from X_UNITS to Z_UNITS, storing the result in Z.
+ *
+ * The work is delegated to the optional options->convert callback (called
+ * with options->callback_data) and its return value is passed through.
+ * Returns 0 when no callback is installed.
+ */
 static int
-convert(MPEquationParserState *state, const MPNumber *x, const char *x_units, const char *z_units, MPNumber *z)
+convert(MPEquationOptions *options, const MPNumber *x, const char *x_units, const char *z_units, MPNumber *z)
 {
-    if (state->options->convert)
-        return state->options->convert(x, x_units, z_units, z, state->options->callback_data);
+    if (options->convert)
+        return options->convert(x, x_units, z_units, z, options->callback_data);
     else
         return 0;
 }
 
 
-MPErrorCode
-mp_equation_parse(const char *expression, MPEquationOptions *options, MPNumber *result, char **error_token)
+/* Kinds of token handled by the hand-written tokeniser and evaluator. */
+typedef enum
 {
-    int ret;
-    MPEquationParserState state;
-    yyscan_t yyscanner;
-    YY_BUFFER_STATE buffer;
+    /* No token; also the tokeniser state between tokens */
+    TOKEN_NONE,
+    /* Numeric literals: plain, superscript digits, subscript digits */
+    TOKEN_NUMBER,
+    TOKEN_SUPER_NUMBER,
+    TOKEN_SUB_NUMBER,
+    /* Arithmetic operators ("mod" is tokenised as TOKEN_MODULUS_DIVIDE) */
+    TOKEN_ADD,
+    TOKEN_SUBTRACT,
+    TOKEN_MULTIPLY,
+    TOKEN_DIVIDE,
+    TOKEN_MODULUS_DIVIDE,
+    TOKEN_EXPONENT,
+    /* Root signs (square, cube and fourth root characters) */
+    TOKEN_ROOT,
+    TOKEN_CUBE_ROOT,
+    TOKEN_FOURTH_ROOT,
+    /* '%' and '!' */
+    TOKEN_PERCENTAGE,
+    TOKEN_FACTORIAL,
+    /* Bitwise/boolean operators, written as symbols or as words */
+    TOKEN_BOOLEAN_AND,
+    TOKEN_BOOLEAN_OR,
+    TOKEN_BOOLEAN_XOR,
+    TOKEN_BOOLEAN_NOT,
+    TOKEN_BOOLEAN_NAND,
+    TOKEN_BOOLEAN_NOR,
+    /* Brackets: ( ) | [ ] floor, ceiling and { } */
+    TOKEN_LEFT_BLOCK,
+    TOKEN_RIGHT_BLOCK,
+    TOKEN_ABS_BLOCK,
+    TOKEN_LEFT_ROUND,
+    TOKEN_RIGHT_ROUND,
+    TOKEN_LEFT_FLOOR,
+    TOKEN_RIGHT_FLOOR,
+    TOKEN_LEFT_CEILING,
+    TOKEN_RIGHT_CEILING,
+    TOKEN_LEFT_FRACTION,
+    TOKEN_RIGHT_FRACTION,
+    /* Names: a defined variable or a defined function */
+    TOKEN_VARIABLE,
+    TOKEN_FUNCTION,
+    /* An already evaluated run of tokens (created by replace_block) */
+    TOKEN_EXPRESSION
+} TokenType;
 
-    if (!(expression && result) || strlen(expression) == 0)
+
+/* One token of an expression: what it is, which part of the expression text
+ * it came from and the number it stands for. */
+typedef struct
+{
+    /* Kind of token */
+    TokenType type;
+    /* Where the token text begins inside the expression string */
+    const gchar *start;
+    /* Where the token text stops; token_get_string() copies [start, end) */
+    const gchar *end;
+    /* Numeric value (token_new() initialises it to zero) */
+    MPNumber value;
+} Token;
+
+
+/* Allocate a token of the given TYPE covering the text from START to END.
+ * The token's value is set to zero.  The token is allocated with g_malloc(),
+ * so it is released with g_free(). */
+static Token *
+token_new(TokenType type, const gchar *start, const gchar *end)
+{
+    Token *t = g_malloc(sizeof(Token));
+
+    mp_set_from_integer(0, &t->value);
+    t->end = end;
+    t->start = start;
+    t->type = type;
+
+    return t;
+}
+
+
+/* Return a newly allocated, NUL terminated copy of the text TOKEN covers.
+ * The caller frees the result with g_free(). */
+static gchar *
+token_get_string (Token *token)
+{
+    int length = (int)(token->end - token->start);
+
+    return g_strdup_printf("%.*s", length, token->start);
+}
+
+
+/* TRUE when C lies in U+2080..U+2089, the range treated as subscript digits. */
+static gboolean
+unichar_issubdigit(gunichar c)
+{
+    if (c < 0x2080)
+        return FALSE;
+
+    return c <= 0x2089;
+}
+
+
+/* Digit value of a subscript digit: its offset from U+2080.  No range check
+ * is made; callers test with unichar_issubdigit() first. */
+static gint
+unichar_subdigit_value(gunichar c)
+{
+    const gunichar subscript_zero = 0x2080;
+
+    return c - subscript_zero;
+}
+
+
+/* TRUE when C is one of the code points treated as a superscript digit:
+ * U+2070, U+00B9, U+00B2, U+00B3 or anything in U+2074..U+2079. */
+static gboolean
+unichar_issuperdigit(gunichar c)
+{
+    switch (c) {
+    case 0x2070:
+    case 0x00B9:
+    case 0x00B2:
+    case 0x00B3:
+        return TRUE;
+    default:
+        return c >= 0x2074 && c <= 0x2079;
+    }
+}
+
+
+/* TRUE when C is one of the three fraction characters U+00BC, U+00BD or
+ * U+00BE. */
+static gboolean
+unichar_isfraction(gunichar c)
+{
+    switch (c) {
+    case 0x00BC: /* VULGAR FRACTION ONE QUARTER */
+    case 0x00BD: /* VULGAR FRACTION ONE HALF */
+    case 0x00BE: /* VULGAR FRACTION THREE QUARTERS */
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+
+/* Digit value of a superscript digit.  U+00B9, U+00B2 and U+00B3 map to 1, 2
+ * and 3; every other code point yields its offset from U+2070.  No range
+ * check is made. */
+static gint
+unichar_superdigit_value(gunichar c)
+{
+    switch (c) {
+    case 0x00B9:
+        return 1;
+    case 0x00B2:
+        return 2;
+    case 0x00B3:
+        return 3;
+    default:
+        return c - 0x2070;
+    }
+}
+
+
+/* Look ahead from the start of a number to find the base it is written in.
+ *
+ * Skips over hexadecimal digits and '.' characters.  If that run is followed
+ * by subscript digits (U+2080..U+2089) they are read as a decimal number and
+ * returned as the base.  If anything else (or the end of the string) follows,
+ * options->base is returned.
+ */
+static int
+peek_base(MPEquationOptions *options, const gchar *number)
+{
+    const gchar *i;
+
+    for (i = number; *i; i = g_utf8_next_char(i)) {
+        gunichar c = g_utf8_get_char(i);
+
+        if (unichar_issubdigit(c)) {
+            int base = 0;
+            do
+            {
+                base = base * 10 + unichar_subdigit_value(c);
+                i = g_utf8_next_char(i);
+                c = g_utf8_get_char(i);
+            } while (unichar_issubdigit(c));
+            return base;
+        }
+
+        /* Stop at the first character that cannot belong to the number.
+         * This must be "&&": with "||" the test holds for every character,
+         * so the scan stopped at the first digit and a trailing subscript
+         * base was never seen. */
+        if (!g_unichar_isxdigit(c) && c != '.')
+            break;
+    }
+
+    return options->base;
+}
+
+
+/* Map a character that is a complete token by itself (an operator or a
+ * bracket) to its token type.  Returns TOKEN_NONE for any other character. */
+static TokenType
+single_char_token_type(gunichar c)
+{
+    switch (c) {
+    case '+':
+        return TOKEN_ADD;
+    case '-':
+    case 0x2212: /* MINUS SIGN */
+        return TOKEN_SUBTRACT;
+    case '*':
+    case 0x00D7: /* MULTIPLICATION SIGN */
+        return TOKEN_MULTIPLY;
+    case '/':
+    case 0x2215: /* DIVISION SLASH */
+    case 0x00F7: /* DIVISION SIGN */
+        return TOKEN_DIVIDE;
+    case '^':
+        return TOKEN_EXPONENT;
+    case 0x221A: /* SQUARE ROOT */
+        return TOKEN_ROOT;
+    case 0x221B: /* CUBE ROOT */
+        return TOKEN_CUBE_ROOT;
+    case 0x221C: /* FOURTH ROOT */
+        return TOKEN_FOURTH_ROOT;
+    case '%':
+        return TOKEN_PERCENTAGE;
+    case '!':
+        return TOKEN_FACTORIAL;
+    case 0x2227: /* LOGICAL AND */
+        return TOKEN_BOOLEAN_AND;
+    case 0x2228: /* LOGICAL OR */
+        return TOKEN_BOOLEAN_OR;
+    case 0x22BB: /* XOR */
+    case 0x2295: /* CIRCLED PLUS */
+        return TOKEN_BOOLEAN_XOR;
+    case '~':
+        return TOKEN_BOOLEAN_NOT;
+    case 0x22BC: /* NAND */
+        return TOKEN_BOOLEAN_NAND;
+    case 0x22BD: /* NOR */
+        return TOKEN_BOOLEAN_NOR;
+    case '(':
+        return TOKEN_LEFT_BLOCK;
+    case ')':
+        return TOKEN_RIGHT_BLOCK;
+    case '|':
+        return TOKEN_ABS_BLOCK;
+    case '[':
+        return TOKEN_LEFT_ROUND;
+    case ']':
+        return TOKEN_RIGHT_ROUND;
+    case 0x2308: /* LEFT CEILING */
+        return TOKEN_LEFT_CEILING;
+    case 0x2309: /* RIGHT CEILING */
+        return TOKEN_RIGHT_CEILING;
+    case 0x230A: /* LEFT FLOOR */
+        return TOKEN_LEFT_FLOOR;
+    case 0x230B: /* RIGHT FLOOR */
+        return TOKEN_RIGHT_FLOOR;
+    case '{':
+        return TOKEN_LEFT_FRACTION;
+    case '}':
+        return TOKEN_RIGHT_FRACTION;
+    default:
+        return TOKEN_NONE;
+    }
+}
+
+
+/* Split EXPRESSION into a list of Token, stored in *TOKENS.
+ *
+ * This is a small state machine driven one UTF-8 character at a time.
+ * current_token is TOKEN_NONE between tokens and otherwise names the
+ * multi-character token being collected (number, superscript number,
+ * subscript number or name).  When a multi-character token ends, the
+ * terminating character is fed through again ("refeed") in the TOKEN_NONE
+ * state.
+ *
+ * Names are classified when they end: operator keywords (mod, and, or, nand,
+ * nor, xor), then defined functions, then defined variables and finally a
+ * run of single-character variables (an implied product).  Variable tokens
+ * get their value looked up immediately.
+ *
+ * Returns PARSER_ERR_NONE at the end of the string, PARSER_ERR_INVALID for an
+ * unrecognised character or a number that cannot be converted, and
+ * PARSER_ERR_UNKNOWN_VARIABLE for a name that matches nothing.  On error the
+ * tokens collected so far are left in *TOKENS.
+ */
+static MPErrorCode
+parse(MPEquationOptions *options, const gchar *expression, GList **tokens)
+{
+    TokenType current_token = TOKEN_NONE;
+    const gchar *i, *token_start = NULL;
+    int number_base = 0;
+
+    *tokens = NULL;
+    i = expression;
+    while (TRUE) {
+        gboolean refeed = FALSE;
+        gunichar c = g_utf8_get_char(i);
+
+        switch (current_token)
+        {
+        case TOKEN_NONE:
+            token_start = i;
+            if (g_unichar_isxdigit(c) || c == '.' || unichar_isfraction(c))
+            {
+                number_base = peek_base(options, i);
+                g_debug("base=%d", number_base);
+                if (g_unichar_xdigit_value(c) < number_base)
+                    current_token = TOKEN_NUMBER;
+            }
+
+            /* NOTE(review): a letter overrides the number state chosen above,
+             * so the hex digits A-F always start a name.  Kept as found -
+             * confirm whether that is intended. */
+            if (unichar_issuperdigit(c) || c == 0x207B /* SUPERSCRIPT MINUS */)
+                current_token = TOKEN_SUPER_NUMBER;
+            else if (unichar_issubdigit(c))
+                current_token = TOKEN_SUB_NUMBER;
+            else if (g_unichar_isalpha(c))
+                current_token = TOKEN_VARIABLE;
+            else if (c == '\0')
+                return PARSER_ERR_NONE;
+            else {
+                TokenType single = single_char_token_type(c);
+
+                /* Operators and brackets are complete tokens and spaces are
+                 * skipped; in both cases the state stays TOKEN_NONE, which
+                 * must not be mistaken for an unknown character. */
+                if (single != TOKEN_NONE)
+                    *tokens = g_list_append(*tokens, token_new(single, i, i));
+                else if (c != ' ' && current_token == TOKEN_NONE)
+                {
+                    g_debug ("unknown 0x%04X", c);
+                    return PARSER_ERR_INVALID;
+                }
+            }
+            break;
+
+        case TOKEN_NUMBER:
+        {
+            /* Characters that keep the number going: '.', the degree sign,
+             * ' and ", digits valid in the detected base, subscript digits
+             * and fraction characters */
+            gboolean in_number =
+                c == '.' ||
+                c == 0x00B0 /* DEGREE SIGN */ ||
+                c == '\'' ||
+                c == '"' ||
+                (g_unichar_isxdigit(c) && (g_unichar_xdigit_value(c) < number_base)) ||
+                unichar_issubdigit(c) ||
+                unichar_isfraction(c);
+
+            if (!in_number) {
+                Token *t;
+                gchar *string;
+                gboolean failed;
+
+                current_token = TOKEN_NONE;
+                t = token_new(TOKEN_NUMBER, token_start, i);
+                string = token_get_string(t);
+                /* Non-zero return means the text is not a valid number */
+                failed = mp_set_from_string(string, options->base, &t->value);
+                g_free(string);
+
+                if (!failed)
+                    *tokens = g_list_append(*tokens, t);
+                else {
+                    /* The token is not stored, so release it here */
+                    g_free(t);
+                    if (!g_unichar_isalpha(c))
+                        return PARSER_ERR_INVALID;
+
+                    /* Try as a variable instead (token_start is unchanged,
+                     * so the name includes the characters read so far) */
+                    current_token = TOKEN_VARIABLE;
+                }
+
+                refeed = TRUE;
+            }
+            break;
+        }
+
+        case TOKEN_SUPER_NUMBER:
+            if (!unichar_issuperdigit(c)) {
+                *tokens = g_list_append(*tokens, token_new(TOKEN_SUPER_NUMBER, token_start, i));
+                current_token = TOKEN_NONE;
+                refeed = TRUE;
+            }
+            break;
+
+        case TOKEN_SUB_NUMBER:
+            if (!unichar_issubdigit(c)) {
+                *tokens = g_list_append(*tokens, token_new(TOKEN_SUB_NUMBER, token_start, i));
+                current_token = TOKEN_NONE;
+                refeed = TRUE;
+            }
+            break;
+
+        case TOKEN_VARIABLE:
+            if (!g_unichar_isalpha(c)) {
+                gchar *name;
+                TokenType named = TOKEN_NONE;
+
+                name = g_strdup_printf("%.*s", (int)(i - token_start), token_start);
+
+                /* Operator keywords first, then functions */
+                if (strcmp(name, "mod") == 0)
+                    named = TOKEN_MODULUS_DIVIDE;
+                else if (strcmp(name, "and") == 0)
+                    named = TOKEN_BOOLEAN_AND;
+                else if (strcmp(name, "or") == 0)
+                    named = TOKEN_BOOLEAN_OR;
+                else if (strcmp(name, "nand") == 0)
+                    named = TOKEN_BOOLEAN_NAND;
+                else if (strcmp(name, "nor") == 0)
+                    named = TOKEN_BOOLEAN_NOR;
+                else if (strcmp(name, "xor") == 0)
+                    named = TOKEN_BOOLEAN_XOR;
+                else if (function_is_defined(options, name))
+                    named = TOKEN_FUNCTION;
+
+                if (named != TOKEN_NONE) {
+                    *tokens = g_list_append(*tokens, token_new(named, token_start, i));
+                }
+                else if (variable_is_defined(options, name)) {
+                    Token *t;
+                    t = token_new(TOKEN_VARIABLE, token_start, i);
+                    get_variable(options, name, &t->value);
+                    *tokens = g_list_append(*tokens, t);
+                }
+                else {
+                    const gchar *j;
+                    GString *v;
+                    GList *variables = NULL;
+
+                    /* If each character is a defined variable then the name
+                     * is a product of variables */
+                    v = g_string_new("");
+                    for (j = token_start; *j; j = g_utf8_next_char(j)) {
+                        Token *t;
+
+                        if (j == i)
+                            break;
+
+                        g_string_truncate(v, 0);
+                        g_string_append_unichar(v, g_utf8_get_char(j));
+                        if (!variable_is_defined(options, v->str))
+                            break;
+
+                        t = token_new(TOKEN_VARIABLE, j, g_utf8_next_char(j));
+                        get_variable(options, v->str, &t->value);
+                        variables = g_list_append(variables, t);
+                    }
+                    g_string_free(v, TRUE);
+
+                    /* Stopped early: some character is not a variable */
+                    if (j != i)
+                    {
+                        g_list_foreach(variables, (GFunc) g_free, NULL);
+                        g_list_free(variables);
+                        g_free(name);
+                        return PARSER_ERR_UNKNOWN_VARIABLE;
+                    }
+
+                    *tokens = g_list_concat(*tokens, variables);
+                }
+
+                g_free(name);
+
+                current_token = TOKEN_NONE;
+                refeed = TRUE;
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        if (!refeed)
+        {
+           /* Never step past the terminating NUL */
+           if (*i == '\0')
+           {
+               g_warning ("Unexpected end of line");
+               return PARSER_ERR_INVALID;
+           }
+           i = g_utf8_next_char(i);
+        }
+    }
+
+    /* Not reached: the loop above only exits by returning */
+    return PARSER_ERR_NONE;
+}
+
+
+/* Find the first token of the given TYPE, searching forwards from
+ * FIRST_TOKEN up to and including LAST_TOKEN.
+ *
+ * Returns the list link holding the token, or NULL if there is none.  An
+ * empty list (FIRST_TOKEN == NULL) and a LAST_TOKEN that is never reached
+ * both end the search with NULL instead of dereferencing a NULL link.
+ */
+static GList *
+find_token(GList *first_token, GList *last_token, TokenType type)
+{
+    GList *i;
+
+    for (i = first_token; i != NULL; i = i->next) {
+        Token *t = i->data;
+        if (t->type == type)
+            return i;
+        if (i == last_token)
+            break;
+    }
+
+    return NULL;
+}
+
+
+/* Find the last token of the given TYPE, searching backwards from
+ * LAST_TOKEN down to and including FIRST_TOKEN.
+ *
+ * Returns the list link holding the token, or NULL if there is none.  An
+ * empty list (LAST_TOKEN == NULL) and a FIRST_TOKEN that is never reached
+ * both end the search with NULL instead of dereferencing a NULL link.
+ */
+static GList *
+rfind_token(GList *first_token, GList *last_token, TokenType type)
+{
+    GList *i;
+
+    for (i = last_token; i != NULL; i = i->prev) {
+        Token *t = i->data;
+        if (t->type == type)
+            return i;
+        if (i == first_token)
+            break;
+    }
+
+    return NULL;
+}
+
+
+/* Collapse the tokens from START to END (both inclusive) into a single
+ * TOKEN_EXPRESSION token whose value is RESULT.
+ *
+ * The new token spans the text from the start of START's token to the end of
+ * END's token.  *FIRST and *LAST are the bounds of the range the caller is
+ * working on; when START or END is one of them it is redirected to the new
+ * link.  Every replaced link and its token is freed, so START, END and
+ * anything between them must not be used after this call.
+ */
+static void
+replace_block(GList **first, GList **last, GList *start, GList *end, MPNumber *result)
+{
+    Token *t, *t_start, *t_end;
+    GList *link;
+    gboolean done;
+
+    /* Create new expression that combines the block */
+    t_start = start->data;
+    t_end = end->data;
+    t = token_new(TOKEN_EXPRESSION, t_start->start, t_end->end);
+    mp_set_from_mp(result, &t->value);
+
+    /* Replace block with new link */
+    link = g_list_alloc();
+    link->data = t;
+    link->next = end->next;
+    if (link->next)
+        link->next->prev = link;
+    link->prev = start->prev;
+    if (link->prev)
+        link->prev->next = link;
+
+    if (*first == start)
+        *first = link;
+    if (*last == end)
+        *last = link;
+
+    /* Delete block, END included.  Stopping before END leaked its link and
+     * token, and ran off the list when START == END. */
+    link = start;
+    do {
+        GList *next = link->next;
+
+        done = (link == end);
+        /* Detach first so g_list_free() releases only this one link */
+        link->prev = NULL;
+        link->next = NULL;
+        g_free(link->data);
+        g_list_free(link);
+        link = next;
+    } while (!done);
+}
+
+
+/* TRUE when TOKEN carries a number that can be used as an operand: an
+ * evaluated expression, a numeric literal or a variable.
+ *
+ * This is a predicate, so it returns gboolean rather than an MPErrorCode;
+ * callers only ever test the result for truth. */
+static gboolean
+has_value (Token *token)
+{
+    switch (token->type) {
+    case TOKEN_EXPRESSION:
+    case TOKEN_NUMBER:
+    case TOKEN_VARIABLE:
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+
+/* Evaluate a root sign and replace it and its operand with the result.
+ *
+ * FUNCTION is the link holding a TOKEN_ROOT, TOKEN_CUBE_ROOT or
+ * TOKEN_FOURTH_ROOT token.  The degree is 3 or 4 for the cube and fourth
+ * root signs.  For TOKEN_ROOT it is 2, unless the preceding token is a
+ * TOKEN_SUB_NUMBER, in which case its subscript digits give the degree and
+ * that token becomes part of the replaced block.
+ *
+ * The token after FUNCTION must carry a value.  Returns PARSER_ERR_INVALID
+ * when it is missing or has no value, or when FUNCTION is not a root token;
+ * PARSER_ERR_NONE otherwise.  *FIRST / *LAST are updated by replace_block().
+ */
+static MPErrorCode
+do_root(GList **first, GList **last, GList *function)
+{
+    GList *start;
+    Token *f, *arg;
+    MPNumber result;
+    int n = 2;
+
+    start = function;
+    f = function->data;
+    if (f->type == TOKEN_ROOT)
+    {
+        if (function->prev) {
+            Token *t = function->prev->data;
+
+            if (t->type == TOKEN_SUB_NUMBER) {
+                const gchar *i;
+
+                /* Read the subscript digits as a decimal degree */
+                n = 0;
+                for (i = t->start; i != t->end; i = g_utf8_next_char(i))
+                    n = n * 10 + unichar_subdigit_value(g_utf8_get_char(i));
+
+                /* The degree token is consumed together with the root */
+                start = function->prev;
+            }
+        }
+    }
+    else if (f->type == TOKEN_CUBE_ROOT)
+        n = 3;
+    else if (f->type == TOKEN_FOURTH_ROOT)
+        n = 4;
+    else
+    {
+        g_warning("Unknown root function: %d", f->type);
         return PARSER_ERR_INVALID;
+    }
 
-    memset(&state, 0, sizeof(MPEquationParserState));
-    state.options = options;
-    state.variable_is_defined = variable_is_defined;
-    state.get_variable = get_variable;
-    state.set_variable = set_variable;
-    state.function_is_defined = function_is_defined;
-    state.get_function = get_function;
-    state.convert = convert;
-    state.error = 0;
+    //g_debug("root %d", n);
 
-    mp_clear_error();
+    if (!function->next)
+        return PARSER_ERR_INVALID;
+    arg = function->next->data;
+    if (!has_value(arg))
+        return PARSER_ERR_INVALID;
+
+    mp_root(&arg->value, n, &result);
+    replace_block(first, last, start, function->next, &result);
+    return PARSER_ERR_NONE;
+}
+  
+
+/* Evaluate a function call and replace it and its argument with the result.
+ *
+ * FUNCTION is the link holding the function name token.  The name may be
+ * followed by one modifier before the argument:
+ *   - a TOKEN_SUPER_NUMBER: the result is raised to that integer power,
+ *     except that an exponent of -1 selects the inverse function (the name
+ *     with the superscript "-1" suffix appended);
+ *   - a TOKEN_SUB_NUMBER: a base, accepted only for "log".
+ * The argument itself must be a token that carries a value.
+ *
+ * Returns PARSER_ERR_INVALID when the argument is missing or has no value,
+ * when a base is given for a function other than "log", or when
+ * get_function() does not know the name; PARSER_ERR_NONE otherwise.
+ */
+static MPErrorCode
+do_function(MPEquationOptions *options, GList **first, GList **last, GList *function)
+{
+    GList *end;
+    Token *name, *arg;
+    gchar *string;
+    MPNumber result;
+    int exponent = 1, base = -1;
+
+    if (!function->next)
+        return PARSER_ERR_INVALID;
+
+    end = function->next;
+    arg = end->data;
+    if (arg->type == TOKEN_SUPER_NUMBER) {
+        const gchar *i;
 
-    _mp_equation_lex_init_extra(&state, &yyscanner);
-    buffer = _mp_equation__scan_string(expression, yyscanner);
+        /* Read the superscript digits, skipping a leading superscript
+         * minus (U+207B) which is applied afterwards */
+        exponent = 0;
+        i = arg->start;
+        if (g_utf8_get_char(i) == 0x207B /* SUPERSCRIPT MINUS */)
+            i = g_utf8_next_char(i);
+        for (; i != arg->end; i = g_utf8_next_char(i))
+            exponent = exponent * 10 + unichar_superdigit_value(g_utf8_get_char(i));
+        if (g_utf8_get_char(arg->start) == 0x207B /* SUPERSCRIPT MINUS */)
+            exponent = -exponent;
 
-    ret = _mp_equation_parse(yyscanner);
-    if (state.error_token != NULL && error_token != NULL) {
-        *error_token = state.error_token;
+        if (!end->next)
+            return PARSER_ERR_INVALID;
+        end = end->next;
+        arg = end->data;
     }
+    else if (arg->type == TOKEN_SUB_NUMBER)
+    {
+        const gchar *i;
 
-    _mp_equation__delete_buffer(buffer, yyscanner);
-    _mp_equation_lex_destroy(yyscanner);
+        /* Read the subscript digits as a decimal base */
+        base = 0;
+        for (i = arg->start; i != arg->end; i = g_utf8_next_char(i))
+            base = base * 10 + unichar_subdigit_value(g_utf8_get_char(i));
 
-    /* Error during parsing */
-    if (state.error)
-        return state.error;
+        if (!end->next)
+            return PARSER_ERR_INVALID;
+        end = end->next;
+        arg = end->data;
+    }
 
-    if (mp_get_error())
-        return PARSER_ERR_MP;
+    if (!has_value (arg))
+        return PARSER_ERR_INVALID;
+
+    name = function->data;
 
-    /* Failed to parse */
-    if (ret)
+    /* Special case for inverse functions */
+    if (exponent == -1) {
+        string = g_strdup_printf("%.*sâ?»Â¹", (int)(name->end - name->start), name->start);
+        exponent = 1;
+    }
+    else
+        string = token_get_string(name);
+
+    if (base >= 0) {
+        //g_debug ("%s base %d", string, base);
+        if (strcmp(string, "log") == 0)
+            mp_logarithm(base, &arg->value, &result);
+        else {
+            g_free(string);
+            return PARSER_ERR_INVALID;
+        }
+    }
+    else {
+        //g_debug ("function '%s'", string);
+        /* get_function() returns 0 for an unknown name and then leaves
+         * result untouched; do not go on to use it in that case */
+        if (!get_function(options, string, &arg->value, &result)) {
+            g_free(string);
+            return PARSER_ERR_INVALID;
+        }
+    }
+    g_free(string);
+
+    if (exponent != 1)
+        mp_xpowy_integer(&result, exponent, &result);
+
+    replace_block(first, last, function, end, &result);
+    return PARSER_ERR_NONE;
+}
+
+
+/* Evaluate a binary operator and replace "a OP b" with the result.
+ *
+ * OPERATION is the link holding the operator token; the operands are the
+ * tokens immediately before and after it and both must carry a value.
+ * NAND and NOR are computed as AND / OR followed by mp_not() using
+ * options->wordlen.
+ *
+ * Returns PARSER_ERR_INVALID when an operand is missing or has no value, or
+ * when the token is not one of the operators handled here; PARSER_ERR_NONE
+ * otherwise.  *FIRST / *LAST are updated by replace_block().
+ */
+static MPErrorCode
+do_operation (MPEquationOptions *options, GList **first, GList **last, GList *operation)
+{
+    Token *o, *a, *b;
+    MPNumber result;
+
+    if (!operation->prev || !operation->next)
+        return PARSER_ERR_INVALID;
+    o = operation->data;
+    a = operation->prev->data;
+    b = operation->next->data;
+  
+    if (!has_value(a) || !has_value(b))
         return PARSER_ERR_INVALID;
 
-    mp_set_from_mp(&state.ret, result);
+    switch (o->type) {
+    case TOKEN_ADD:
+        //g_debug ("+");
+        mp_add(&a->value, &b->value, &result);
+        break;
+    case TOKEN_SUBTRACT:
+        //g_debug ("-");
+        mp_subtract(&a->value, &b->value, &result);
+        break;
+    case TOKEN_MULTIPLY:
+        //g_debug ("*");
+        mp_multiply(&a->value, &b->value, &result);
+        break;
+    case TOKEN_DIVIDE:
+        //g_debug ("/");
+        mp_divide(&a->value, &b->value, &result);
+        break;
+    case TOKEN_MODULUS_DIVIDE:
+        //g_debug ("mod");
+        mp_modulus_divide(&a->value, &b->value, &result);
+        break;
+    case TOKEN_EXPONENT:
+        //g_debug ("^");
+        mp_xpowy(&a->value, &b->value, &result);
+        break;
+    case TOKEN_BOOLEAN_AND:
+        //g_debug ("and");
+        mp_and(&a->value, &b->value, &result);
+        break;
+    case TOKEN_BOOLEAN_OR:
+        //g_debug ("or");
+        mp_or(&a->value, &b->value, &result);
+        break;
+    case TOKEN_BOOLEAN_NAND:
+        //g_debug ("nand");
+        mp_and(&a->value, &b->value, &result);
+        mp_not(&result, options->wordlen, &result);
+        break;
+    case TOKEN_BOOLEAN_NOR:
+        //g_debug ("nor");
+        mp_or(&a->value, &b->value, &result);
+        mp_not(&result, options->wordlen, &result);
+        break;
+    case TOKEN_BOOLEAN_XOR:
+        //g_debug ("xor");
+        mp_xor(&a->value, &b->value, &result);
+        break;
+    default:
+        g_warning ("Unknown operation: %d", o->type);
+        return PARSER_ERR_INVALID;
+    }
+
+    /* The operator and both operands collapse into one expression token */
+    replace_block(first, last, operation->prev, operation->next, &result);
+    return PARSER_ERR_NONE;
+}
+
+
+/*
+ * Apply a superscript-number token as an integer power of the token before it.
+ *
+ * The token text between `start` and `end` is read one UTF-8 character at a
+ * time.  A leading U+207B (SUPERSCRIPT MINUS) makes the exponent negative;
+ * every other character is converted with unichar_superdigit_value() and
+ * accumulated as a decimal number.  The preceding token's value is raised to
+ * that power with mp_xpowy_integer() and both tokens are handed to
+ * replace_block() together with the result.
+ *
+ * first - passed through to replace_block() unchanged.
+ * last  - passed through to replace_block() unchanged.
+ * super - list node holding the superscript-number token.
+ *
+ * Returns PARSER_ERR_NONE on success.  Returns PARSER_ERR_INVALID when there
+ * is no preceding token with a value, or when the exponent cannot be
+ * represented in an int.
+ */
+static MPErrorCode
+do_super(GList **first, GList **last, GList *super)
+{
+    MPNumber result;
+    Token *t;
+    int value = 0;
+    gboolean negative;
+    const gchar *i;
+
+    if (!super->prev || !has_value(super->prev->data))
+        return PARSER_ERR_INVALID;
+
+    t = super->data;
+    i = t->start;
+
+    /* An optional leading U+207B (superscript minus) negates the exponent. */
+    negative = g_utf8_get_char(i) == 0x207B;
+    if (negative)
+        i = g_utf8_next_char(i);
+
+    for (; i != t->end; i = g_utf8_next_char(i)) {
+        int digit = unichar_superdigit_value(g_utf8_get_char(i));
+
+        /* The exponent comes straight from user input, and signed overflow is
+         * undefined behaviour, so refuse anything that does not fit in an int.
+         * NOTE(review): a negative digit is presumably the helper's
+         * "not a superscript digit" result - confirm against its definition. */
+        if (digit < 0 || value > (G_MAXINT - digit) / 10)
+            return PARSER_ERR_INVALID;
+        value = value * 10 + digit;
+    }
+    if (negative)
+        value = -value;
+
+    /* Raise the preceding value to the decoded power. */
+    t = super->prev->data;
+    mp_xpowy_integer(&t->value, value, &result);
+    replace_block(first, last, super->prev, super, &result);
+    return PARSER_ERR_NONE;
+}
+
+
+/*static void
+print_tokens(GList *first, GList *last)
+{
+    GList *link;
+
+    for (link = first; link; link = link->next) {
+        Token *t = link->data;
+        g_print("%d", t->type);
+
+        if (link == last)
+            break;
+        g_print("-");
+    }
+    g_print("\n");
+}*/
+
+
+/*
+ * Reduce the inclusive token range [first, last] to a single value and copy
+ * that value into `result`.
+ *
+ * Every pass of the loop looks for the first construct in the list below that
+ * is still present, evaluates it, collapses its tokens with replace_block()
+ * and starts over.  The order of the stages therefore defines precedence:
+ *
+ *    1. parentheses (innermost pair first), then absolute-value, round,
+ *       floor, ceiling and fraction blocks - each solved recursively
+ *    2. factorial
+ *    3. unary minus
+ *    4. boolean NOT
+ *    5. roots (square, cube, fourth)
+ *    6. functions
+ *    7. unary plus
+ *    8. superscript powers
+ *    9. '^', rightmost first (rfind_token)
+ *   10. implied multiplication
+ *   11. divide, modulus, multiply, and, or, nand, nor, xor - the first kind
+ *       found in that order
+ *   12. add / subtract, leftmost first
+ *
+ * options - passed to the helpers; `wordlen` is used for boolean NOT.
+ * first   - first token of the range to solve.
+ * last    - last token of the range to solve (inclusive).
+ * result  - receives the value once a single valued token is left.
+ *
+ * Returns PARSER_ERR_NONE on success, PARSER_ERR_INVALID when the range is
+ * malformed or cannot be reduced to one value, or whatever error a helper
+ * (do_root, do_function, do_super, do_operation, a nested solve) reported.
+ */
+static MPErrorCode
+solve(MPEquationOptions *options, GList *first, GList *last, MPNumber *result)
+{
+    //Token *t_start = first->data, *t_last = last->data; 
+    //g_debug ("solve '%.*s'", (int) (t_last->end - t_start->start + 1), t_start->start);
+
+    while (TRUE) {
+        GList *link;
+        GList *p_start;
+        Token *t;
+        MPErrorCode error;
+
+        //print_tokens (first, last);
+
+        // FIXME: Make a generic parenthesis style handler
+
+        /* Collapse parenthesis */
+        p_start = find_token(first, last, TOKEN_LEFT_BLOCK);
+        if (p_start != NULL) {
+            GList *p_end = NULL;
+            MPNumber r;
+
+            /* Walk forward to the first ')'.  Every '(' met on the way becomes
+             * the new start, so the innermost pair is the one selected.  A '('
+             * sitting on `last` has nothing after it inside the range. */
+            if (p_start != last) {
+                for (p_end = p_start->next; p_end; p_end = p_end->next) {
+                    Token *tok = p_end->data;
+                    if (tok->type == TOKEN_LEFT_BLOCK)
+                        p_start = p_end;
+                    else if (tok->type == TOKEN_RIGHT_BLOCK)
+                        break;
+                    if (p_end == last) {
+                        p_end = NULL;
+                        break;
+                    }
+                }
+            }
+
+            /* No closing parenthesis */
+            if (!p_end)
+            {
+                g_debug ("no )");
+                return PARSER_ERR_INVALID;
+            }
+
+            /* "()" - recursing would pass a range whose start lies after its
+             * end, and the scans below would then run off the token list. */
+            if (p_end == p_start->next)
+                return PARSER_ERR_INVALID;
+
+            error = solve(options, p_start->next, p_end->prev, &r);
+            if (error)
+                return error;
+            replace_block(&first, &last, p_start, p_end, &r);
+            continue;
+        }
+
+        /* Collapse absolute value blocks */
+        p_start = find_token(first, last, TOKEN_ABS_BLOCK);
+        if (p_start && p_start != last && p_start->next) {
+            GList *p_end;
+            MPNumber r;
+
+            p_end = find_token(p_start->next, last, TOKEN_ABS_BLOCK);
+            if (p_end) {
+                /* Nothing between the delimiters */
+                if (p_end == p_start->next)
+                    return PARSER_ERR_INVALID;
+                error = solve(options, p_start->next, p_end->prev, &r);
+                if (error)
+                    return error;
+                mp_abs(&r, &r);
+                replace_block(&first, &last, p_start, p_end, &r);
+                continue;
+            }
+        }
+
+        /* Collapse round blocks */
+        p_start = find_token(first, last, TOKEN_LEFT_ROUND);
+        if (p_start && p_start != last && p_start->next) {
+            GList *p_end;
+            MPNumber r;
+
+            p_end = find_token(p_start->next, last, TOKEN_RIGHT_ROUND);
+            if (p_end) {
+                /* Nothing between the delimiters */
+                if (p_end == p_start->next)
+                    return PARSER_ERR_INVALID;
+                error = solve(options, p_start->next, p_end->prev, &r);
+                if (error)
+                    return error;
+                mp_round(&r, &r);
+                replace_block(&first, &last, p_start, p_end, &r);
+                continue;
+            }
+        }
+
+        /* Collapse floor blocks */
+        p_start = find_token(first, last, TOKEN_LEFT_FLOOR);
+        if (p_start && p_start != last && p_start->next) {
+            GList *p_end;
+            MPNumber r;
+
+            p_end = find_token(p_start->next, last, TOKEN_RIGHT_FLOOR);
+            if (p_end) {
+                /* Nothing between the delimiters */
+                if (p_end == p_start->next)
+                    return PARSER_ERR_INVALID;
+                error = solve(options, p_start->next, p_end->prev, &r);
+                if (error)
+                    return error;
+                mp_floor(&r, &r);
+                replace_block(&first, &last, p_start, p_end, &r);
+                continue;
+            }
+        }
+
+        /* Collapse ceiling blocks */
+        p_start = find_token(first, last, TOKEN_LEFT_CEILING);
+        if (p_start && p_start != last && p_start->next) {
+            GList *p_end;
+            MPNumber r;
+
+            p_end = find_token(p_start->next, last, TOKEN_RIGHT_CEILING);
+            if (p_end) {
+                /* Nothing between the delimiters */
+                if (p_end == p_start->next)
+                    return PARSER_ERR_INVALID;
+                error = solve(options, p_start->next, p_end->prev, &r);
+                if (error)
+                    return error;
+                mp_ceiling(&r, &r);
+                replace_block(&first, &last, p_start, p_end, &r);
+                continue;
+            }
+        }
+
+        /* Collapse fraction blocks */
+        p_start = find_token(first, last, TOKEN_LEFT_FRACTION);
+        if (p_start && p_start != last && p_start->next) {
+            GList *p_end;
+            MPNumber r;
+
+            p_end = find_token(p_start->next, last, TOKEN_RIGHT_FRACTION);
+            if (p_end) {
+                /* Nothing between the delimiters */
+                if (p_end == p_start->next)
+                    return PARSER_ERR_INVALID;
+                error = solve(options, p_start->next, p_end->prev, &r);
+                if (error)
+                    return error;
+                mp_fractional_part(&r, &r);
+                replace_block(&first, &last, p_start, p_end, &r);
+                continue;
+            }
+        }
+
+        /* Factorial applies to the value directly before it */
+        link = find_token(first, last, TOKEN_FACTORIAL);
+        if (link) {
+            Token *arg;
+            MPNumber r;
+
+            if (!link->prev)
+                return PARSER_ERR_INVALID;
+            arg = link->prev->data;
+            if (!has_value(arg))
+                return PARSER_ERR_INVALID;
+            mp_factorial(&arg->value, &r);
+            replace_block(&first, &last, link->prev, link, &r);
+            continue;
+        }
+
+        /* Unary minus: a '-' followed by a value and not preceded by one */
+        for (link = first; link; link = link->next) {
+            Token *tok = link->data;
+            if (link == last) {
+                link = NULL;
+                break;
+            }
+            if (tok->type == TOKEN_SUBTRACT &&
+                has_value (link->next->data) &&
+                (!link->prev || !has_value(link->prev->data)))
+                break;
+        }
+        if (link) {
+            Token *arg;
+            MPNumber r;
+
+            if (!link->next)
+                return PARSER_ERR_INVALID;
+            arg = link->next->data;
+
+            mp_invert_sign(&arg->value, &r);
+            replace_block(&first, &last, link, link->next, &r);
+            continue;
+        }
+
+        /* Boolean NOT: only fire on a NOT whose operand lies inside the range
+         * and already holds a value.  A NOT whose operand is not ready yet is
+         * left for a later pass; if it never becomes ready the range fails to
+         * converge and PARSER_ERR_INVALID is returned at the bottom. */
+        for (link = first; link; link = link->next) {
+            Token *tok = link->data;
+            if (link == last) {
+                link = NULL;
+                break;
+            }
+            if (tok->type == TOKEN_BOOLEAN_NOT && has_value (link->next->data))
+                break;
+        }
+        if (link) {
+            Token *arg = link->next->data;
+            MPNumber r;
+
+            mp_not(&arg->value, options->wordlen, &r);
+            replace_block(&first, &last, link, link->next, &r);
+            continue;
+        }
+
+        /* Roots: square, then cube, then fourth */
+        link = find_token(first, last, TOKEN_ROOT);
+        if (!link)
+            link = find_token(first, last, TOKEN_CUBE_ROOT);
+        if (!link)
+            link = find_token(first, last, TOKEN_FOURTH_ROOT);
+        if (link) {
+            error = do_root(&first, &last, link);
+            if (error)
+                return error;
+            continue;
+        }
+
+        /* Function calls */
+        link = find_token(first, last, TOKEN_FUNCTION);
+        if (link) {
+            error = do_function(options, &first, &last, link);
+            if (error)
+                return error;
+            continue;
+        }
+
+        /* Unary plus: a '+' followed by a value and not preceded by one */
+        // FIXME: Not sure how mathematically valid the plus sign is
+        for (link = first; link; link = link->next) {
+            Token *tok = link->data;
+            if (link == last) {
+                link = NULL;
+                break;
+            }
+            if (tok->type == TOKEN_ADD &&
+                has_value (link->next->data) &&
+                (!link->prev || !has_value(link->prev->data)))
+                break;
+        }
+        if (link) {
+            Token *x = link->next->data;
+            replace_block(&first, &last, link, link->next, &x->value);
+            continue;
+        }
+
+        /* Superscript powers */
+        link = find_token(first, last, TOKEN_SUPER_NUMBER);
+        if (link) {
+            error = do_super(&first, &last, link);
+            if (error)
+                return error;
+            continue;
+        }
+
+        /* '^' - searched from the right so that a^b^c groups as a^(b^c) */
+        link = rfind_token(first, last, TOKEN_EXPONENT);
+        if (link) {
+            error = do_operation(options, &first, &last, link);
+            if (error)
+                return error;
+            continue;
+        }
+
+        /* Implied multiply */
+        for (link = first; link; link = link->next) {
+            Token *tok = link->data, *tok2;
+            if (link == last) {
+                link = NULL;
+                break;
+            }
+            tok2 = link->next->data;
+            if ((tok->type == TOKEN_NUMBER || tok->type == TOKEN_VARIABLE || tok->type == TOKEN_EXPRESSION) &&
+                tok2->type != TOKEN_NUMBER &&
+                has_value (tok2)) // Or the previous value if it is a number
+                break;
+        }
+        if (link) {
+            Token *lhs = link->data, *rhs = link->next->data;
+            MPNumber r;
+
+            mp_multiply(&lhs->value, &rhs->value, &r);
+            replace_block(&first, &last, link, link->next, &r);
+            continue;
+        }
+
+        /* NOTE(review): a divide anywhere in the range is evaluated before any
+         * modulus or multiply, and the boolean operators follow in the fixed
+         * order below - confirm this is the intended precedence. */
+        link = find_token(first, last, TOKEN_DIVIDE);
+        if (!link)
+            link = find_token(first, last, TOKEN_MODULUS_DIVIDE);
+        if (!link)
+            link = find_token(first, last, TOKEN_MULTIPLY);
+        if (!link)
+            link = find_token(first, last, TOKEN_BOOLEAN_AND);
+        if (!link)
+            link = find_token(first, last, TOKEN_BOOLEAN_OR);
+        if (!link)
+            link = find_token(first, last, TOKEN_BOOLEAN_NAND);
+        if (!link)
+            link = find_token(first, last, TOKEN_BOOLEAN_NOR);
+        if (!link)
+            link = find_token(first, last, TOKEN_BOOLEAN_XOR);
+        if (!link) {
+            /* '+' and '-' share one precedence level and associate left to
+             * right, so take whichever comes first.  Always preferring '+'
+             * would evaluate 1-2+3 as 1-(2+3). */
+            for (link = first; link; link = link->next) {
+                Token *tok = link->data;
+                if (tok->type == TOKEN_ADD || tok->type == TOKEN_SUBTRACT)
+                    break;
+                if (link == last) {
+                    link = NULL;
+                    break;
+                }
+            }
+        }
+        if (link) {
+            error = do_operation(options, &first, &last, link);
+            if (error)
+                return error;
+            continue;
+        }
+
+        /* Didn't converge */
+        if (first != last)
+            return PARSER_ERR_INVALID;
+
+        /* Exactly one token is left; it must be a value */
+        t = first->data;
+        if (!has_value(t))
+            return PARSER_ERR_INVALID;
+        mp_set_from_mp (&t->value, result);
+
+        return PARSER_ERR_NONE;
+    }
+}
+
+
+/*
+ * Parse and evaluate `expression`, storing the value in `result`.
+ *
+ * expression  - text to evaluate; NULL or empty is rejected.
+ * options     - passed to parse() and solve().
+ * result      - receives the computed value; NULL is rejected.
+ * error_token - optional out-parameter.  This implementation does not report
+ *               an offending token, so it is set to NULL when provided.
+ *
+ * Returns PARSER_ERR_NONE on success, PARSER_ERR_INVALID for missing or
+ * empty input, the error reported by parse() or solve(), or PARSER_ERR_MP
+ * when mp_get_error() is set after evaluation.
+ */
+MPErrorCode
+mp_equation_parse(const char *expression, MPEquationOptions *options, MPNumber *result, char **error_token)
+{
+    GList *tokens = NULL, *last_token;
+    MPErrorCode error;
+
+    /* Never leave the caller's pointer indeterminate. */
+    if (error_token)
+        *error_token = NULL;
+
+    if (!(expression && result) || expression[0] == '\0')
+        return PARSER_ERR_INVALID;
+
+    mp_clear_error();
+
+    error = parse(options, expression, &tokens);
+    if (error)
+        return error;
+
+    /* An empty token list would reach solve() as NULL/NULL and be
+     * dereferenced there. */
+    if (!tokens)
+        return PARSER_ERR_INVALID;
+
+    /* NOTE(review): nothing visible here releases the token list after
+     * solving - confirm who owns it. */
+    last_token = g_list_last(tokens);
+    error = solve(options, tokens, last_token, result);
+    if (error)
+        return error;
+
+    if (mp_get_error())
+        return PARSER_ERR_MP;
 
     return PARSER_ERR_NONE;
 }
@@ -321,9 +1249,3 @@ mp_error_code_to_string(MPErrorCode error_code)
         return "Unknown parser error";
     }
 }
-
-
-int _mp_equation_error(void *yylloc, MPEquationParserState *state, char *text)
-{
-    return 0;
-}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]