[gcalctool] Make lexer/parser support UTF-8 characters



commit d6e47cb777deaf2bc2c755aba9fe78d40831e8a7
Author: Robert Ancell <robert ancell gmail com>
Date:   Mon Jun 15 17:43:49 2009 +1000

    Make lexer/parser support UTF-8 characters

 src/mp-equation-lexer.l  |   28 ++++++++++++++++++----------
 src/mp-equation-parser.y |   44 ++++++++++++++++++++++++++++++++------------
 src/unittest.c           |   15 +++++++++++++++
 3 files changed, 65 insertions(+), 22 deletions(-)
---
diff --git a/src/mp-equation-lexer.l b/src/mp-equation-lexer.l
index 6bc7971..5394b38 100644
--- a/src/mp-equation-lexer.l
+++ b/src/mp-equation-lexer.l
@@ -38,11 +38,10 @@
 %}
 
 DECIMAL	"."|","
-SIGN	"+"|"-" 
-CHARACTER [a-z]|[A-Z]
+SIGN	"+"|"-"|"−"
 BIN     [0-1]
 OCT     [0-7]
-DEC	[0-9]
+DEC     [0-9]
 HEX     [0-9]|[A-F]|[a-f]
 EXP     "e"|"e+"|"e-"|"E"|"E+"|"E-"
 HEX_NUM {HEX}+|{HEX}*{DECIMAL}{HEX}*
@@ -52,10 +51,15 @@ BIN_NUM{BIN}+|{BIN}*{DECIMAL}{BIN}*
 
 %%
 
-"abs"|"Abs"|"ABS" {return tABS;}
+"+"     {return tADD;}
+"-"|"−" {return tSUBTRACT;}
+"*"|"×" {return tMULTIPLY;}
+"/"|"÷" {return tDIVIDE;}
+"abs"|"Abs"|"ABS" {return tABS_FUNC;}
+"|" {return tABS;}
 "acosh"|"Acosh"|"ACOSH" {return tACOSH;}
 "acos"|"Acos"|"ACOS" {return tACOS;}
-"and"|"And"|"AND" {return tAND;}
+"∧"|"and"|"And"|"AND" {return tAND;}
 "ans"|"Ans"|"ANS" {return tANS;}
 "asinh"|"Asinh"|"ASINH" {return tASINH;}
 "asin"|"Asin"|"ASIN" {return tASIN;}
@@ -73,14 +77,19 @@ BIN_NUM{BIN}+|{BIN}*{DECIMAL}{BIN}*
 "log"|"Log"|"LOG" {return tLOG10;}
 "log2"|"Log2"|"LOG2" {return tLOG2;}
 "mod"|"Mod"|"MOD" {return tMOD;}
-"not"|"Not"|"NOT" {return tNOT;}
-"or"|"Or"|"OR" {return tOR;}
-"pi"|"Pi"|"PI" {return tPI;}
+"¬"|"~" {return tNOT;}
+"∨"|"or"|"Or"|"OR" {return tOR;}
+"π"|"pi"|"Pi"|"PI" {return tPI;}
 "rand"|"Rand"|"RAND" {return tRAND;}
 "rcl"|"Rcl"|"RCL" {return tRCL;}
 "sinh"|"Sinh"|"SINH" {return tSINH;}
 "sin"|"Sin"|"SIN" {return tSIN;}
 "sqrt"|"Sqrt"|"SQRT" {return tSQRT;}
+"√" {return tROOT;}
+"∛" {return tROOT3;}
+"∜" {return tROOT4;}
+"²" {return tSQUARED;}
+"³" {return tCUBED;}
 "sto"|"Sto"|"STO" {return tSTO;}
 "tanh"|"Tanh"|"TANH" {return tTANH;}
 "tan"|"Tan"|"TAN" {return tTAN;}
@@ -88,14 +97,13 @@ BIN_NUM{BIN}+|{BIN}*{DECIMAL}{BIN}*
 "1s"|"1S" {return t1S;}
 "2s"|"2S" {return t2S;}
 "xnor"|"Xnor"|"XNOR" {return tXNOR;}
-"xor"|"Xor"|"XOR" {return tXOR;}
+"⊕"|"xor"|"Xor"|"XOR" {return tXOR;}
 
 "R"{DEC}+ {
 yylval->integer = atoi(yytext+1);  
 return tREG;
 }
 
-
 {DEC_NUM}{EXP}{DEC_NUM} {
 if (_mp_equation_get_extra(yyscanner)->base == 16) REJECT;
 if (strlen(yytext) > MAX_DIGITS) yyextra->error = -PARSER_ERR_TOO_LONG_NUMBER;
diff --git a/src/mp-equation-parser.y b/src/mp-equation-parser.y
index c301818..8d0108d 100644
--- a/src/mp-equation-parser.y
+++ b/src/mp-equation-parser.y
@@ -43,7 +43,12 @@
   int integer;
 }
 
+%token tADD
+%token tSUBTRACT
+%token tMULTIPLY
+%token tDIVIDE
 %token tABS
+%token tABS_FUNC
 %token tACOS
 %token tACOSH
 %token tAND
@@ -73,6 +78,11 @@
 %token tRCL
 %token tSIN
 %token tSINH
+%token tROOT
+%token tROOT3
+%token tROOT4
+%token tSQUARED
+%token tCUBED
 %token tSQRT
 %token tSTO
 %token tTAN
@@ -89,8 +99,7 @@
 %type  <int_t> exp rcl value term reg func number parenthesis
 
 %start statement
-%left '+' '-'
-%left '*' '/'
+%left tADD tSUBTRACT tMULTIPLY tDIVIDE
 %left MED
 %left LNEG
 %left NEG
@@ -98,6 +107,7 @@
 %right '^'
 %right '!'
 %right '%'
+%right tSQUARED tCUBED
 %left HIGH
 
 %%
@@ -141,11 +151,15 @@ value:
 exp: 
   term {mp_set_from_mp(&$1, &$$);}
 
-| exp '+' term '%' {mp_add_integer(&$3, 100, &$3); mp_divide_integer(&$3, 100, &$3); mp_multiply(&$1, &$3, &$$);}
-| exp '-' term '%' {mp_add_integer(&$3, -100, &$3); mp_divide_integer(&$3, -100, &$3); mp_multiply(&$1, &$3, &$$);}
+| exp tADD term '%' {mp_add_integer(&$3, 100, &$3); mp_divide_integer(&$3, 100, &$3); mp_multiply(&$1, &$3, &$$);}
+| exp tSUBTRACT term '%' {mp_add_integer(&$3, -100, &$3); mp_divide_integer(&$3, -100, &$3); mp_multiply(&$1, &$3, &$$);}
 
-| exp '+' exp {mp_add(&$1, &$3, &$$);}
-| exp '-' exp {mp_subtract(&$1, &$3, &$$);}
+| exp tROOT term {MPNumber t; mp_sqrt(&$3, &t); mp_multiply(&$1, &t, &$$);}
+| exp tROOT3 term {MPNumber t; mp_root(&$3, 3, &t); mp_multiply(&$1, &t, &$$);}
+| exp tROOT4 term {MPNumber t; mp_root(&$3, 4, &t); mp_multiply(&$1, &t, &$$);}
+
+| exp tADD exp {mp_add(&$1, &$3, &$$);}
+| exp tSUBTRACT exp {mp_subtract(&$1, &$3, &$$);}
 
 | exp tMOD exp %prec MED {
     if (!mp_is_integer(&$1) || !mp_is_integer(&$3)) {
@@ -187,12 +201,18 @@ exp:
 term:
   number {mp_set_from_mp(&$1, &$$);}
 | rcl {mp_set_from_mp(&$1, &$$);}
-| term '/' term {mp_divide(&$1, &$3, &$$);}
-| term '*' term {mp_multiply(&$1, &$3, &$$);}
+| tROOT term {mp_sqrt(&$2, &$$);}
+| tROOT3 term {mp_root(&$2, 3, &$$);}
+| tROOT4 term {mp_root(&$2, 4, &$$);}
+| term tDIVIDE term {mp_divide(&$1, &$3, &$$);}
+| term tMULTIPLY term {mp_multiply(&$1, &$3, &$$);}
+| tABS exp tABS {mp_abs(&$2, &$$);} 
 | 'e' '^' term {mp_epowy(&$3, &$$);} 
 | term '!' {mp_factorial(&$1, &$$);}
+| term tSQUARED {mp_pwr_integer(&$1, 2, &$$);}
+| term tCUBED {mp_pwr_integer(&$1, 3, &$$);}
 | term '%' {mp_divide_integer(&$1, 100, &$$);}
-| '~' term %prec LNEG {
+| tNOT term %prec LNEG {
     if (!mp_is_natural(&$2)) {
 	(_mp_equation_get_extra(yyscanner))->error = -PARSER_ERR_BITWISEOP;
     } else if (!mp_is_overflow(&$2, _mp_equation_get_extra(yyscanner)->wordlen)) {
@@ -200,8 +220,8 @@ term:
     }
     mp_not(&$2, _mp_equation_get_extra(yyscanner)->wordlen, &$$);
 }
-| '-' term %prec NEG {mp_invert_sign(&$2, &$$);}
-| '+' term %prec POS {mp_set_from_mp(&$2, &$$);}
+| tSUBTRACT term %prec NEG {mp_invert_sign(&$2, &$$);}
+| tADD term %prec POS {mp_set_from_mp(&$2, &$$);}
 | term '^' term {mp_xpowy(&$1, &$3, &$$);}
 
 | func {mp_set_from_mp(&$1, &$$);}
@@ -224,7 +244,7 @@ func:
 | tSQRT term %prec HIGH {mp_sqrt(&$2, &$$);}
 | tLN term %prec HIGH {mp_ln(&$2, &$$);}
 | tRAND %prec HIGH {mp_set_from_random(&$$);}
-| tABS term %prec HIGH {mp_abs(&$2, &$$);}
+| tABS_FUNC term %prec HIGH {mp_abs(&$2, &$$);}
 | tFRAC term %prec HIGH {mp_fractional_component(&$2, &$$);}
 | tINT term %prec HIGH {mp_integer_component(&$2, &$$);}
 | tCHS term %prec HIGH {mp_invert_sign(&$2, &$$);}
diff --git a/src/unittest.c b/src/unittest.c
index 1439b13..048e663 100644
--- a/src/unittest.c
+++ b/src/unittest.c
@@ -118,12 +118,14 @@ test_parser()
     test("4+1", "5", 0);
     test("40000+0.001", "40000.001", 0);
     test("0.001+40000", "40000.001", 0);
+    test("2−3", "-1", 0);
     test("2-3", "-1", 0);
     test("3-2", "1", 0);
     test("1-0.9-0.1", "0", 0);   
     test("40000-0.001", "39999.999", 0);
     test("0.001-40000", "-39999.999", 0);
     test("40000000-40000000", "0", 0);
+    test("2×3", "6", 0);
     test("2*3", "6", 0);
     test("-2*3", "-6", 0);
     test("2*-3", "-6", 0);
@@ -131,6 +133,7 @@ test_parser()
     //FIXME: Need to update mperr() test("1/2", "0.5", 0);
     //FIXME: Need to update mperr() test("1/0", "", 0);
     //FIXME: Need to update mperr() test("0/0", "", 0);
+    test("6÷3", "2", 0);
     test("6/3", "2", 0);
     test("-6/3", "-2", 0);
     test("6/-3", "-2", 0);
@@ -173,6 +176,15 @@ test_parser()
     test("4^3^2", "262144", 0);
     test("4^(3^2)", "262144", 0);    
     test("(4^3)^2", "4096", 0);
+    test("2²", "4", 0);
+    test("2³", "8", 0);
+    test("√4", "2", 0);
+    test("√4-2", "0", 0);    
+    test("∛8", "2", 0);
+    test("∜16", "2", 0);
+    test("√(2+2)", "2", 0);
+    test("2√4", "4", 0);
+    test("2*√4", "4", 0);
     test("Sqrt(4)", "2", 0);
     test("Sqrt(2)", "1.414213562", 0);
     test("4^(1/2)", "2", 0);
@@ -191,6 +203,9 @@ test_parser()
     test("Int(-3.2)", "-3", 0);
     test("Frac(-3.2)", "-0.2", 0);
 
+    test("|1|", "1", 0);
+    test("|-1|", "1", 0);
+    test("|3-5|", "2", 0);    
     test("Abs(1)", "1", 0);
     test("Abs(-1)", "1", 0);
     



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]