[vala] Genie: Added Regex literals
- From: Jamie McCracken <jamiemcc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [vala] Genie: Added Regex literals
- Date: Mon, 24 May 2010 19:27:09 +0000 (UTC)
commit d944ecd50f3b53a657098092d5bacbfcd6abea99
Author: Jamie McCracken <jamie.mccrack gmail com>
Date: Mon May 24 15:09:54 2010 -0400
Genie: Added Regex literals
vala/valagenieparser.vala | 21 ++++
vala/valageniescanner.vala | 246 +++++++++++++++++++++++++++++++++++++----
vala/valagenietokentype.vala | 6 +
3 files changed, 249 insertions(+), 24 deletions(-)
---
diff --git a/vala/valagenieparser.vala b/vala/valagenieparser.vala
index 18bf146..d4d01b9 100644
--- a/vala/valagenieparser.vala
+++ b/vala/valagenieparser.vala
@@ -347,6 +347,13 @@ public class Vala.Genie.Parser : CodeVisitor {
Report.error (lit.source_reference, "invalid character literal");
}
return lit;
+ case TokenType.REGEX_LITERAL:
+ next ();
+ string match_part = get_last_string ();
+ SourceReference src_begin = get_src (begin);
+ expect (TokenType.CLOSE_REGEX_LITERAL);
+ string close_token = get_last_string ();
+ return new RegexLiteral ("%s/%s".printf (close_token, match_part), src_begin);
case TokenType.STRING_LITERAL:
next ();
return new StringLiteral (get_last_string (), get_src (begin));
@@ -444,6 +451,15 @@ public class Vala.Genie.Parser : CodeVisitor {
accept (TokenType.INTERR);
accept (TokenType.HASH);
}
+
+
+	/*
+	 * Parses a regular expression literal. The opening delimiter token is
+	 * consumed here; everything after it is delegated to parse_literal (),
+	 * whose result is returned unchanged.
+	 */
+	Expression parse_regex_literal () throws ParseError {
+		expect (TokenType.OPEN_REGEX_LITERAL);
+		return parse_literal ();
+	}
DataType parse_type (bool owned_by_default = true) throws ParseError {
var begin = get_location ();
@@ -642,6 +658,7 @@ public class Vala.Genie.Parser : CodeVisitor {
case TokenType.INTEGER_LITERAL:
case TokenType.REAL_LITERAL:
case TokenType.CHARACTER_LITERAL:
+ case TokenType.REGEX_LITERAL:
case TokenType.STRING_LITERAL:
case TokenType.TEMPLATE_STRING_LITERAL:
case TokenType.VERBATIM_STRING_LITERAL:
@@ -656,6 +673,9 @@ public class Vala.Genie.Parser : CodeVisitor {
case TokenType.OPEN_PARENS:
expr = parse_tuple ();
break;
+ case TokenType.OPEN_REGEX_LITERAL:
+ expr = parse_regex_literal ();
+ break;
case TokenType.OPEN_TEMPLATE:
expr = parse_template ();
break;
@@ -1227,6 +1247,7 @@ public class Vala.Genie.Parser : CodeVisitor {
case TokenType.INTEGER_LITERAL:
case TokenType.REAL_LITERAL:
case TokenType.CHARACTER_LITERAL:
+ case TokenType.REGEX_LITERAL:
case TokenType.STRING_LITERAL:
case TokenType.TEMPLATE_STRING_LITERAL:
case TokenType.VERBATIM_STRING_LITERAL:
diff --git a/vala/valageniescanner.vala b/vala/valageniescanner.vala
index 5c2bb6d..87e9b45 100644
--- a/vala/valageniescanner.vala
+++ b/vala/valageniescanner.vala
@@ -34,7 +34,7 @@ public class Vala.Genie.Scanner {
char* begin;
char* current;
char* end;
-
+
int line;
int column;
@@ -65,6 +65,7 @@ public class Vala.Genie.Scanner {
PARENS,
BRACE,
BRACKET,
+ REGEX_LITERAL,
TEMPLATE,
TEMPLATE_PART
}
@@ -104,6 +105,176 @@ public class Vala.Genie.Scanner {
return (c.isalnum () || c == '_');
}
+	/*
+	 * Tells whether the innermost scanner state is REGEX_LITERAL, i.e.
+	 * whether the top of state_stack is State.REGEX_LITERAL. An empty
+	 * stack means no special state is active.
+	 */
+	bool in_regex_literal () {
+		if (state_stack.length <= 0) {
+			return false;
+		}
+
+		return state_stack[state_stack.length - 1] == State.REGEX_LITERAL;
+	}
+
+
+	/*
+	 * Scans one token while the scanner is in the REGEX_LITERAL state.
+	 *
+	 * Result:
+	 *  - EOF when the input is exhausted;
+	 *  - CLOSE_REGEX_LITERAL for the terminating '/' together with any
+	 *    directly following i/s/m/x modifiers (the regex state is popped);
+	 *  - REGEX_LITERAL for the pattern text up to, but not including, the
+	 *    next unescaped '/'.
+	 *
+	 * If the pattern runs into a newline or the end of input, an error is
+	 * reported, the regex state is popped and the result of read_token ()
+	 * is returned instead.
+	 *
+	 * token_begin receives the position, line and column of the first
+	 * character of the token. token_end.pos is set to the position just
+	 * past the token and token_end.column to the column of its last
+	 * character.
+	 */
+	public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
+		TokenType type;
+		char* begin = current;
+		token_begin.pos = begin;
+		token_begin.line = line;
+		token_begin.column = column;
+
+		/* a negative value means: advance the column by the byte count */
+		int token_length_in_chars = -1;
+
+		if (current >= end) {
+			type = TokenType.EOF;
+		} else {
+			switch (current[0]) {
+			case '/':
+				type = TokenType.CLOSE_REGEX_LITERAL;
+				current++;
+				/* count the '/' itself so that column + token_length_in_chars
+				 * addresses the modifier being inspected and the final column
+				 * advance covers the delimiter plus all modifiers */
+				token_length_in_chars = 1;
+				state_stack.length--;
+				var fl_i = false;
+				var fl_s = false;
+				var fl_m = false;
+				var fl_x = false;
+				/* bounds check first: the closing '/' may be the last byte of the input */
+				while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
+					switch (current[0]) {
+					case 'i':
+						if (fl_i) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'i' used more than once");
+						}
+						fl_i = true;
+						break;
+					case 's':
+						if (fl_s) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 's' used more than once");
+						}
+						fl_s = true;
+						break;
+					case 'm':
+						if (fl_m) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'm' used more than once");
+						}
+						fl_m = true;
+						break;
+					case 'x':
+						if (fl_x) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'x' used more than once");
+						}
+						fl_x = true;
+						break;
+					}
+					current++;
+					token_length_in_chars++;
+				}
+				break;
+			default:
+				type = TokenType.REGEX_LITERAL;
+				token_length_in_chars = 0;
+				while (current < end && current[0] != '/') {
+					if (current[0] == '\\') {
+						current++;
+						token_length_in_chars++;
+						if (current >= end) {
+							break;
+						}
+
+						switch (current[0]) {
+						case '\'':
+						case '"':
+						case '\\':
+						case '/':
+						case '^':
+						case '$':
+						case '.':
+						case '[':
+						case ']':
+						case '{':
+						case '}':
+						case '(':
+						case ')':
+						case '?':
+						case '*':
+						case '+':
+						case '-':
+						case '#':
+						case '&':
+						case '~':
+						case ':':
+						case ';':
+						case '<':
+						case '>':
+						case '|':
+						case '%':
+						case '=':
+						case '@':
+						case '0':
+						case 'b':
+						case 'B':
+						case 'f':
+						case 'n':
+						case 'r':
+						case 't':
+						case 'a':
+						case 'A':
+						case 'p':
+						case 'P':
+						case 'e':
+						case 'd':
+						case 'D':
+						case 's':
+						case 'S':
+						case 'w':
+						case 'W':
+						case 'G':
+						case 'z':
+						case 'Z':
+							/* single-character escape: skip the escaped character */
+							current++;
+							token_length_in_chars++;
+							break;
+						case 'x':
+							// hexadecimal escape character
+							current++;
+							token_length_in_chars++;
+							while (current < end && current[0].isxdigit ()) {
+								current++;
+								token_length_in_chars++;
+							}
+							break;
+						default:
+							/* the offending character is not consumed here; the next
+							 * loop iteration scans it as ordinary pattern text */
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
+							break;
+						}
+					} else if (current[0] == '\n') {
+						/* a regex literal may not span lines */
+						break;
+					} else {
+						unichar u = ((string) current).get_char_validated ((long) (end - current));
+						if (u != (unichar) (-1)) {
+							/* advance by the encoded byte length, count one character */
+							current += u.to_utf8 (null);
+							token_length_in_chars++;
+						} else {
+							current++;
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
+						}
+					}
+				}
+				if (current >= end || current[0] == '\n') {
+					/* unterminated literal: the missing delimiter is '/' */
+					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected /");
+					state_stack.length--;
+					/* keep the column in step with the text already consumed
+					 * before handing over to the regular scanner */
+					column += token_length_in_chars;
+					return read_token (out token_begin, out token_end);
+				}
+				break;
+			}
+		}
+
+		if (token_length_in_chars < 0) {
+			column += (int) (current - begin);
+		} else {
+			column += token_length_in_chars;
+		}
+
+		token_end.pos = current;
+		token_end.line = line;
+		token_end.column = column - 1;
+
+		return type;
+	}
+
+
public void seek (SourceLocation location) {
current = location.pos;
line = location.line;
@@ -607,13 +778,10 @@ public class Vala.Genie.Scanner {
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
-
- /* emit dedents if outstanding before checking any other chars */
-
- if (pending_dedents > 0) {
- pending_dedents--;
- indent_level--;
-
+ if (in_template ()) {
+ return read_template_token (out token_begin, out token_end);
+ } else if (in_template_part ()) {
+ state_stack.length--;
token_begin.pos = current;
token_begin.line = line;
@@ -621,18 +789,21 @@ public class Vala.Genie.Scanner {
token_end.pos = current;
token_end.line = line;
- token_end.column = column;
-
- last_token = TokenType.DEDENT;
+ token_end.column = column - 1;
- return TokenType.DEDENT;
+ return TokenType.COMMA;
+ } else if (in_regex_literal ()) {
+ return read_regex_token (out token_begin, out token_end);
}
- if (in_template ()) {
- return read_template_token (out token_begin, out token_end);
- } else if (in_template_part ()) {
- state_stack.length--;
+
+ /* emit dedents if outstanding before checking any other chars */
+
+ if (pending_dedents > 0) {
+ pending_dedents--;
+ indent_level--;
+
token_begin.pos = current;
token_begin.line = line;
@@ -640,11 +811,12 @@ public class Vala.Genie.Scanner {
token_end.pos = current;
token_end.line = line;
- token_end.column = column - 1;
+ token_end.column = column;
- return TokenType.COMMA;
- }
+ last_token = TokenType.DEDENT;
+ return TokenType.DEDENT;
+ }
if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
/* scrub whitespace (excluding newlines) and comments */
@@ -1023,13 +1195,40 @@ public class Vala.Genie.Scanner {
}
break;
case '/':
- type = TokenType.DIV;
- current++;
- if (current < end && current[0] == '=') {
- type = TokenType.ASSIGN_DIV;
+ switch (last_token) {
+ case TokenType.ASSIGN:
+ case TokenType.COMMA:
+ case TokenType.MINUS:
+ case TokenType.OP_AND:
+ case TokenType.OP_DEC:
+ case TokenType.OP_EQ:
+ case TokenType.OP_GE:
+ case TokenType.OP_GT:
+ case TokenType.OP_INC:
+ case TokenType.OP_LE:
+ case TokenType.OP_LT:
+ case TokenType.OP_NE:
+ case TokenType.OP_NEG:
+ case TokenType.OP_OR:
+ case TokenType.OPEN_BRACE:
+ case TokenType.OPEN_PARENS:
+ case TokenType.PLUS:
+ case TokenType.RETURN:
+ type = TokenType.OPEN_REGEX_LITERAL;
+ state_stack += State.REGEX_LITERAL;
current++;
+ break;
+ default:
+ type = TokenType.DIV;
+ current++;
+ if (current < end && current[0] == '=') {
+ type = TokenType.ASSIGN_DIV;
+ current++;
+ }
+ break;
}
break;
+
case '%':
type = TokenType.PERCENT;
current++;
@@ -1152,7 +1351,6 @@ public class Vala.Genie.Scanner {
token_end.pos = current;
token_end.line = line;
token_end.column = column - 1;
-
last_token = type;
return type;
diff --git a/vala/valagenietokentype.vala b/vala/valagenietokentype.vala
index 30ed7ba..fb8f54f 100644
--- a/vala/valagenietokentype.vala
+++ b/vala/valagenietokentype.vala
@@ -50,6 +50,7 @@ public enum Vala.Genie.TokenType {
CLOSE_BRACE,
CLOSE_BRACKET,
CLOSE_PARENS,
+ CLOSE_REGEX_LITERAL,
CLOSE_TEMPLATE,
COLON,
COMMA,
@@ -121,6 +122,7 @@ public enum Vala.Genie.TokenType {
OPEN_BRACE,
OPEN_BRACKET,
OPEN_PARENS,
+ OPEN_REGEX_LITERAL,
OPEN_TEMPLATE,
OVERRIDE,
OWNED,
@@ -138,6 +140,7 @@ public enum Vala.Genie.TokenType {
REAL_LITERAL,
READONLY,
REF,
+ REGEX_LITERAL,
REQUIRES,
RETURN,
SEMICOLON,
@@ -195,6 +198,7 @@ public enum Vala.Genie.TokenType {
case CLOSE_BRACE: return "`}'";
case CLOSE_BRACKET: return "`]'";
case CLOSE_PARENS: return "`)'";
+ case CLOSE_REGEX_LITERAL: return "`/'";
case COLON: return "`:'";
case COMMA: return "`,'";
case CONST: return "`const'";
@@ -265,6 +269,7 @@ public enum Vala.Genie.TokenType {
case OPEN_BRACE: return "`{'";
case OPEN_BRACKET: return "`['";
case OPEN_PARENS: return "`('";
+ case OPEN_REGEX_LITERAL: return "`/'";
case OVERRIDE: return "`override'";
case OWNED: return "`owned'";
case PARAMS: return "`params'";
@@ -281,6 +286,7 @@ public enum Vala.Genie.TokenType {
case READONLY: return "`readonly'";
case REAL_LITERAL: return "real literal";
case REF: return "`ref'";
+ case REGEX_LITERAL: return "regex literal";
case REQUIRES: return "`requires'";
case RETURN: return "`return'";
case SEMICOLON: return "`;'";
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]