[vala] Genie: Added Regex literals

From: Jamie McCracken <jamiemcc src gnome org>
To: commits-list gnome org
Cc:
Subject: [vala] Genie: Added Regex literals
Date: Mon, 24 May 2010 19:27:09 +0000 (UTC)
commit d944ecd50f3b53a657098092d5bacbfcd6abea99
Author: Jamie McCracken <jamie.mccrack gmail com>
Date:   Mon May 24 15:09:54 2010 -0400

    Genie: Added Regex literals

 vala/valagenieparser.vala    |   21 ++++
 vala/valageniescanner.vala   |  246 +++++++++++++++++++++++++++++++++++++----
 vala/valagenietokentype.vala |    6 +
 3 files changed, 249 insertions(+), 24 deletions(-)
---
diff --git a/vala/valagenieparser.vala b/vala/valagenieparser.vala
index 18bf146..d4d01b9 100644
--- a/vala/valagenieparser.vala
+++ b/vala/valagenieparser.vala
@@ -347,6 +347,13 @@ public class Vala.Genie.Parser : CodeVisitor {
 				Report.error (lit.source_reference, "invalid character literal");
 			}
 			return lit;
+		case TokenType.REGEX_LITERAL:
+			next ();
+			string match_part = get_last_string ();
+			SourceReference src_begin = get_src (begin);
+			expect (TokenType.CLOSE_REGEX_LITERAL);
+			string close_token = get_last_string ();
+			return new RegexLiteral ("%s/%s".printf (close_token, match_part), src_begin);	
 		case TokenType.STRING_LITERAL:
 			next ();
 			return new StringLiteral (get_last_string (), get_src (begin));
@@ -444,6 +451,15 @@ public class Vala.Genie.Parser : CodeVisitor {
 		accept (TokenType.INTERR);
 		accept (TokenType.HASH);
 	}
+	
+	
+	/**
+	 * Parses a regex literal: expects the opening `/' and hands over to parse_literal ().
+	 */
+	Expression parse_regex_literal () throws ParseError {
+		expect (TokenType.OPEN_REGEX_LITERAL);
+		return parse_literal ();
+	}
 
 	DataType parse_type (bool owned_by_default = true) throws ParseError {
 		var begin = get_location ();
@@ -642,6 +658,7 @@ public class Vala.Genie.Parser : CodeVisitor {
 		case TokenType.INTEGER_LITERAL:
 		case TokenType.REAL_LITERAL:
 		case TokenType.CHARACTER_LITERAL:
+		case TokenType.REGEX_LITERAL:
 		case TokenType.STRING_LITERAL:
 		case TokenType.TEMPLATE_STRING_LITERAL:
 		case TokenType.VERBATIM_STRING_LITERAL:
@@ -656,6 +673,9 @@ public class Vala.Genie.Parser : CodeVisitor {
 		case TokenType.OPEN_PARENS:
 			expr = parse_tuple ();
 			break;
+		case TokenType.OPEN_REGEX_LITERAL:
+			expr = parse_regex_literal ();
+			break;
 		case TokenType.OPEN_TEMPLATE:
 			expr = parse_template ();
 			break;
@@ -1227,6 +1247,7 @@ public class Vala.Genie.Parser : CodeVisitor {
 					case TokenType.INTEGER_LITERAL:
 					case TokenType.REAL_LITERAL:
 					case TokenType.CHARACTER_LITERAL:
+					case TokenType.REGEX_LITERAL:
 					case TokenType.STRING_LITERAL:
 					case TokenType.TEMPLATE_STRING_LITERAL:
 					case TokenType.VERBATIM_STRING_LITERAL:
diff --git a/vala/valageniescanner.vala b/vala/valageniescanner.vala
index 5c2bb6d..87e9b45 100644
--- a/vala/valageniescanner.vala
+++ b/vala/valageniescanner.vala
@@ -34,7 +34,7 @@ public class Vala.Genie.Scanner {
 	char* begin;
 	char* current;
 	char* end;
-
+	
 	int line;
 	int column;
 
@@ -65,6 +65,7 @@ public class Vala.Genie.Scanner {
 		PARENS,
 		BRACE,
 		BRACKET,
+		REGEX_LITERAL,
 		TEMPLATE,
 		TEMPLATE_PART
 	}
@@ -104,6 +105,176 @@ public class Vala.Genie.Scanner {
 		return (c.isalnum () || c == '_');
 	}
 	
+	/* Tells whether the innermost entry of the state stack is State.REGEX_LITERAL. */
+	bool in_regex_literal () {
+		return state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL;
+	}
+
+	/**
+	 * Reads the next token while the scanner is inside a regex literal.
+	 *
+	 * A `/' at the current position yields CLOSE_REGEX_LITERAL: the slash and any
+	 * modifiers (i, s, m, x) directly behind it are consumed and the regex state is
+	 * popped. Anything else yields REGEX_LITERAL, the pattern text up to but not
+	 * including the next unescaped `/'. A pattern that runs into a newline or the
+	 * end of the input is reported as an error; the regex state is then popped and
+	 * the token is read by read_token () instead.
+	 *
+	 * @param token_begin receives position, line and column of the token start
+	 * @param token_end receives the position just past the token and the column of its last character
+	 * @return the type of the token read, TokenType.EOF at the end of the input
+	 */
+	public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
+		TokenType type;
+		char* begin = current;
+		token_begin.pos = begin;
+		token_begin.line = line;
+		token_begin.column = column;
+
+		// a negative value makes the column advance below fall back to the byte count
+		int token_length_in_chars = -1;
+
+		if (current >= end) {
+			type = TokenType.EOF;
+		} else {
+			switch (current[0]) {
+			case '/':
+				type = TokenType.CLOSE_REGEX_LITERAL;
+				current++;
+				state_stack.length--;
+				// count the slash itself, so that column + token_length_in_chars is the column of the
+				// modifier under inspection and the column advance below covers the whole token
+				token_length_in_chars = 1;
+				var fl_i = false;
+				var fl_s = false;
+				var fl_m = false;
+				var fl_x = false;
+				// bounds check first: the closing slash may be the last byte of the input
+				while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
+					switch (current[0]) {
+					case 'i':
+						if (fl_i) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'i' used more than once");
+						}
+						fl_i = true;
+						break;
+					case 's':
+						if (fl_s) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 's' used more than once");
+						}
+						fl_s = true;
+						break;
+					case 'm':
+						if (fl_m) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'm' used more than once");
+						}
+						fl_m = true;
+						break;
+					case 'x':
+						if (fl_x) {
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'x' used more than once");
+						}
+						fl_x = true;
+						break;
+					}
+					current++;
+					token_length_in_chars++;
+				}
+				break;
+			default:
+				type = TokenType.REGEX_LITERAL;
+				token_length_in_chars = 0;
+				while (current < end && current[0] != '/') {
+					if (current[0] == '\\') {
+						current++;
+						token_length_in_chars++;
+						// a trailing backslash ends the input; the check after the loop reports it
+						if (current >= end) {
+							break;
+						}
+
+						switch (current[0]) {
+						case '\'': case '"':
+						case '\\': case '/':
+						case '^': case '$':
+						case '.': case '?':
+						case '*': case '+':
+						case '[': case ']':
+						case '{': case '}':
+						case '(': case ')':
+						case '<': case '>':
+						case '-': case '|':
+						case '#': case '&':
+						case '~': case '%':
+						case ':': case ';':
+						case '=': case '@':
+						case '0':
+						case 'b': case 'B':
+						case 'f': case 'n':
+						case 'r': case 't':
+						case 'a': case 'A':
+						case 'p': case 'P':
+						case 'e': case 'G':
+						case 'd': case 'D':
+						case 's': case 'S':
+						case 'w': case 'W':
+						case 'z': case 'Z':
+							current++;
+							token_length_in_chars++;
+							break;
+						case 'x':
+							// hexadecimal escape character
+							current++;
+							token_length_in_chars++;
+							while (current < end && current[0].isxdigit ()) {
+								current++;
+								token_length_in_chars++;
+							}
+							break;
+						default:
+							// nothing is consumed here: the next loop iteration deals with that character
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
+							break;
+						}
+					} else if (current[0] == '\n') {
+						// the pattern has to end on the line it started on
+						break;
+					} else {
+						unichar u = ((string) current).get_char_validated ((long) (end - current));
+						if (u != (unichar) (-1)) {
+							current += u.to_utf8 (null);
+							token_length_in_chars++;
+						} else {
+							// step over the offending byte so that the scan keeps moving
+							current++;
+							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
+						}
+					}
+				}
+				// the loop ended on a newline or the end of the input instead of a closing slash
+				if (current >= end || current[0] == '\n') {
+					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected /");
+					state_stack.length--;
+					return read_token (out token_begin, out token_end);
+				}
+				break;
+			}
+		}
+
+		if (token_length_in_chars < 0) {
+			column += (int) (current - begin);
+		} else {
+			column += token_length_in_chars;
+		}
+
+		token_end.pos = current;
+		token_end.line = line;
+		token_end.column = column - 1;
+
+		return type;
+	}
+
+	
 	public void seek (SourceLocation location) {
 		current = location.pos;
 		line = location.line;
@@ -607,13 +778,10 @@ public class Vala.Genie.Scanner {
 	public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
 		
 
-
-		/* emit dedents if outstanding before checking any other chars */
-
-		if (pending_dedents > 0) {
-			pending_dedents--;
-			indent_level--;
-
+		if (in_template ()) {
+			return read_template_token (out token_begin, out token_end);
+		} else if (in_template_part ()) {
+			state_stack.length--;
 
 			token_begin.pos = current;
 			token_begin.line = line;
@@ -621,18 +789,21 @@ public class Vala.Genie.Scanner {
 
 			token_end.pos = current;
 			token_end.line = line;
-			token_end.column = column;
-
-			last_token = TokenType.DEDENT;
+			token_end.column = column - 1;
 
-			return TokenType.DEDENT;
+			return TokenType.COMMA;
+		} else if (in_regex_literal ()) {
+			return read_regex_token (out token_begin, out token_end);
 		}
 
 
-		if (in_template ()) {
-			return read_template_token (out token_begin, out token_end);
-		} else if (in_template_part ()) {
-			state_stack.length--;
+
+		/* emit dedents if outstanding before checking any other chars */
+
+		if (pending_dedents > 0) {
+			pending_dedents--;
+			indent_level--;
+
 
 			token_begin.pos = current;
 			token_begin.line = line;
@@ -640,11 +811,12 @@ public class Vala.Genie.Scanner {
 
 			token_end.pos = current;
 			token_end.line = line;
-			token_end.column = column - 1;
+			token_end.column = column;
 
-			return TokenType.COMMA;
-		}
+			last_token = TokenType.DEDENT;
 
+			return TokenType.DEDENT;
+		}
 
 		if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
 			/* scrub whitespace (excluding newlines) and comments */		
@@ -1023,13 +1195,40 @@ public class Vala.Genie.Scanner {
 				}
 				break;
 			case '/':
-				type = TokenType.DIV;
-				current++;
-				if (current < end && current[0] == '=') {
-					type = TokenType.ASSIGN_DIV;
+				switch (last_token) {
+				case TokenType.ASSIGN:
+				case TokenType.COMMA:
+				case TokenType.MINUS:
+				case TokenType.OP_AND:
+				case TokenType.OP_DEC:
+				case TokenType.OP_EQ:
+				case TokenType.OP_GE:
+				case TokenType.OP_GT:
+				case TokenType.OP_INC:
+				case TokenType.OP_LE:
+				case TokenType.OP_LT:
+				case TokenType.OP_NE:
+				case TokenType.OP_NEG:
+				case TokenType.OP_OR:
+				case TokenType.OPEN_BRACE:
+				case TokenType.OPEN_PARENS:
+				case TokenType.PLUS:
+				case TokenType.RETURN:
+					type = TokenType.OPEN_REGEX_LITERAL;
+					state_stack += State.REGEX_LITERAL;
 					current++;
+					break;
+				default:
+					type = TokenType.DIV;
+					current++;
+					if (current < end && current[0] == '=') {
+						type = TokenType.ASSIGN_DIV;
+						current++;
+					}
+					break;
 				}
 				break;
+
 			case '%':
 				type = TokenType.PERCENT;
 				current++;
@@ -1152,7 +1351,6 @@ public class Vala.Genie.Scanner {
 		token_end.pos = current;
 		token_end.line = line;
 		token_end.column = column - 1;
-		
 		last_token = type;
 
 		return type;
diff --git a/vala/valagenietokentype.vala b/vala/valagenietokentype.vala
index 30ed7ba..fb8f54f 100644
--- a/vala/valagenietokentype.vala
+++ b/vala/valagenietokentype.vala
@@ -50,6 +50,7 @@ public enum Vala.Genie.TokenType {
 	CLOSE_BRACE,
 	CLOSE_BRACKET,
 	CLOSE_PARENS,
+	CLOSE_REGEX_LITERAL,
 	CLOSE_TEMPLATE,
 	COLON,
 	COMMA,
@@ -121,6 +122,7 @@ public enum Vala.Genie.TokenType {
 	OPEN_BRACE,
 	OPEN_BRACKET,
 	OPEN_PARENS,
+	OPEN_REGEX_LITERAL,
 	OPEN_TEMPLATE,
 	OVERRIDE,
 	OWNED,
@@ -138,6 +140,7 @@ public enum Vala.Genie.TokenType {
 	REAL_LITERAL,
 	READONLY,
 	REF,
+	REGEX_LITERAL,
 	REQUIRES,
 	RETURN,
 	SEMICOLON,
@@ -195,6 +198,7 @@ public enum Vala.Genie.TokenType {
 		case CLOSE_BRACE: return "`}'";
 		case CLOSE_BRACKET: return "`]'";
 		case CLOSE_PARENS: return "`)'";
+		case CLOSE_REGEX_LITERAL: return "`/'";
 		case COLON: return "`:'";
 		case COMMA: return "`,'";
 		case CONST: return "`const'";
@@ -265,6 +269,7 @@ public enum Vala.Genie.TokenType {
 		case OPEN_BRACE: return "`{'";
 		case OPEN_BRACKET: return "`['";
 		case OPEN_PARENS: return "`('";
+		case OPEN_REGEX_LITERAL: return "`/'";
 		case OVERRIDE: return "`override'";
 		case OWNED: return "`owned'";
 		case PARAMS: return "`params'";
@@ -281,6 +286,7 @@ public enum Vala.Genie.TokenType {
 		case READONLY: return "`readonly'";
 		case REAL_LITERAL: return "real literal";
 		case REF: return "`ref'";
+		case REGEX_LITERAL: return "regex literal";
 		case REQUIRES: return "`requires'";
 		case RETURN: return "`return'";
 		case SEMICOLON: return "`;'";
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]