[cogl/wip/sparse: 3/5] sparse: keep comments during tokenization

From: Robert Bragg <rbragg src gnome org>
To: commits-list gnome org
Cc:
Subject: [cogl/wip/sparse: 3/5] sparse: keep comments during tokenization
Date: Tue, 10 Apr 2012 17:45:37 +0000 (UTC)
commit 20bddde7766f75df9e6ba1c5f1fec97adfacea0a
Author: Robert Bragg <robert linux intel com>
Date:   Mon Apr 9 19:57:54 2012 +0100

    sparse: keep comments during tokenization
    
    Instead of dropping comments when tokenizing, comments are now dropped
    conditionally during preprocessing. The intention is to later be able to
    create a tool that can extract all comments taking into consideration
    preprocessor defines.

 deps/sparse/lib.c         |    1 +
 deps/sparse/lib.h         |    1 +
 deps/sparse/pre-process.c |   12 ++++++-
 deps/sparse/token.h       |    3 ++
 deps/sparse/tokenize.c    |   85 ++++++++++++++++++++++++++++++++++++++++-----
 5 files changed, 92 insertions(+), 10 deletions(-)
---
diff --git a/deps/sparse/lib.c b/deps/sparse/lib.c
index 396e9f1..fdf1a3c 100644
--- a/deps/sparse/lib.c
+++ b/deps/sparse/lib.c
@@ -217,6 +217,7 @@ int dbg_entry = 0;
 int dbg_dead = 0;
 
 int preprocess_only;
+int keep_comment_tokens;	/* when zero, do_preprocess() unlinks and frees TOKEN_COMMENT tokens */
 
 static enum { STANDARD_C89,
               STANDARD_C94,
diff --git a/deps/sparse/lib.h b/deps/sparse/lib.h
index 2cea252..1e48574 100644
--- a/deps/sparse/lib.h
+++ b/deps/sparse/lib.h
@@ -93,6 +93,7 @@ extern void expression_error(struct expression *, const char *, ...) FORMAT_ATTR
 extern void add_pre_buffer(const char *fmt, ...) FORMAT_ATTR(1);
 
 extern int preprocess_only;
+extern int keep_comment_tokens;	/* nonzero: do_preprocess() does not strip TOKEN_COMMENT tokens */
 
 extern int Waddress_space;
 extern int Wbitwise;
diff --git a/deps/sparse/pre-process.c b/deps/sparse/pre-process.c
index 8a16f8b..3fe2a5f 100644
--- a/deps/sparse/pre-process.c
+++ b/deps/sparse/pre-process.c
@@ -1300,6 +1300,11 @@ static int expression_value(struct token **where)
 	int state = 0;
 
 	while (!eof_token(p = scan_next(list))) {
+		if (token_type(p) == TOKEN_COMMENT) {	/* comment tokens are discarded here */
+			*list = p->next;	/* unlink p so the list no longer references it */
+			__free_token(p);
+			continue;	/* rescan from the same list position */
+		}
 		switch (state) {
 		case 0:
 			if (token_type(p) != TOKEN_IDENT)
@@ -1814,7 +1819,12 @@ static void do_preprocess(struct token **list)
 		case TOKEN_STREAMBEGIN:
 			*list = next->next;
 			continue;
-
+		case TOKEN_COMMENT:	/* stripped unless keep_comment_tokens is set */
+			if (!keep_comment_tokens) {
+				*list = next->next;	/* unlink before freeing */
+				__free_token(next);
+				continue;
+			}	/* fall through - a kept comment takes the default path */
 		default:
 			dirty_stream(stream);
 			if (false_nesting) {
diff --git a/deps/sparse/token.h b/deps/sparse/token.h
index cd29233..96ec55b 100644
--- a/deps/sparse/token.h
+++ b/deps/sparse/token.h
@@ -84,6 +84,7 @@ enum token_type {
 	TOKEN_IF,
 	TOKEN_SKIP_GROUPS,
 	TOKEN_ELSE,
+	TOKEN_COMMENT,	/* line or block comment emitted by the tokenizer */
 };
 
 /* Combination tokens */
@@ -165,6 +166,7 @@ struct token {
 		struct ident *ident;
 		unsigned int special;
 		struct string *string;
+		struct string *comment;	/* TOKEN_COMMENT: comment text, opening delimiter included */
 		int character;
 		int argnum;
 		struct argcount count;
@@ -172,6 +174,7 @@ struct token {
 };
 
 #define MAX_STRING 4095
+#define MAX_COMMENT 8192	/* tokenizer comment buffer size; longer comments are truncated with a warning */
 
 static inline struct token *containing_token(struct token **p)
 {
diff --git a/deps/sparse/tokenize.c b/deps/sparse/tokenize.c
index d4f05e5..fc81584 100644
--- a/deps/sparse/tokenize.c
+++ b/deps/sparse/tokenize.c
@@ -137,6 +137,9 @@ const char *show_token(const struct token *token)
 	case TOKEN_IDENT:
 		return show_ident(token->ident);
 
+	case TOKEN_COMMENT:	/* comment text is a struct string, so show_string() is reused */
+		return show_string(token->comment);	/* NOTE(review): confirm show_string()'s output form suits comments */
+
 	case TOKEN_STRING:
 	case TOKEN_WIDE_STRING:
 		return show_string(token->string);
@@ -612,38 +615,102 @@ static int get_string_token(int next, stream_t *stream, enum token_type type)
 	return next;
 }
 
-static int drop_stream_eoln(stream_t *stream)
+static int get_oneline_comment_token(stream_t *stream)
 {
-	drop_token(stream);
+	static char buffer[MAX_COMMENT];
+	struct string *comment;
+	int len = 0;
+	int next;
+	struct token *token;
+	/* Gather a "//" comment, up to but excluding the newline, into a TOKEN_COMMENT. */
+	buffer[len++] = '/';
+	buffer[len++] = '/';
+	/* The "//" prefix is re-created by hand; presumably the caller already consumed it. */
 	for (;;) {
-		switch (nextchar(stream)) {
+		next = nextchar(stream);
+		switch (next) {
 		case EOF:
-			return EOF;
+			goto done;
 		case '\n':
-			return nextchar(stream);
+			next = nextchar(stream);
+			goto done;
+		default:
+			if (len < MAX_COMMENT)
+				buffer[len] = next;
+			len++;
 		}
 	}
+done:
+	/* len counts every character seen, but only the first MAX_COMMENT were stored. */
+	if (len > MAX_COMMENT) {
+		warning(stream_pos(stream), "comment too long (%d bytes, %d bytes max)", len, MAX_COMMENT);
+		len = MAX_COMMENT;
+	}
+	/* Copy into a NUL-terminated struct string; length includes the terminator. */
+	comment = __alloc_string(len+1);
+	memcpy(comment->data, buffer, len);
+	comment->data[len] = '\0';
+	comment->length = len+1;
+
+	/* Pass it on.. */
+	token = stream->token;
+	token_type(token) = TOKEN_COMMENT;
+	token->comment = comment;
+	add_token(stream);
+	/* next is EOF, or the first character after the terminating newline. */
+	return next;
 }
 
-static int drop_stream_comment(stream_t *stream)
+static int get_comment_token(stream_t *stream)
 {
+	static char buffer[MAX_COMMENT];
+	struct string *comment;
 	int newline;
 	int next;
-	drop_token(stream);
+	struct token *token;
+	int len = 0;
+	/* Gather a block comment, delimiters included, into a TOKEN_COMMENT token. */
+	buffer[len++] = '/';
+	buffer[len++] = '*';
+
 	newline = stream->newline;
 
 	next = nextchar(stream);
+	buffer[len++] = next;
 	for (;;) {
 		int curr = next;
 		if (curr == EOF) {
 			warning(stream_pos(stream), "End of file in the middle of a comment");
+			drop_token(stream);	/* unterminated: return without adding a comment token */
 			return curr;
 		}
 		next = nextchar(stream);
+		/* Store next while it fits; len keeps counting so overflow is detectable. */
+		if (len < MAX_COMMENT)
+			buffer[len] = next;
+		len++;
+
 		if (curr == '*' && next == '/')
 			break;
 	}
 	stream->newline = newline;
+	/* More characters were seen than stored: warn and clamp to the buffer size. */
+	if (len > MAX_COMMENT) {
+		warning(stream_pos(stream), "comment too long (%d bytes, %d bytes max)", len, MAX_COMMENT);
+		len = MAX_COMMENT;
+	}
+	/* Copy into a NUL-terminated struct string; length includes the terminator. */
+	comment = __alloc_string(len+1);
+	memcpy(comment->data, buffer, len);
+	comment->data[len] = '\0';
+	comment->length = len+1;
+
+	/* Pass it on.. */
+	token = stream->token;
+	token_type(token) = TOKEN_COMMENT;
+	token->comment = comment;
+	add_token(stream);
+	/* Hand back the first character after the closing delimiter. */
 	return nextchar(stream);
 }
 
@@ -730,9 +797,9 @@ static int get_one_special(int c, stream_t *stream)
 		return get_char_token(next, stream, TOKEN_CHAR);
 	case '/':
 		if (next == '/')
-			return drop_stream_eoln(stream);
+			return get_oneline_comment_token(stream);
 		if (next == '*')
-			return drop_stream_comment(stream);
+			return get_comment_token(stream);
 	}
 
 	/*
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]