[anjuta/cxxparser] cxxparser: adding an extra tokenizer fixes some expression parsing problems.

From: Massimo Cora' <mcora src gnome org>
To: svn-commits-list gnome org
Cc:
Subject: [anjuta/cxxparser] cxxparser: adding an extra tokenizer fixes some expression parsing problems.
Date: Sat, 14 Nov 2009 00:07:05 +0000 (UTC)
commit a4f883e3546534f2055a86842b68a58432a2d553
Author: Massimo Corà <mcora src gnome org>
Date:   Sat Nov 14 00:55:22 2009 +0100

    cxxparser: adding an extra tokenizer fixes some expression parsing problems.
    
    The optimizeScope () function was resetting the text on the main tokenizer, thus breaking
    correct token parsing. It now operates on a separate tokenizer, _extra_tokenizer.

 .gitignore                                         |    2 +
 plugins/symbol-db/cxxparser/engine-parser-priv.h   |    7 +-
 plugins/symbol-db/cxxparser/engine-parser.cpp      |   83 ++++++++-------
 plugins/symbol-db/cxxparser/main.c                 |  111 +++++++++++++++++++-
 .../cxxparser/sample-db/test-complex-klass.cxx     |   45 ++++++++
 .../cxxparser/sample-db/test-simple-klass.cxx      |   23 ++++
 6 files changed, 226 insertions(+), 45 deletions(-)
---
diff --git a/.gitignore b/.gitignore
index 102d163..d69cf0d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,6 +48,7 @@
 /po/be@latin.gmo
 /po/bg.gmo
 /po/ca.gmo
+/po/ca@valencia.gmo
 /po/cs.gmo
 /po/da.gmo
 /po/de.gmo
@@ -73,6 +74,7 @@
 /po/ko.gmo
 /po/lt.gmo
 /po/lv.gmo
+/po/mai.gmo
 /po/mk.gmo
 /po/ml.gmo
 /po/mr.gmo
diff --git a/plugins/symbol-db/cxxparser/engine-parser-priv.h b/plugins/symbol-db/cxxparser/engine-parser-priv.h
index c3b88ca..dff2b4a 100644
--- a/plugins/symbol-db/cxxparser/engine-parser-priv.h
+++ b/plugins/symbol-db/cxxparser/engine-parser-priv.h
@@ -92,13 +92,13 @@ private:
 	
 	/**
 	 * Return the next token and the delimiter found, the source string is taken from the
-	 * _tokenizer member of this class.
+	 * _main_tokenizer member of this class.
 	 *
 	 * @param token Next token
 	 * @param delim Delimiter found (as ".", "::", or "->")
 	 * @return true If token was found false otherwise
 	 */	
-	bool nextToken (string &out_token, string &out_delimiter);
+	bool nextMainToken (string &out_token, string &out_delimiter);
 
 	/**
 	 * Trim a string using some default chars.
@@ -121,7 +121,8 @@ private:
 	 */	
 	static EngineParser *s_engine;	
 
-	CppTokenizer *_tokenizer;
+	CppTokenizer *_main_tokenizer;
+	CppTokenizer *_extra_tokenizer;
 	SymbolDBEngine *_dbe;
 };
 
diff --git a/plugins/symbol-db/cxxparser/engine-parser.cpp b/plugins/symbol-db/cxxparser/engine-parser.cpp
index 3fc0be6..71b6bc1 100644
--- a/plugins/symbol-db/cxxparser/engine-parser.cpp
+++ b/plugins/symbol-db/cxxparser/engine-parser.cpp
@@ -45,18 +45,19 @@ EngineParser::getInstance ()
 
 EngineParser::EngineParser ()
 {	
-	_tokenizer = new CppTokenizer ();	
+	_main_tokenizer = new CppTokenizer ();	
+	_extra_tokenizer = new CppTokenizer ();	
 	_dbe = NULL;
 }
 
 bool 
-EngineParser::nextToken (string &out_token, string &out_delimiter)
+EngineParser::nextMainToken (string &out_token, string &out_delimiter)
 {
 	out_token.clear ();
 	
 	int type(0);
 	int depth(0);
-	while ( (type = _tokenizer->yylex()) != 0 ) 
+	while ( (type = _main_tokenizer->yylex()) != 0 ) 
 	{		
 		switch (type) 
 		{			
@@ -65,12 +66,12 @@ EngineParser::nextToken (string &out_token, string &out_delimiter)
 		case lexARROW:
 			if (depth == 0) 
 			{
-				out_delimiter = _tokenizer->YYText();
+				out_delimiter = _main_tokenizer->YYText();
 				trim (out_token);
 				return true;
 			} else 
 			{
-				out_token.append (" ").append (_tokenizer->YYText());
+				out_token.append (" ").append (_main_tokenizer->YYText());
 			}
 			break;
 				
@@ -79,7 +80,7 @@ EngineParser::nextToken (string &out_token, string &out_delimiter)
 		case '(':
 		case '{':
 			depth++;
-			out_token.append (" ").append (_tokenizer->YYText());
+			out_token.append (" ").append (_main_tokenizer->YYText());
 			break;
 				
 		case '>':
@@ -87,11 +88,11 @@ EngineParser::nextToken (string &out_token, string &out_delimiter)
 		case ')':
 		case '}':
 			depth--;
-			out_token.append (" ").append (_tokenizer->YYText());
+			out_token.append (" ").append (_main_tokenizer->YYText());
 			break;
 				
 		default:
-			out_token.append (" ").append (_tokenizer->YYText());
+			out_token.append (" ").append (_main_tokenizer->YYText());
 			break;
 		}
 	}
@@ -103,12 +104,12 @@ void
 EngineParser::DEBUG_printTokens (const string& text)
 {
 	// FIXME
-	_tokenizer->setText (text.c_str ());
+	_main_tokenizer->setText (text.c_str ());
 
 	string op;
 	string token;
 	int i = 0;
-	while (nextToken(token, op)) 
+	while (nextMainToken(token, op)) 
 	{
 		printf ("tok %d %s [op %s]\n", i, token.c_str (), op.c_str ());
 		//ExpressionResult result = parse_expression(token);
@@ -132,13 +133,13 @@ EngineParser::parseExpression(const string &in)
 void
 EngineParser::testParseExpression (const string &str)
 {
-	_tokenizer->setText(str.c_str ());
+	_main_tokenizer->setText(str.c_str ());
 
 	string word;
 	string op;
 	ExpressionResult result;
 	
-	while (nextToken (word, op)) {
+	while (nextMainToken (word, op)) {
 
 		cout << "--------\ngot word " << word << " op " << op << endl; 
 		// fill up ExpressionResult
@@ -324,7 +325,7 @@ EngineParser::getTypeNameAndScopeByToken (ExpressionResult &result,
 			if (token == var.m_name) 
 			{
 				cout << "wh0a! we found the variable type to parse... it's \"" << 
-					var.m_type << "\"" << endl;
+					var.m_type << "\" with typescope \"" << var.m_typeScope << "\"" << endl;
 				out_type_name = var.m_type;
 				out_type_scope = var.m_typeScope;
 
@@ -358,10 +359,13 @@ EngineParser::getCurrentSearchableScope (string &type_name, string &type_scope)
 		SymbolDBEngineIteratorNode *node;
 
 		node = SYMBOL_DB_ENGINE_ITERATOR_NODE (curr_searchable_scope);
-	
-		cout << "Current Searchable Scope " <<
+
+		const gchar *skind = symbol_db_engine_iterator_node_get_symbol_extra_string (node,
+		    SYMINFO_KIND);
+		
+		cout << "Current Searchable Scope name \"" <<
     		symbol_db_engine_iterator_node_get_symbol_name (node) << 					
-			" and id "<< symbol_db_engine_iterator_node_get_symbol_id (node) << 
+			"\" kind \"" << skind << "\" and id "<< symbol_db_engine_iterator_node_get_symbol_id (node) << 
 			endl;
 
 		/* is it a typedef? In that case find the parent struct */
@@ -389,7 +393,8 @@ EngineParser::getCurrentSearchableScope (string &type_name, string &type_scope)
 
 /**
  * @param test Must be searched with SYMINFO_KIND 
- * @return or the same test or a new struct. In that second case test is unreffed
+ * @return or the same test iterator or a new struct. In that second case the input 
+ * iterator test is unreffed.
  * 
  */
 SymbolDBEngineIterator *
@@ -442,7 +447,7 @@ EngineParser::switchMemberToContainer (SymbolDBEngineIterator * test)
 
 		test = new_container;
 
-		cout << ".. found new conainer with n items " << 
+		cout << ".. found new container with n items " << 
 			symbol_db_engine_iterator_get_n_items (test) << endl;
 	}
 	else 
@@ -469,14 +474,15 @@ EngineParser::processExpression(const string& stmt,
 	string type_scope;
 
 	/* first token */
-	_tokenizer->setText (stmt.c_str ());
+	cout << "setting text " << stmt.c_str () << " to the tokenizer " << endl;
+	_main_tokenizer->setText (stmt.c_str ());
 
 	/* get the fist one */
-	nextToken (current_token, op);		
+	nextMainToken (current_token, op);		
 
-	cout << "--------" << endl << "First token \"" << current_token << "\" with op \"" << op 
+	cout << "--------" << endl << "First main token \"" << current_token << "\" with op \"" << op 
 		 << "\"" << endl; 
-		
+
 	/* parse the current sub-expression of a statement and fill up 
 	 * ExpressionResult object
 	 */
@@ -493,16 +499,15 @@ EngineParser::processExpression(const string& stmt,
     									  above_text,
     									  type_name, 
     									  type_scope);
-
 	if (process_res == false)
 	{
 		cout << "Well, you haven't much luck, the first token failed and then "  <<
 			"I cannot continue. " << endl;
 		return NULL;
 	}
-
-	cout << "Going to search for curr_searchable_scope with type_name " << type_name <<
-		" and type_scope " << type_scope << endl;
+	
+	cout << "Going to search for curr_searchable_scope with type_name \"" << type_name << "\"" << 
+		" and type_scope \"" << type_scope << "\"" << endl;
 
 	/* at this time we're enough ready to issue a first query to our db. 
 	 * We absolutely need to find the searchable object scope of the first result 
@@ -519,9 +524,9 @@ EngineParser::processExpression(const string& stmt,
 	}	
 	
 	/* fine. Have we more tokens left? */
-	while (nextToken (current_token, op)) 
+	while (nextMainToken (current_token, op) == 1) 
 	{
-		cout << "--------\nNext token \"" << current_token << "\" with op \"" << op 
+		cout << "--------\nNext main token \"" << current_token << "\" with op \"" << op 
 			 << "\"" << endl;
 
 		/* parse the current sub-expression of a statement and fill up 
@@ -577,7 +582,7 @@ EngineParser::processExpression(const string& stmt,
 			sym_kind = symbol_db_engine_iterator_node_get_symbol_extra_string (node, 
 		    										SymExtraInfo (SYMINFO_KIND));
 			
-			cout << ".. it has sym_kind " << sym_kind << endl;
+			cout << ".. it has sym_kind \"" << sym_kind << "\"" << endl;
 
 			/* the same check as in the engine-core on sdb_engine_add_new_sym_type () */
 			if (g_strcmp0 (sym_kind, "member") == 0 || 
@@ -606,7 +611,7 @@ EngineParser::processExpression(const string& stmt,
 		}
 	}
 
-	cout << "returning curr_searchable_scope" << endl;
+	cout << "END of expression processing. Returning curr_searchable_scope" << endl;
 	return curr_searchable_scope;
 }
 
@@ -624,13 +629,13 @@ EngineParser::optimizeScope(const string& srcString)
 
 	/* Initialize the scanner with the string to search */
 	const char * scannerText =  srcString.c_str ();
-	_tokenizer->setText (scannerText);
+	_extra_tokenizer->setText (scannerText);
 	bool changedLine = false;
 	bool prepLine = false;
 	int curline = 0;
 	while (true) 
 	{
-		type = _tokenizer->yylex();
+		type = _extra_tokenizer->yylex();
 
 		/* Eof ? */
 		if (type == 0) 
@@ -641,23 +646,23 @@ EngineParser::optimizeScope(const string& srcString)
 		}
 
 		/* eat up all tokens until next line */
-		if ( prepLine && _tokenizer->lineno() == curline) 
+		if ( prepLine && _extra_tokenizer->lineno() == curline) 
 		{
 			currScope += " ";
-			currScope += _tokenizer->YYText();
+			currScope += _extra_tokenizer->YYText();
 			continue;
 		}
 
 		prepLine = false;
 
 		/* Get the current line number, it will help us detect preprocessor lines */
-		changedLine = (_tokenizer->lineno() > curline);
+		changedLine = (_extra_tokenizer->lineno() > curline);
 		if (changedLine) 
 		{
 			currScope += "\n";
 		}
 
-		curline = _tokenizer->lineno();
+		curline = _extra_tokenizer->lineno();
 		switch (type) 
 		{
 		case (int)'(':
@@ -695,18 +700,18 @@ EngineParser::optimizeScope(const string& srcString)
 				 * consume everything until new line is found or end of text
 				 */
 				currScope += " ";
-				currScope += _tokenizer->YYText();
+				currScope += _extra_tokenizer->YYText();
 				prepLine = true;
 				break;
 			}
 		default:
 			currScope += " ";
-			currScope += _tokenizer->YYText();
+			currScope += _extra_tokenizer->YYText();
 			break;
 		}
 	}
 
-	_tokenizer->reset();
+	_extra_tokenizer->reset();
 
 	if (scope_stack.empty())
 		return srcString;
diff --git a/plugins/symbol-db/cxxparser/main.c b/plugins/symbol-db/cxxparser/main.c
index 591070f..d31c2a1 100644
--- a/plugins/symbol-db/cxxparser/main.c
+++ b/plugins/symbol-db/cxxparser/main.c
@@ -61,6 +61,27 @@
 	engine_parser_init (dbe);	\
 }
 
+#define INIT_CXX_TEST(source_file,callback) { \
+	gchar *associated_source_file = SAMPLE_DB_ABS_PATH""source_file".cxx";	\
+	gchar *associated_db = source_file;	\
+	gchar *root_dir = SAMPLE_DB_ABS_PATH;	\
+	SymbolDBEngine *dbe = symbol_db_engine_new_full (ANJUTA_TAGS, associated_db);	\
+	symbol_db_engine_open_db (dbe, root_dir, root_dir);	\
+	symbol_db_engine_add_new_project (dbe, NULL, root_dir);	\
+	g_signal_connect (dbe, "scan-end", G_CALLBACK (callback), NULL);	\
+	\
+	GPtrArray *files_array = g_ptr_array_new ();	\
+	g_ptr_array_add (files_array, associated_source_file);	\
+	GPtrArray *source_array = g_ptr_array_new ();	\
+	g_ptr_array_add (source_array, "CXX");	\
+	\
+	if (symbol_db_engine_add_new_files_full (dbe, root_dir, files_array, source_array, TRUE) < 0)	\
+		g_warning ("Error on scanning");	\
+	\
+	engine_parser_init (dbe);	\
+}
+
+
 static SymbolDBEngineIterator * 
 get_children_by_iterator (SymbolDBEngine *dbe, SymbolDBEngineIterator * iter)
 {
@@ -94,6 +115,85 @@ get_children_by_iterator (SymbolDBEngine *dbe, SymbolDBEngineIterator * iter)
 	return children;
 }
 
+
+/******************************************************************************/
+static void 
+on_test_complex_klass_scan_end (SymbolDBEngine* dbe, gpointer user_data)
+{
+	gchar *associated_source_file = SAMPLE_DB_ABS_PATH"test-complex-klass.cxx";	
+	gchar *file_content;
+	SymbolDBEngineIterator *iter;
+	SymbolDBEngineIterator *children;
+	
+	g_file_get_contents (associated_source_file, &file_content, NULL, NULL);
+
+	iter = engine_parser_process_expression ("kl->pm_first_klass->", 
+	                                         file_content, 
+	    									 associated_source_file, 
+	                                         45);
+
+	g_free (file_content);
+
+	/* process the result */
+	g_assert (iter != NULL);
+
+	children = get_children_by_iterator (dbe, iter);
+	
+	DBI_TEST_NAME (children, 0, "foo1_private");
+	DBI_TEST_NAME (children, 1, "foo2_private");
+	DBI_TEST_NAME (children, 2, "foo3_private");
+	DBI_TEST_NAME (children, 3, "FirstKlass");
+
+	g_object_unref (iter);
+	g_object_unref (children);		
+}
+
+static void
+test_complex_klass ()
+{		
+	INIT_CXX_TEST("test-complex-klass", on_test_complex_klass_scan_end);
+}
+
+
+/******************************************************************************/
+static void 
+on_test_simple_klass_scan_end (SymbolDBEngine* dbe, gpointer user_data)
+{
+	gchar *associated_source_file = SAMPLE_DB_ABS_PATH"test-simple-klass.cxx";	
+	gchar *file_content;
+	SymbolDBEngineIterator *iter;
+	SymbolDBEngineIterator *children;
+	
+	g_file_get_contents (associated_source_file, &file_content, NULL, NULL);
+
+	iter = engine_parser_process_expression ("kl->", 
+	                                         file_content, 
+	    									 associated_source_file, 
+	                                         23);
+
+	g_free (file_content);
+
+	/* process the result */
+	g_assert (iter != NULL);
+
+	children = get_children_by_iterator (dbe, iter);
+	
+	DBI_TEST_NAME (children, 0, "foo1_private");
+	DBI_TEST_NAME (children, 1, "foo2_private");
+	DBI_TEST_NAME (children, 2, "foo3_private");
+	DBI_TEST_NAME (children, 3, "FirstKlass");
+
+	g_object_unref (iter);
+	g_object_unref (children);		
+}
+
+static void
+test_simple_klass ()
+{		
+	INIT_CXX_TEST("test-simple-klass", on_test_simple_klass_scan_end);
+}
+
+
 /******************************************************************************/
 static void 
 on_test_complex_struct_scan_end (SymbolDBEngine* dbe, gpointer user_data)
@@ -105,7 +205,7 @@ on_test_complex_struct_scan_end (SymbolDBEngine* dbe, gpointer user_data)
 	
 	g_file_get_contents (associated_source_file, &file_content, NULL, NULL);
 
-	iter = engine_parser_process_expression ("((_foo*)var)->asd_struct->", 
+	iter = engine_parser_process_expression ("((foo*)var)->asd_struct->", 
 	                                         file_content, 
 	    									 associated_source_file, 
 	                                         18);
@@ -221,11 +321,16 @@ int	main (int argc, char *argv[])
 
 	g_message ("SAMPLE_DB_ABS_PATH %s", SAMPLE_DB_ABS_PATH);
  	g_test_init (&argc, &argv, NULL);
-
+/*
+	g_test_add_func ("/complex_c/test-complex-struct", test_complex_struct);
 	g_test_add_func ("/simple_c/test-simple-struct", test_simple_struct);
 	g_test_add_func ("/simple_c/test-cast-simple-struct", test_cast_simple_struct);
-	g_test_add_func ("/complex_c/test-complex-struct", test_complex_struct);
 
+	g_test_add_func ("/simple_cxx/test_simple_klass", test_simple_klass);
+*/	
+	g_test_add_func ("/simple_cxx/test_complex_klass", test_complex_klass);
+	
+	
 	g_test_run ();
 	g_message ("test run finished");
 
diff --git a/plugins/symbol-db/cxxparser/sample-db/test-complex-klass.cxx b/plugins/symbol-db/cxxparser/sample-db/test-complex-klass.cxx
new file mode 100644
index 0000000..7449e24
--- /dev/null
+++ b/plugins/symbol-db/cxxparser/sample-db/test-complex-klass.cxx
@@ -0,0 +1,45 @@
+// FirstKlass Decl
+class FirstKlass 
+{
+private:
+	int foo1_private ();
+	void foo2_private () {
+	};
+
+	const char * foo3_private () {
+		return "hey";
+	};
+	
+public:
+	FirstKlass () {};
+};
+
+// SecondKlass Decl
+class SecondKlass 
+{
+private:
+
+	int foo4_private ();
+	void foo5_private () {
+		return;
+	};
+	
+protected:
+	char foo6_protected () {
+		return 'c';
+	};
+	
+public:
+	SecondKlass (int a, char b, char * c);
+
+	// data
+	FirstKlass *pm_first_klass;
+};
+
+
+int main ()
+{
+
+	SecondKlass * kl = new SecondKlass ();
+
+	kl->pm_first_klass->
diff --git a/plugins/symbol-db/cxxparser/sample-db/test-simple-klass.cxx b/plugins/symbol-db/cxxparser/sample-db/test-simple-klass.cxx
new file mode 100644
index 0000000..e45202e
--- /dev/null
+++ b/plugins/symbol-db/cxxparser/sample-db/test-simple-klass.cxx
@@ -0,0 +1,23 @@
+class FirstKlass {
+
+private:
+	int foo1_private ();
+	void foo2_private () {
+	};
+
+	const char * foo3_private () {
+		return "hey";
+	};
+	
+public:
+	FirstKlass () {};
+};
+
+
+
+int main ()
+{
+
+	FirstKlass * kl = new FirstKlass ();
+
+	kl->
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]