[buildj/yaml: 1/2] initial take at writing a parser



commit 3638bf3ef126ff44c7e21e6b449e8993e78e8979
Author: Abderrahim Kitouni <a kitouni gmail com>
Date:   Sat Jul 17 11:17:11 2010 +0100

    initial take at writing a parser
    
    The format used is a (sane) subset of YAML: basically, JSON +
    comments + unquoted strings + block-style sequences and mappings.
    Right now, block-style sequences and mappings aren't implemented yet.

 buildj.py |   23 ++++--------
 parser.py |  112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+), 15 deletions(-)
---
diff --git a/buildj.py b/buildj.py
index 860fd67..fb20ca3 100644
--- a/buildj.py
+++ b/buildj.py
@@ -1,15 +1,4 @@
-try:
-	import json
-except ImportError:
-	# Python < 2.6 doesn't have json
-	import simplejson as json
-
-try:
-	json.decoder
-except AttributeError:
-	# this is json-py and not Python's json
-	import simplejson as json
-
+import parser
 import re
 
 WAF_TOOLS = {'cc':   'compiler_cc',
@@ -46,12 +35,14 @@ def normalize_package_name (name):
 
 class ProjectFile:
 	def __init__ (self, project="project.js"):
-		dec = json.decoder.JSONDecoder ()
+		p = parser.BuilDjParser()
+
 		prj = open(project)
 		data = prj.read ()
-		self._project = dec.decode (data)
 		prj.close ()
 
+		self._project = p.parse(data)
+
 		#TODO: try to raise some meaningful (and consistent) error
 		self._project_name = self._project['project']['name']
 		self._project_version = self._project['project']['version']
@@ -63,7 +54,9 @@ class ProjectFile:
 		for subdir in self._project.get ('subdirs', []):
 			prj = open ('%s/%s' % (subdir, project))
 			data = prj.read ()
-			subproject = dec.decode (data)
+			prj.close()
+
+			subproject = p.parse(data)
 			for target_name, target_data in subproject['targets'].iteritems():
 				assert target_name not in self._project['targets']
 				if 'path' in target_data:
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..8f87bc8
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,112 @@
+import tokenize
+from cStringIO import StringIO
+
+class BuilDjParser:
+	def parse(self, s):
+		"""Parse the string s and return the value (str, list or dict) it holds."""
+		self.tokeniter = tokenize.generate_tokens(StringIO(s).next)
+		self.buf = []
+		self.next()
+		val = self.parse_value()
+		# not in a "finally": a failed parse must not also warn or get masked
+		while self.current[0] in (tokenize.NEWLINE, tokenize.NL, tokenize.COMMENT):
+			self.next()
+		if self.current[0] != tokenize.ENDMARKER:
+			print "WARNING: expected end of file at %d:%d" % self.current[2]
+		return val
+
+	def next(self):
+		"""Advance to the next token, store it in self.current and return it."""
+		# a pushed-back token in self.buf must not be overwritten by a fresh one
+		self.current = self.buf.pop() if self.buf else self.tokeniter.next()
+		return self.current
+
+	def accept(self, toktype):  # consume the current token if its type equals toktype
+		if self.current[0] == toktype:
+			self.next()
+			return True  # no match: falls off the end and returns None
+
+	def accept_op(self, exp):  # consume the current token if it is the operator exp
+		if self.current[0] == tokenize.OP and self.current[1] == exp:
+			self.next()
+			return True  # no match: falls off the end and returns None
+
+	def expect_op(self, exp):  # like accept_op, but a missing operator raises SyntaxError
+		if not self.accept_op(exp):
+			raise SyntaxError('expected "%s" at %d:%d, found %s' % (exp, self.current[2][0], self.current[2][1], self.current[1]))
+
+	def skip(self):  # advance past any run of NL and COMMENT tokens
+		while self.current[0] in (tokenize.NL, tokenize.COMMENT):
+			self.next()
+
+	def parse_value(self):
+		"""Parse and return one value; raise SyntaxError on an unexpected token."""
+		self.skip()
+		val = None
+		if self.current[0] == tokenize.OP:
+			if self.current[1] == '[':
+				val = self.parse_flow_seq()
+			elif self.current[1] == '{':
+				val = self.parse_flow_map()
+			else:
+				raise SyntaxError('unexpected operator at %d:%d' % self.current[2])
+		elif self.current[0] == tokenize.INDENT:
+			print "INDENT"  # block style is not implemented yet; rejected below
+		elif self.current[0] == tokenize.STRING:
+			val = self.current[1][1:-1].decode('string_escape')
+			self.next()
+		elif self.current[0] == tokenize.NAME:
+			val = self.current[1]
+			self.next()
+
+		if val is None:
+			raise SyntaxError('unexpected token %r at %d:%d' % ((self.current[1],) + self.current[2]))
+
+		self.skip()
+		return val
+
+	def parse_flow_seq(self):
+		"""Parse a flow sequence "[a, b, ...]" and return it as a list."""
+		self.expect_op('[')
+		if self.accept_op(']'):  # "]" is an OP token, so match it by text (empty sequence)
+			return []
+
+		seq = [self.parse_value()]
+		while self.accept_op(','):
+			seq.append(self.parse_value())
+		self.expect_op(']')
+		return seq
+
+	def parse_flow_map(self):
+		"""Parse a flow mapping "{k: v, ...}" and return it as a dict."""
+		self.expect_op('{')
+		if self.accept_op('}'):
+			return {}
+
+		k = self.parse_value()
+		self.expect_op(':')
+		v = self.parse_value()
+		map = {k : v}
+
+		while self.accept_op(','):
+			k = self.parse_value()
+			self.expect_op(':')
+			v = self.parse_value()
+			map[k] = v
+
+		self.expect_op('}')
+
+		return map
+
+if __name__ == "__main__":  # smoke test: parse a few sample documents and print the results
+	for s in ('{"one":"two"}',
+	          '["list","list2"]',
+	          '{"one":"two", "three": ["list","list2"]}',
+	          '{\n\t"indented flow mappings": work, # including comments\n\t"and":\n\t{\n\t\tsub: mappings,\n\t\t"a1": "a2"\n\t},\n\t"key": "val"\n}',
+	          ):
+		print
+		print s
+		p = BuilDjParser()
+		pval =  p.parse(s)
+		print "parsed", pval
+



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]