[buildj/yaml: 2/2] parser: add support for block style sequences and mappings

From: Abderrahim Kitouni <akitouni src gnome org>
To: commits-list gnome org
Cc:
Subject: [buildj/yaml: 2/2] parser: add support for block style sequences and mappings
Date: Thu, 23 Sep 2010 16:24:01 +0000 (UTC)
commit 1a79e263c414c5e0281dd6a5d8a2a4e798b2f17c
Author: Abderrahim Kitouni <a kitouni gmail com>
Date:   Wed Jul 21 19:53:00 2010 +0100

    parser: add support for block style sequences and mappings

 parser.py |   84 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 73 insertions(+), 11 deletions(-)
---
diff --git a/parser.py b/parser.py
index 8f87bc8..238d2a6 100644
--- a/parser.py
+++ b/parser.py
@@ -5,26 +5,59 @@ class BuilDjParser:
 	def parse(self, s):
 		self.tokeniter = tokenize.generate_tokens(StringIO(s).next)
 		self.buf = []
+		self._extra_indent = False
 		self.next()
-		try:
-			return self.parse_value()
-		finally:
-			self.skip()
-			while self.current[0] == tokenize.NEWLINE:
-				self.next()
-			if self.current[0] != tokenize.ENDMARKER:
-				print "WARNING: expected end of file at %d:%d" % self.current[2]
+
+		cur = self.current
+		next = self.next()
+		if cur[1] == '-' and next[1] == '-':
+			self.expect_op('-')
+		else:
+			# document starting with a block style sequence
+			self.buf.append(next)
+			self.current = cur
+
+		if self.current[1] not in ('[', '{'):
+			self.buf.append(self.current)
+			self.current = (tokenize.INDENT, '', (0, 0), (0, 0), '')
+			self._extra_indent = True
+
+		val = self.parse_value()
+
+		self.skip()
+		while self.accept(tokenize.NEWLINE):
+			pass
+		if self.accept_op('.'):
+			self.expect_op('.')
+			self.expect_op('.')
+		elif self.current[0] != tokenize.ENDMARKER:
+			print "WARNING: expected end of file at %d:%d" % self.current[2]
+
+		return val
 
 	def next(self):
 		if self.buf:
 			self.current = self.buf.pop()
-		self.current = self.tokeniter.next()
+		else:
+			self.current = self.tokeniter.next()
+
+		if self.current[0] == tokenize.ENDMARKER and self._extra_indent:
+			self.buf.append(self.current)
+			self.current = (tokenize.DEDENT, '', (0, 0), (0, 0), '')
+			self._extra_indent = False
+
 		return self.current
 
 	def accept(self, toktype):
 		if self.current[0] == toktype:
 			self.next()
 			return True
+	def expect(self, toktype):
+		if not self.accept(toktype):
+			raise SyntaxError('expected "%s" at %d:%d, found %s' % (tokenize.tok_name[toktype],
+			                                                        self.current[2][0],
+			                                                        self.current[2][1],
+			                                                        tokenize.tok_name[self.current[0]]))
 
 	def accept_op(self, exp):
 		if self.current[0] == tokenize.OP and self.current[1] == exp:
@@ -33,7 +66,10 @@ class BuilDjParser:
 
 	def expect_op(self, exp):
 		if not self.accept_op(exp):
-			raise SyntaxError('expected "%s" at %d:%d, found %s' % (exp, self.current[2][0], self.current[2][1], self.current[1]))
+			raise SyntaxError('expected "%s" at %d:%d, found %s' % (exp,
+			                                                        self.current[2][0],
+			                                                        self.current[2][1],
+			                                                        self.current[1]))
 
 	def skip(self):
 		while self.current[0] in (tokenize.NL, tokenize.COMMENT):
@@ -51,7 +87,10 @@ class BuilDjParser:
 			else:
 				raise SyntaxError('unexpected operator at %d:%d' % self.current[2])
 		elif self.current[0] == tokenize.INDENT:
-			print "INDENT"
+			if self.next()[1] == '-':
+				val = self.parse_block_seq()
+			else:
+				val = self.parse_block_map()
 		elif self.current[0] == tokenize.STRING:
 			val = self.current[1][1:-1].decode('string_escape')
 			self.next()
@@ -98,11 +137,34 @@ class BuilDjParser:
 
 		return map
 
+	def parse_block_seq(self):  # parse "- item" entries into a list; current token is the first '-'
+		seq = []
+		while self.accept_op('-'):  # '-' is an OP token's text; accept() compares token *types*
+			seq.append(self.parse_value())
+		self.expect(tokenize.DEDENT)  # DEDENT is a token type, so expect(), not expect_op()
+		return seq
+
+	def parse_block_map(self):
+		map = {}
+		self.accept(tokenize.INDENT)
+		while not self.accept(tokenize.DEDENT):
+			k = self.parse_value()
+			self.expect_op(':')
+			self.accept(tokenize.NEWLINE)
+			v = self.parse_value()
+			self.accept(tokenize.NEWLINE)
+			self.skip()
+			self.accept(tokenize.NEWLINE)
+			map[k] = v
+
+		return map
+
 if __name__ == "__main__":
 	for s in ('{"one":"two"}',
 	          '["list","list2"]',
 	          '{"one":"two", "three": ["list","list2"]}',
 	          '{\n\t"indented flow mappings": work, # including comments\n\t"and":\n\t{\n\t\tsub: mappings,\n\t\t"a1": "a2"\n\t},\n\t"key": "val"\n}',
+	          '"indented flow mappings": work # including comments\n"and":\n\tsub: mappings\n\t"a1": "a2"\n"key": "val"',
 	          ):
 		print
 		print s
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]