[gcompris/gcomprixogoo] Now the descriptions of the definitions are managed individually in a python tree structure.
- From: Bruno Coudoin <bcoudoin src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gcompris/gcomprixogoo] Now the descriptions of the definitions are managed individually in a python tree structure.
- Date: Sun, 10 Oct 2010 19:12:59 +0000 (UTC)
commit 2345329f709d54ce3954eeb08ded3fee2693d623
Author: Bruno Coudoin <bruno coudoin free fr>
Date: Sun Oct 10 21:08:39 2010 +0200
Now the descriptions of the definitions are managed individually in a python tree structure.
tools/wiktio2xml/wiktio.py | 83 +++++++++++++++++++++++++++++++---
tools/wiktio2xml/wiktio2xml.py | 97 +++++++++++++++++----------------------
2 files changed, 119 insertions(+), 61 deletions(-)
---
diff --git a/tools/wiktio2xml/wiktio.py b/tools/wiktio2xml/wiktio.py
index 934bfdd..617673f 100644
--- a/tools/wiktio2xml/wiktio.py
+++ b/tools/wiktio2xml/wiktio.py
@@ -22,14 +22,82 @@
import os.path
+id = 0
+# Represent the description of a definition
+# This is recursive: a description can hold an
+# unlimited number of subdescriptions
+class Description:
+
+ def __init__ (self, parent, text, level, numbered = False):
+ global id
+ self.id = id
+ id += 1
+ self.parent = parent
+ self.level = level
+ self.text = text
+ self.numbered = numbered
+ self.descriptions = []
+
+ def isEmpty(self):
+ if len(self.descriptions) > 0:
+ return False
+ return True
+
+ # Return True if this node's text field, or that of one of its
+ # descendants, is not empty
+ def hasContent(self):
+ if len(self.text) > 0:
+ return True
+ else:
+ for d in self.descriptions:
+ if d.hasContent():
+ return True
+ return False
+
+ # Recursively walk up the parents to find the node at the given level (None if it is deeper than this node)
+ def getNodeAtLevel(self, level):
+ if level == self.level:
+ return self
+ elif level < self.level:
+ return self.parent.getNodeAtLevel(level)
+ else:
+ return None
+
+ def addDescription(self, text, level, numbered):
+ node = self.getNodeAtLevel(level - 1)
+ if node:
+ description = Description(node, text, level, numbered)
+ node.descriptions.append( description )
+ return description
+ return None
+
+ def dump2html(self, f):
+ if len(self.text) > 0:
+ f.write ( "<li>" + self.text + "</li>" )
+ if len(self.descriptions) > 0:
+ if self.level >= 0:
+ if self.numbered:
+ f.write ( "<ul>" )
+ else:
+ f.write ( "<ol>" )
+ for d in self.descriptions:
+ d.dump2html(f)
+ if self.level >= 0:
+ if self.numbered:
+ f.write ( "</ul>" )
+ else:
+ f.write ( "</ol>" )
+
+
class Definition:
def __init__ (self):
- self.text = ""
self.type = ""
self.subType = ""
self.filtered = False
self.gender = ""
+ self.rootDescription = Description(None, "", -1)
+ self.currentDescription = self.rootDescription
self.synonym = []
self.antonym = []
self.anagram = []
@@ -39,9 +107,6 @@ class Definition:
self.category = []
self.image = []
- def addText(self, text):
- self.text += text
-
def setType(self, type):
self.type = type
@@ -51,6 +116,12 @@ class Definition:
def setGender(self, gender):
self.gender = gender
+ # A definition may hold several descriptions; each one can
+ # have several subdescriptions.
+ def addDescription(self, text, level, numbered):
+ self.currentDescription = \
+ self.currentDescription.addDescription(text, level, numbered)
+
def add(self, atype, text):
if len(text) == 0:
return
@@ -102,13 +173,13 @@ class Definition:
f.write ( s + ", " )
def dump2html(self, f):
- if self.filtered or self.text == "":
+ if self.filtered or not self.rootDescription.hasContent():
return
f.write ( "<h3>" + self.type + \
" " + self.subType + \
" " + self.gender + "</h3>" )
self.dump2htmlImage(f)
- f.write ( self.text )
+ self.rootDescription.dump2html(f)
self.dump2htmlItem(f, "Synonymes", self.synonym)
self.dump2htmlItem(f, "Antonymes", self.antonym)
diff --git a/tools/wiktio2xml/wiktio2xml.py b/tools/wiktio2xml/wiktio2xml.py
index 3947688..1569a50 100755
--- a/tools/wiktio2xml/wiktio2xml.py
+++ b/tools/wiktio2xml/wiktio2xml.py
@@ -46,7 +46,7 @@ class WikiHandler(ContentHandler):
self.isTextElement = False
self.textContent = u""
- self.lilevel = []
+ self.lilevel = 0
def startElement(self, name, attrs):
@@ -164,42 +164,30 @@ class WikiHandler(ContentHandler):
# Notes:
# These may be nested.
#
- # We keep the level of indentation to close in the stack:
+ # We keep the current level of indentation in:
# self.lilevel
#
+ # Returns a list [text, level, numbered]
+ # numbered = True if this is a numbered list
+ #
def indents2xml(self, text, asText):
+ numbered = False
result = re.search(r"^[ ]*[*#:;]+[ ]*", text)
if not result:
- close = ""
- while self.lilevel:
- close += self.lilevel.pop()
- if not asText:
- text = close + text
- return text
+ self.lilevel = 0
+ return [text, self.lilevel, numbered]
indent = result.group(0).rstrip()
+ self.lilevel = len(indent)
text = text[result.end():]
if asText:
- return text
-
- result = ""
- # Close indents if needed
- while len(self.lilevel) > len(indent):
- result += self.lilevel.pop()
-
- # Open new indents
- # Remove the current level from it
- indent = indent[len(self.lilevel):]
- for char in list(indent):
- if char in "*:;":
- result += "<ul>"
- self.lilevel.append("</ul>")
- elif char == "#":
- result += "<ol>"
- self.lilevel.append("</ol>")
-
- return result + "<li>" + text + "</li>"
+ return [text, self.lilevel, numbered]
+
+ if indent[-1:] == "#":
+ numbered = True
+
+ return [text, self.lilevel, numbered]
# Replaces '''xx''' and ''xx'' from the given text
# with openXml xx closeXml
@@ -216,6 +204,8 @@ class WikiHandler(ContentHandler):
return text
# Replace standard Wiki tags to XML
+ # Returns a list [text, level, numbered]
+ # numbered = True if this is a numbered list
def wiki2xml(self, text, asText):
text = re.sub(r"{{[-\)\(]}}", "", text)
@@ -224,7 +214,7 @@ class WikiHandler(ContentHandler):
if text == "":
return self.indents2xml(text, asText)
- text = self.indents2xml(text, asText)
+ [text, level, numbered] = self.indents2xml(text, asText)
text = re.sub(ur"{{par ext[^}]*}}", ur"(Par extension)", text)
text = re.sub(ur"{{figuré[^}]*}}", ur"(Figuré)", text)
text = re.sub(ur"{{w\|([^}]+)}}", ur"<i>\1</i>", text)
@@ -250,7 +240,7 @@ class WikiHandler(ContentHandler):
text = text[:start] + text[pipe+1:]
text = text.replace("]]", "", 1)
- return text
+ return [text, level, numbered]
# Wikipedia text content is interpreted and transformed in XML
def parseText(self):
@@ -280,7 +270,7 @@ class WikiHandler(ContentHandler):
concat = ""
next = False
- if debug: print "<br/>l:" + l + ":"
+ if debug: print " l:" + l + ":"
if re.search(r"<[^>]+$", l):
# Wiki uses a trick to format text area by ending in uncomplete
# html tags. In this case, we concat this line with the next one
@@ -296,23 +286,33 @@ class WikiHandler(ContentHandler):
# Get rid of non wiki tags
l = re.sub(r'}}[^}]+{{', r'}} {{', l)
state = Wiktio.DEFINITION
+
+ for wt in self.genders.keys():
+ if re.search(wt, l):
+ gender = self.genders[wt]
+ definition.setGender(gender)
+ break
+
+ for wt in self.wordSubTypes.keys():
+ if re.search(wt, l):
+ wordSubType = self.wordSubTypes[wt]
+ definition.setSubType(wordSubType)
+ break
+
+ definition.addDescription("", 0, False)
+ continue
+
elif l == "{{-anagr-}}":
- definition.addText(self.wiki2xml("", False))
state = Wiktio.ANAGRAM
elif l == "{{-syn-}}":
- definition.addText(self.wiki2xml("", False))
state = Wiktio.SYNONYM
elif l == "{{-ant-}}":
- definition.addText(self.wiki2xml("", False))
state = Wiktio.ANTONYM
elif l == "{{-hyper-}}":
- definition.addText(self.wiki2xml("", False))
state = Wiktio.HYPERONYM
elif l == "{{-hypo-}}":
- definition.addText(self.wiki2xml("", False))
state = Wiktio.HYPONYM
elif l == "{{-pron-}}":
- definition.addText(self.wiki2xml("", False))
state = Wiktio.PRON
elif l == "{{-note-}}":
state = Wiktio.SKIP
@@ -331,10 +331,8 @@ class WikiHandler(ContentHandler):
elif l == u"{{-réf-}}":
state = Wiktio.SKIP
elif re.search(r"{{-.*-.*}}", l):
- if definition.text != "":
- if debug: print "<br/>new definition:" + l + ":"
- # Force a <ul> close if needed
- definition.addText(self.wiki2xml("", False))
+ if not definition.rootDescription.isEmpty():
+ if debug: print " new definition:" + l + ":"
# Next definition
filterIndent = ""
definition = wiktio.Definition()
@@ -364,18 +362,6 @@ class WikiHandler(ContentHandler):
definition.filtered = True
break
- for wt in self.genders.keys():
- if re.search(wt, l):
- gender = self.genders[wt]
- definition.setGender(gender)
- break
-
- for wt in self.wordSubTypes.keys():
- if re.search(wt, l):
- wordSubType = self.wordSubTypes[wt]
- definition.setSubType(wordSubType)
- break
-
if state == Wiktio.SKIP:
continue
@@ -409,7 +395,7 @@ class WikiHandler(ContentHandler):
# We already found a meaning for this word, we pick
# other senses restrictively
- if definition.text != "":
+ if not definition.rootDescription.isEmpty():
for filter in self.filterSecondDefinitionType:
if re.search(filter, l, re.I):
result = re.search(r"^[ ]*[*#:;]+[ ]*", l)
@@ -429,14 +415,15 @@ class WikiHandler(ContentHandler):
continue
if state == Wiktio.DEFINITION:
- definition.addText(self.wiki2xml(l, False))
+ [text, level, numbered] = self.wiki2xml(l, False)
+ definition.addDescription(text, level, numbered)
elif state == Wiktio.PRON:
file = re.subn(r".*audio=([^|}]+).*", r"\1", l)
if file[1] == 1:
definition.add(state, file[0])
else:
if len(l) > 0:
- definition.add(state, self.wiki2xml(l, True))
+ definition.add(state, self.wiki2xml(l, True)[0])
return inWord
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]