[gtk-doc] md_to_db: comment and cleanup the link parsing
- From: Stefan Sauer <stefkost src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtk-doc] md_to_db: comment and cleanup the link parsing
- Date: Fri, 24 Nov 2017 20:16:29 +0000 (UTC)
commit 154e4d910b7e87149c372900b0552b161756cf78
Author: Stefan Sauer <ensonic users sf net>
Date: Wed Nov 15 20:20:32 2017 +0100
md_to_db: comment and cleanup the link parsing
Document the keys. Change them from cryptic one char keys to words. Escape
xml chars in the output text through a new helper.
gtkdoc/md_to_db.py | 49 ++++++++++++++++++++++++++++++-------------------
1 files changed, 30 insertions(+), 19 deletions(-)
---
diff --git a/gtkdoc/md_to_db.py b/gtkdoc/md_to_db.py
index 9a5d269..30a68f3 100644
--- a/gtkdoc/md_to_db.py
+++ b/gtkdoc/md_to_db.py
@@ -407,6 +407,11 @@ def MarkDownParseSpanElementsInner(text, markersref):
markers_rest = {k: v for k, v in markers.items() if v and k != closest_marker}
if closest_marker == '![' or closest_marker == '[':
+ # 'id-ref' : local id reference
+ # 'title' : link short description/alt-text/tooltip
+ # 'a' : linked text
+ # 'href' : external link
+ # 'is-media': is link to media object
element = None
# FIXME: '(?R)' is a recursive subpattern
@@ -414,48 +419,52 @@ def MarkDownParseSpanElementsInner(text, markersref):
# m = re.search(r'\[((?:[^][]|(?R))*)\]', text)
m = re.search(r'\[((?:[^][])*)\]', text)
if ']' in text and m:
- element = {'!': text[0] == '!',
- 'a': m.group(1),
+ element = {'is-media': text[0] == '!',
+ 'a': EscapeEntities(m.group(1)),
}
offset = len(m.group(0))
- if element['!']:
+ if element['is-media']:
offset += 1
logging.debug("Recursive md-expr match: off=%d, text='%s', match='%s'", offset, text,
m.group(1))
remaining_text = text[offset:]
+ # (link "alt-text")
m2 = re.search(r'''^\([ ]*([^)'"]*?)(?:[ ]+['"](.+?)['"])?[ ]*\)''', remaining_text)
+ # [id-reference]
m3 = re.search(r'^\s*\[([^\]<]*?)\]', remaining_text)
if m2:
- element['»'] = m2.group(1)
+ element['href'] = m2.group(1)
if m2.group(2):
- element['#'] = m2.group(2)
+ element['title'] = m2.group(2)
offset += len(m2.group(0))
elif m3:
- element['ref'] = m3.group(1)
+ element['id-ref'] = m3.group(1)
offset += len(m3.group(0))
else:
element = None
if element:
- if '»' in element:
- element['»'] = element['»'].replace('&', '&amp;').replace('<', '&lt;')
+ logging.debug("output link for", element)
- if element['!']:
+ if 'href' in element:
+ element['href'] = EscapeEntities(element['href'])
+
+ if element['is-media']:
# media link
markup += '<inlinemediaobject><imageobject><imagedata fileref="' + \
- element['»'] + '"></imagedata></imageobject>'
+ element['href'] + '"></imagedata></imageobject>'
if 'a' in element:
markup += "<textobject><phrase>" + element['a'] + "</phrase></textobject>"
markup += "</inlinemediaobject>"
- elif 'ref' in element:
+ elif 'id-ref' in element:
# internal link
element['a'] = MarkDownParseSpanElementsInner(element['a'], markers_rest)
- markup += '<link linkend="' + element['ref'] + '"'
+ markup += '<link linkend="' + element['id-ref'] + '"'
- if '#' in element:
+ if 'title' in element:
# title attribute not supported
pass
@@ -463,9 +472,9 @@ def MarkDownParseSpanElementsInner(text, markersref):
else:
# external link
element['a'] = MarkDownParseSpanElementsInner(element['a'], markers_rest)
- markup += '<ulink url="' + element['»'] + '"'
+ markup += '<ulink url="' + element['href'] + '"'
- if '#' in element:
+ if 'title' in element:
# title attribute not supported
pass
@@ -483,7 +492,7 @@ def MarkDownParseSpanElementsInner(text, markersref):
m5 = re.search(r'^<([A-Za-z0-9._-]+?@[A-Za-z0-9._-]+?)>', text)
m6 = re.search(r'^<[^>]+?>', text)
if m4:
- element_url = m4.group(1).replace('&', '&amp;').replace('<', '&lt;')
+ element_url = EscapeEntities(m4.group(1))
markup += '<ulink url="' + element_url + '">' + element_url + '</ulink>'
offset = len(m4.group(0))
@@ -511,7 +520,7 @@ def MarkDownParseSpanElementsInner(text, markersref):
elif closest_marker == "`":
m7 = re.search(r'^(`+)([^`]+?)\1(?!`)', text)
if m7:
- element_text = m7.group(2)
+ element_text = EscapeEntities(m7.group(2))
markup += "<literal>" + element_text + "</literal>"
offset = len(m7.group(0))
else:
@@ -593,6 +602,10 @@ def MarkDownParseSpanElements(text):
return text
+def EscapeEntities(text):
+ return text.replace('&', '&amp;').replace('<', '&lt;').replace('<', '&gt;')
+
+
def ReplaceEntities(text):
entities = [["&lt;", '<'],
["&gt;", '>'],
@@ -606,8 +619,6 @@ def ReplaceEntities(text):
["&amp;", '&'], # Do this last, or the others get messed up.
]
- # Expand entities in <programlisting> even inside CDATA since
- # we changed the definition of |[ to add CDATA
for i in entities:
text = re.sub(i[0], i[1], text)
return text
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]