[gtk-doc] common: port helpers for mkdb
- From: Stefan Sauer <stefkost src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtk-doc] common: port helpers for mkdb
- Date: Sun, 23 Apr 2017 15:39:00 +0000 (UTC)
commit 1b45a142c72d4cb9f4e2faa7e47f7f673ad743f1
Author: Stefan Sauer <ensonic users sf net>
Date: Mon Apr 17 19:51:09 2017 +0200
common: port helpers for mkdb
gtkdoc/common.py | 428 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 420 insertions(+), 8 deletions(-)
---
diff --git a/gtkdoc/common.py b/gtkdoc/common.py
index fb778b5..703c939 100644
--- a/gtkdoc/common.py
+++ b/gtkdoc/common.py
@@ -37,8 +37,8 @@ def UpdateFileIfChanged(old_file, new_file, make_backup):
tracking.
Args:
- old_file (string): The pathname of the old file.
- new_file (string): The pathname of the new version of the file.
+ old_file (str): The pathname of the old file.
+ new_file (str): The pathname of the new version of the file.
make_backup (bool): True if a backup of the old file should be kept.
It will have the .bak suffix added to the file name.
@@ -73,7 +73,7 @@ def GetModuleDocDir(module_name):
module_name (string): The module, e.g. 'glib-2.0'
Returns:
- string: the doc directory
+ str: the doc directory
"""
path = subprocess.check_output([config.pkg_config, '--variable=prefix', module_name],
universal_newlines=True)
return os.path.join(path.strip(), 'share/gtk-doc/html')
@@ -83,9 +83,9 @@ def LogWarning(filename, line, message):
"""Log a warning in gcc style format
Args:
- file (string): The file the error comes from
+ file (str): The file the error comes from
line (int): line number in the file
- message (string): the error message to print
+ message (str): the error message to print
"""
filename = filename or "unknown"
@@ -106,14 +106,14 @@ def CreateValidSGMLID(xml_id):
the case that mixed-case identifiers would collide.)
Args:
- id (string): The text to be converted into a valid SGML id.
+ id (str): The text to be converted into a valid SGML id.
Returns:
- string: The converted id.
+ str: The converted id.
"""
# Special case, '_' would end up as '' so we use 'gettext-macro' instead.
- if xml_id is "_":
+ if xml_id == '_':
return "gettext-macro"
xml_id = re.sub(r'[,;]', '', xml_id)
@@ -128,3 +128,415 @@ def CreateValidSGMLID(xml_id):
xml_id += ':CAPS'
return xml_id
+
+
+# Parsing helpers (move to mkdb ?)
+
class ParseError(Exception):
    """Raised when a C declaration does not have the shape a parser expects.

    The struct/union and enum parsers in this module raise it when the
    declaration text lacks the opening "struct/union [NAME] {" or
    "enum [NAME] {" header.
    """
+
+
+# TODO: output_function_params is always passed as 0
+# TODO: we always pass both functions
def ParseStructDeclaration(declaration, is_object, output_function_params, typefunc=None, namefunc=None):
    """Parse a struct declaration.

    Takes a structure (or union) declaration and breaks it into individual
    member declarations.

    Args:
        declaration (str): the declaration to parse
        is_object (bool): true if this is an object structure; everything up
                          to the first ``/*< public >*/`` marker (or the
                          closing brace) is then dropped as private
        output_function_params (bool): true if full type is wanted for
                                       function pointer members
        typefunc (func): function to apply to type, called as
                         ``typefunc(type, '<type>%s</type>' % type)``
        namefunc (func): function to apply to name

    Returns:
        list: flat list alternating the member name and the string describing
              that member, in declaration order. An empty tuple is returned
              for forward declarations and for declarations that are empty
              after private parts and comments have been removed.

    Raises:
        ParseError: if no "struct/union [NAME] {" header can be found.
    """

    # For forward struct declarations just return an empty array.
    if re.search(r'(?:struct|union)\s+\S+\s*;', declaration, flags=re.MULTILINE | re.DOTALL):
        return ()

    # Remove all private parts of the declaration
    # For objects, assume private
    if is_object:
        declaration = re.sub(r'''((?:struct|union)\s+\w*\s*\{)
                                 .*?
                                 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
                             r'\1', declaration, flags=re.MULTILINE | re.DOTALL | re.VERBOSE)

    # Remove private symbols
    # Assume end of declaration if line begins with '}'
    declaration = re.sub(
        r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
        '', declaration, flags=re.MULTILINE | re.DOTALL)

    # Remove all other comments
    declaration = re.sub(r'\n\s*/\*([^*]+|\*(?!/))*\*/\s*\n', r'\n', declaration,
                         flags=re.MULTILINE | re.DOTALL)
    declaration = re.sub(r'/\*([^*]+|\*(?!/))*\*/', r' ', declaration)
    declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
    declaration = re.sub(r'//.*', '', declaration)

    # Remove g_iface, parent_instance and parent_class if they are first member
    declaration = re.sub(r'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r'\1', declaration)

    if declaration.strip() == '':
        return ()

    # Prime match after "struct/union {" declaration
    header = re.search(r'(?:struct|union)\s+\w*\s*\{', declaration, flags=re.MULTILINE | re.DOTALL)
    if not header:
        raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration)

    logging.debug('public fields in struct/union: %s', declaration)

    result = []

    # Only scan the text after the header. Scanning from offset 0 lets the
    # field pattern swallow "typedef struct { ... } Name" as a single nested
    # entry, which is then skipped and no member is reported at all.
    body = declaration[header.end():]

    # Treat lines in sequence, allowing singly nested anonymous structs and unions.
    for m in re.finditer(r'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', body, flags=re.MULTILINE | re.DOTALL):
        line = m.group(1)

        # The closing brace (optionally followed by a typedef name) ends the member list.
        if re.search(r'^\s*\}\s*\w*\s*$', line):
            break

        # FIXME: Just ignore nested structs and unions for now
        if '{' in line:
            continue

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=re.MULTILINE | re.DOTALL)

        # Re-check: stripping a directive may leave just the closing brace.
        if re.search(r'^\s*\}\s*\w*\s*$', line):
            break

        # NOTE: group 1 is a repeated group, so only the last modifier of a
        # run such as "unsigned long" is captured.
        func_match = re.search(r'''^
                 (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)?  # mod1
                 (\w+)\s*                             # type
                 (\**(?:\s*restrict)?)\s*             # ptr1
                 (const\s+)?                          # mod2
                 (\**\s*)                             # ptr2
                 (const\s+)?                          # mod3
                 \(\s*\*\s*(\w+)\s*\)\s*              # name
                 \(([^)]*)\)\s*                       # func_params
                 $''', line, flags=re.VERBOSE)
        vars_match = re.search(r'''^
                 ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)?  # mod1
                 (\w+)\s*                             # type
                 (\** \s* const\s+)?                  # mod2
                 (.*)                                 # variables
                 $''', line, flags=re.VERBOSE)

        # Try to match structure members which are functions
        if func_match:
            mod1 = func_match.group(1) or ''
            if func_match.group(2):
                mod1 += func_match.group(2)
            func_type = func_match.group(3)
            ptr1 = func_match.group(4)
            mod2 = func_match.group(5) or ''
            ptr2 = func_match.group(6)
            mod3 = func_match.group(7) or ''
            name = func_match.group(8)
            func_params = func_match.group(9)
            ptype = func_type
            if typefunc:
                ptype = typefunc(func_type, '<type>%s</type>' % func_type)

            # The raw name is recorded first; namefunc only affects the description.
            result.append(name)
            if namefunc:
                name = namefunc(name)

            if output_function_params:
                result.append('%s%s%s%s%s%s (*%s) (%s)' %
                              (mod1, ptype, ptr1, mod2, ptr2, mod3, name, func_params))
            else:
                result.append('%s ()' % name)

        # Try to match normal struct fields of comma-separated variables/
        elif vars_match:
            mod1 = vars_match.group(1) or ''
            if vars_match.group(2):
                mod1 += vars_match.group(2)
            vtype = vars_match.group(3)
            ptype = vtype
            if typefunc:
                ptype = typefunc(vtype, '<type>%s</type>' % vtype)
            mod2 = vars_match.group(4) or ''
            if mod2:
                mod2 = ' ' + mod2
            var_list = vars_match.group(5)

            logging.debug('"%s" "%s" "%s" "%s"', mod1, vtype, mod2, var_list)

            # NOTE(review): as written these replace() calls (and the two
            # further down) are no-ops. The replacement text was presumably a
            # non-breaking-space entity that got lost in rendering - confirm
            # against the repository before relying on the exact spacing.
            mod1 = mod1.replace(' ', ' ')
            mod2 = mod2.replace(' ', ' ')

            for n in var_list.split(','):
                # Each variable can have any number of '*' before the identifier,
                # and be followed by any number of pairs of brackets or a bit field specifier.
                # e.g. *foo, ***bar, *baz[12][23], foo : 25.
                # Match the single variable `n`; matching the whole `line`
                # (type included) would fail for every ordinary field.
                m = re.search(
                    r'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $',
                    n, flags=re.VERBOSE)
                if m:
                    ptrs = m.group(1)
                    name = m.group(2)
                    array = m.group(3) or ''
                    # Group 4 is None for anything that is not a bit field.
                    bits = m.group(4) or ''
                    if bits:
                        bits = ' ' + bits
                    if ptrs and not ptrs.endswith('*'):
                        ptrs += ' '

                    array = array.replace(' ', ' ')
                    bits = bits.replace(' ', ' ')

                    result.append(name)
                    if namefunc:
                        name = namefunc(name)

                    result.append('%s%s%s %s%s%s%s;' % (mod1, ptype, mod2, ptrs, name, array, bits))

                    logging.debug('Matched line: %s%s%s %s%s%s%s', mod1, ptype, mod2, ptrs, name, array, bits)
                else:
                    logging.warning('Cannot parse struct field: "%s"', n)

        else:
            logging.warning('Cannot parse structure field: "%s"', line)

    return result
+
+
def ParseEnumDeclaration(declaration):
    """Parse an enum declaration.

    This function takes an enumeration declaration and breaks it into
    individual enum member declarations.

    Args:
        declaration (str): the declaration to parse

    Returns:
        list: the names of the public enum members, in declaration order. An
              empty tuple is returned for forward declarations and for
              declarations that are empty after private parts and comments
              have been removed.

    Raises:
        ParseError: if no "typedef enum {" / "enum [NAME] {" header is found.
    """

    # For forward enum declarations just return an empty array.
    if re.search(r'enum\s+\S+\s*;', declaration, flags=re.MULTILINE | re.DOTALL):
        return ()

    # Remove private symbols
    # Assume end of declaration if line begins with '}'
    declaration = re.sub(
        r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
        '', declaration, flags=re.MULTILINE | re.DOTALL)

    # Remove all other comments
    declaration = re.sub(r'\n\s*/\*([^*]+|\*(?!/))*\*/\s*\n', r'\n', declaration,
                         flags=re.MULTILINE | re.DOTALL)
    declaration = re.sub(r'/\*([^*]+|\*(?!/))*\*/', r' ', declaration)
    declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
    declaration = re.sub(r'//.*', '', declaration)

    if declaration.strip() == '':
        return ()

    result = []

    # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
    # to avoid getting confused by commas they might contain. This doesn't
    # handle nested parentheses correctly.
    declaration = re.sub(r'\([^)\n]+\)', '', declaration)

    # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
    # confused with end of enumeration.
    # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
    declaration = re.sub(r'\'.\'', '', declaration)

    # Remove comma from comma - possible whitespace - closing brace sequence
    # since it is legal in GNU C and C99 to have a trailing comma but doesn't
    # result in an actual enum member
    declaration = re.sub(r',(\s*})', r'\1', declaration)

    # Prime match after "typedef enum {" declaration
    header = re.search(r'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration, flags=re.MULTILINE | re.DOTALL)
    if not header:
        raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' %
                         declaration)

    logging.debug('public fields in enum: %s', declaration)

    # Only scan the text after the header. Scanning from offset 0 glues the
    # header onto the first member, which then fails to parse and is dropped.
    body = declaration[header.end():]

    # Treat lines in sequence.
    for m in re.finditer(r'\s*([^,\}]+)([,\}])', body, flags=re.MULTILINE | re.DOTALL):
        line = m.group(1)
        terminator = m.group(2)

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=re.MULTILINE | re.DOTALL)

        m1 = re.search(r'^(\w+)\s*(=.*)?$', line, flags=re.MULTILINE | re.DOTALL)
        # Special case for GIOCondition, where the values are specified by
        # macros which expand to include the equal sign like '=1'.
        m2 = re.search(r'^(\w+)\s*GLIB_SYSDEF_POLL', line, flags=re.MULTILINE | re.DOTALL)
        if m1:
            result.append(m1.group(1))
        elif m2:
            result.append(m2.group(1))
        elif line.strip().startswith('#'):
            # Special case include of <gdk/gdkcursors.h>, just ignore it
            # Special case for #ifdef/#else/#endif, just ignore it
            # NOTE(review): 'break' stops the scan altogether rather than
            # skipping this entry - confirm that is what is wanted.
            break
        else:
            logging.warning('Cannot parse enumeration member: %s', line)

        # A closing brace ends the member list; anything after it is the typedef name.
        if terminator == '}':
            break

    return result
+
+
def ParseFunctionDeclaration(declaration, typefunc, namefunc):
    """Parse a function declaration.

    This function takes the parameter list of a function declaration and
    breaks it into individual parameter declarations. Every parameter has to
    be terminated by a ',' or a newline to be recognised.

    Args:
        declaration (str): the declaration to parse
        typefunc (func): function to apply to type, called as
                         ``typefunc(type, '<type>%s</type>' % type)``
        namefunc (func): function to apply to the formatted parameter

    Returns:
        list: flat list alternating the parameter name and the string
              describing that parameter, in declaration order. Parsing stops
              (with a logged warning) at the first parameter that cannot be
              matched.
    """

    result = []

    # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
    # groups: 1 = modifiers, 2 = type, 3 = pointer/qualifiers, 4 = name, 5 = array suffix
    param_regex = (
        r'^\s*((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|signed long|signed short'
        r'|unsigned|signed|long|short|volatile|const)\s+)*)'
        r'((?:struct\b|enum\b)?\s*\w+)\s*'
        r'((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)'
        r'(\w+)?\s*((?:\[\S*\])*)\s*(?:G_GNUC_[A-Z_]+)?\s*[,\n]')

    # Try to match parameters which are functions
    # groups: 1 = modifiers, 2 = struct, 3 = type, 4 = pointers, 5 = const,
    #         6 = function pointer stars, 7 = name, 8 = function parameters
    func_regex = (
        r'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*'
        r'(?:restrict\b)?\s*(const\s+)?\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]')

    param_num = 0
    while declaration:
        logging.debug('decl=[%s]', declaration)

        # skip whitespace and commas
        declaration, n = re.subn(r'^[\s,]+', '', declaration)
        if n:
            continue

        declaration, n = re.subn(r'^void\s*[,\n]', '', declaration)
        if n:
            if param_num != 0:
                logging.warning('void used as parameter %d in function %s', param_num, declaration)
            result.append('void')
            result.append(namefunc('<type>void</type>'))
            param_num += 1
            continue

        declaration, n = re.subn(r'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', declaration)
        if n:
            result.append('...')
            result.append(namefunc('...'))
            param_num += 1
            continue

        m = re.match(param_regex, declaration)
        if m:
            # The pattern is anchored at the start, so dropping the matched
            # prefix is the same as substituting it away.
            declaration = declaration[m.end():]

            pre = m.group(1) or ''
            param_type = m.group(2)
            ptr = m.group(3) or ''
            name = m.group(4) or ''
            array = m.group(5) or ''

            pre = re.sub(r'\s+', ' ', pre)
            param_type = re.sub(r'\s+', ' ', param_type)
            ptr = re.sub(r'\s+', ' ', ptr)
            ptr = re.sub(r'\s+$', '', ptr)
            if ptr and not ptr.endswith('*'):
                ptr += ' '

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, param_type, ptr, name, array)

            # No name was captured but the modifiers are "(un)signed ...":
            # the word taken as the type is really the name, and the
            # modifiers are the type.
            sign_match = re.search(r'^((un)?signed .*)\s?', pre)
            if name == '' and sign_match:
                name = param_type
                param_type = sign_match.group(1)
                pre = ''

            if name == '':
                name = 'Param' + str(param_num + 1)

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, param_type, ptr, name, array)

            result.append(name)
            xref = typefunc(param_type, '<type>%s</type>' % param_type)
            label = namefunc('%s%s %s%s%s' % (pre, xref, ptr, name, array))
            result.append(label)
            param_num += 1
            continue

        m = re.match(func_regex, declaration)
        if m:
            declaration = declaration[m.end():]

            mod1 = m.group(1) or ''
            if m.group(2):
                mod1 += m.group(2)
            param_type = m.group(3)
            ptr1 = m.group(4)
            mod2 = m.group(5) or ''
            func_ptr = m.group(6)
            name = m.group(7)
            func_params = m.group(8) or ''

            # Use this match's own pointer group; 'ptr' belongs to the
            # standard-parameter branch and is unbound (or stale) here.
            if ptr1 and not ptr1.endswith('*'):
                ptr1 += ' '
            func_ptr = re.sub(r'\s+', ' ', func_ptr)

            logging.debug('"%s" "%s" "%s" "%s" "%s"', mod1, param_type, mod2, func_ptr, name)

            result.append(name)
            xref = typefunc(param_type, '<type>%s</type>' % param_type)
            label = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1, xref, ptr1, mod2, func_ptr, name, func_params))
            result.append(label)
            param_num += 1
            continue

        logging.warning('Cannnot parse args for function in "%s"', declaration)
        break

    return result
+
+
def ParseMacroDeclaration(declaration, namefunc):
    """Parse a macro declaration.

    This function takes a function-like macro definition and breaks its
    parameter list into individual parameters.

    Args:
        declaration (str): the declaration to parse
        namefunc (func): function to apply to name

    Returns:
        list: flat list alternating the parameter name and the result of
              ``namefunc`` for it. The list is empty when the declaration is
              not a ``#define NAME(...)`` or has no parameters.
    """

    result = []

    m = re.search(r'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration)
    if m:
        # Join parameter lists that were split with backslash-newline.
        params = re.sub(r'\\\n', '', m.group(1))

        for param in params.split(','):
            param = param.strip()

            # Allow varargs variations
            if param.endswith('...'):
                param = '...'

            # Skip empty entries, e.g. from "NAME()".
            if param != '':
                result.append(param)
                result.append(namefunc(param))

    return result
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]