[gtk-doc] common: port helpers for mkdb



commit 1b45a142c72d4cb9f4e2faa7e47f7f673ad743f1
Author: Stefan Sauer <ensonic users sf net>
Date:   Mon Apr 17 19:51:09 2017 +0200

    common: port helpers for mkdb

 gtkdoc/common.py |  428 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 420 insertions(+), 8 deletions(-)
---
diff --git a/gtkdoc/common.py b/gtkdoc/common.py
index fb778b5..703c939 100644
--- a/gtkdoc/common.py
+++ b/gtkdoc/common.py
@@ -37,8 +37,8 @@ def UpdateFileIfChanged(old_file, new_file, make_backup):
     tracking.
 
     Args:
-        old_file (string): The pathname of the old file.
-        new_file (string): The pathname of the new version of the file.
+        old_file (str): The pathname of the old file.
+        new_file (str): The pathname of the new version of the file.
         make_backup (bool): True if a backup of the old file should be kept.
                            It will have the .bak suffix added to the file name.
 
@@ -73,7 +73,7 @@ def GetModuleDocDir(module_name):
       module_name (string): The module, e.g. 'glib-2.0'
 
     Returns:
-      string: the doc directory
+      str: the doc directory
     """
     path = subprocess.check_output([config.pkg_config, '--variable=prefix', module_name], 
universal_newlines=True)
     return os.path.join(path.strip(), 'share/gtk-doc/html')
@@ -83,9 +83,9 @@ def LogWarning(filename, line, message):
     """Log a warning in gcc style format
 
     Args:
-      file (string): The file the error comes from
+      filename (str): The file the error comes from
       line (int): line number in the file
-      message (string): the error message to print
+      message (str): the error message to print
     """
     filename = filename or "unknown"
 
@@ -106,14 +106,14 @@ def CreateValidSGMLID(xml_id):
     the case that mixed-case identifiers would collide.)
 
     Args:
-      id (string): The text to be converted into a valid SGML id.
+      xml_id (str): The text to be converted into a valid SGML id.
 
     Returns:
-      string: The converted id.
+      str: The converted id.
     """
 
     # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
-    if xml_id is "_":
+    if xml_id == '_':
         return "gettext-macro"
 
     xml_id = re.sub(r'[,;]', '', xml_id)
@@ -128,3 +128,415 @@ def CreateValidSGMLID(xml_id):
         xml_id += ':CAPS'
 
     return xml_id
+
+
+# Parsing helpers (move to mkdb ?)
+
+class ParseError(Exception):
+    """Raised by the declaration parsers when the expected header is missing."""
+
+
+# TODO: output_function_params is always passed as 0
+# TODO: we always pass both functions
+def ParseStructDeclaration(declaration, is_object, output_function_params, typefunc=None, namefunc=None):
+    """Parse a struct declaration.
+
+    Takes a structure declaration and breaks it into individual type declarations.
+    Private/protected sections, comments and a leading g_iface, parent_instance
+    or parent_class member are stripped before the members are examined.
+
+    Args:
+      declaration (str): the declaration to parse
+      is_object (bool): true if this is an object structure; everything up to
+                        the first /*< public >*/ marker is then dropped
+      output_function_params (bool): true if full type is wanted for function pointer members
+      typefunc (func): optional, called as typefunc(type, '<type>TYPE</type>');
+                       its result replaces the type in the formatted output
+      namefunc (func): optional, called as namefunc(name); its result replaces
+                       the name in the formatted output
+
+    Returns:
+      list: flat list alternating between a member name and the formatted
+            declaration of that member. An empty tuple is returned for forward
+            declarations and when nothing is left after stripping.
+
+    Raises:
+      ParseError: if no "struct/union [NAME] {" header is found
+    """
+
+    # For forward struct declarations just return an empty array.
+    if re.search(r'(?:struct|union)\s+\S+\s*;', declaration, flags=re.MULTILINE | re.DOTALL):
+        return ()
+
+    # Remove all private parts of the declaration
+    # For objects, assume private
+    if is_object:
+        declaration = re.sub(r'''((?:struct|union)\s+\w*\s*\{)
+                                 .*?
+                                 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
+                             r'\1', declaration, flags=re.MULTILINE | re.DOTALL | re.VERBOSE)
+
+    # Remove private symbols
+    # Assume end of declaration if line begins with '}'
+    declaration = re.sub(r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
+                         '', declaration, flags=re.MULTILINE | re.DOTALL)
+
+    # Remove all other comments
+    declaration = re.sub(r'\n\s*/\*([^*]+|\*(?!/))*\*/\s*\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
+    declaration = re.sub(r'/\*([^*]+|\*(?!/))*\*/', r' ', declaration)
+    declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
+    declaration = re.sub(r'//.*', '', declaration)
+
+    # Remove g_iface, parent_instance and parent_class if they are first member
+    declaration = re.sub(r'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r'\1', declaration)
+
+    if declaration.strip() == '':
+        return ()
+
+    # Prime match after "struct/union {" declaration
+    header = re.search(r'(?:struct|union)\s+\w*\s*\{', declaration, flags=re.MULTILINE | re.DOTALL)
+    if not header:
+        raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration)
+
+    logging.debug('public fields in struct/union: %s', declaration)
+
+    result = []
+
+    # Treat lines in sequence, allowing singly nested anonymous structs and unions.
+    # Scanning starts right after the opening brace; otherwise a declaration
+    # such as "typedef struct { ... } Name;" would be consumed in one piece and
+    # skipped as if it were a nested struct.
+    for m in re.finditer(r'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', declaration[header.end():],
+                         flags=re.MULTILINE | re.DOTALL):
+        line = m.group(1)
+
+        # The closing brace (optionally followed by the typedef name) ends the struct.
+        if re.search(r'^\s*\}\s*\w*\s*$', line):
+            break
+
+        # FIXME: Just ignore nested structs and unions for now
+        if '{' in line:
+            continue
+
+        # ignore preprocessor directives
+        line = re.sub(r'^#.*?\n\s*', '', line, flags=re.MULTILINE | re.DOTALL)
+
+        # Check again, the closing brace may have been hidden behind a directive.
+        if re.search(r'^\s*\}\s*\w*\s*$', line):
+            break
+
+        func_match = re.search(r'''^
+                                   (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)?  # mod1
+                                   (\w+)\s*                             # type
+                                   (\**(?:\s*restrict)?)\s*             # ptr1
+                                   (const\s+)?                          # mod2
+                                   (\**\s*)                             # ptr2
+                                   (const\s+)?                          # mod3
+                                   \(\s*\*\s*(\w+)\s*\)\s*              # name
+                                   \(([^)]*)\)\s*                       # func_params
+                                   $''', line, flags=re.VERBOSE)
+        vars_match = re.search(r'''^
+                                   ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
+                                   (\w+)\s*                            # type
+                                   (\** \s* const\s+)?                 # mod2
+                                   (.*)                                # variables
+                                   $''', line, flags=re.VERBOSE)
+
+        # Try to match structure members which are functions
+        if func_match:
+            mod1 = func_match.group(1) or ''
+            if func_match.group(2):
+                mod1 += func_match.group(2)
+            func_type = func_match.group(3)
+            ptr1 = func_match.group(4)
+            mod2 = func_match.group(5) or ''
+            ptr2 = func_match.group(6)
+            mod3 = func_match.group(7) or ''
+            name = func_match.group(8)
+            func_params = func_match.group(9)
+            ptype = func_type
+            if typefunc:
+                ptype = typefunc(func_type, '<type>%s</type>' % func_type)
+
+            # The raw name is stored first, the (possibly decorated) markup second.
+            result.append(name)
+            if namefunc:
+                name = namefunc(name)
+
+            if output_function_params:
+                result.append('%s%s%s%s%s%s&#160;(*%s)&#160;(%s)' %
+                              (mod1, ptype, ptr1, mod2, ptr2, mod3, name, func_params))
+            else:
+                result.append('%s&#160;()' % name)
+
+        # Try to match normal struct fields of comma-separated variables/
+        elif vars_match:
+            mod1 = vars_match.group(1) or ''
+            if vars_match.group(2):
+                mod1 += vars_match.group(2)
+            vtype = vars_match.group(3)
+            ptype = vtype
+            if typefunc:
+                ptype = typefunc(vtype, '<type>%s</type>' % vtype)
+            mod2 = vars_match.group(4) or ''
+            if mod2:
+                mod2 = ' ' + mod2
+            var_list = vars_match.group(5)
+
+            logging.debug('"%s" "%s" "%s" "%s"', mod1, vtype, mod2, var_list)
+
+            mod1 = mod1.replace(' ', '&#160;')
+            mod2 = mod2.replace(' ', '&#160;')
+
+            for n in var_list.split(','):
+                # Each variable can have any number of '*' before the identifier,
+                # and be followed by any number of pairs of brackets or a bit field specifier.
+                # e.g. *foo, ***bar, *baz[12][23], foo : 25.
+                # The single variable 'n' is matched here, not the whole 'line'
+                # (which still starts with the type and would never match).
+                m = re.search(
+                    r'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $',
+                    n, flags=re.VERBOSE)
+                if m:
+                    ptrs = m.group(1)
+                    name = m.group(2)
+                    array = m.group(3) or ''
+                    # group(4) is None for anything that is not a bit field
+                    bits = m.group(4) or ''
+                    if bits:
+                        bits = ' ' + bits
+                    if ptrs and not ptrs.endswith('*'):
+                        ptrs += ' '
+
+                    array = array.replace(' ', '&#160;')
+                    bits = bits.replace(' ', '&#160;')
+
+                    result.append(name)
+                    if namefunc:
+                        name = namefunc(name)
+
+                    result.append('%s%s%s&#160;%s%s%s%s;' % (mod1, ptype, mod2, ptrs, name, array, bits))
+
+                    logging.debug('Matched line: %s%s%s %s%s%s%s', mod1, ptype, mod2, ptrs, name, array, bits)
+                else:
+                    logging.warning('Cannot parse struct field: "%s"', n)
+
+        else:
+            logging.warning('Cannot parse structure field: "%s"', line)
+
+    return result
+
+
+def ParseEnumDeclaration(declaration):
+    """Parse an enum declaration.
+
+    This function takes a enumeration declaration and breaks it into individual
+    enum member declarations. Private/protected sections, comments,
+    parenthesized expressions and quoted characters are stripped first.
+
+    Args:
+      declaration (str): the declaration to parse
+
+    Returns:
+      list: the names of the public enum members, in declaration order. An
+            empty tuple is returned for forward declarations and when nothing
+            is left after stripping.
+
+    Raises:
+      ParseError: if no "typedef enum {" or "enum [NAME] {" header is found
+    """
+
+    # For forward enum declarations just return an empty array.
+    if re.search(r'enum\s+\S+\s*;', declaration, flags=re.MULTILINE | re.DOTALL):
+        return ()
+
+    # Remove private symbols
+    # Assume end of declaration if line begins with '}'
+    declaration = re.sub(r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
+                         '', declaration, flags=re.MULTILINE | re.DOTALL)
+
+    # Remove all other comments
+    declaration = re.sub(r'\n\s*/\*([^*]+|\*(?!/))*\*/\s*\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
+    declaration = re.sub(r'/\*([^*]+|\*(?!/))*\*/', r' ', declaration)
+    declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
+    declaration = re.sub(r'//.*', '', declaration)
+
+    if declaration.strip() == '':
+        return ()
+
+    result = []
+
+    # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
+    # to avoid getting confused by commas they might contain. This doesn't
+    # handle nested parentheses correctly.
+    declaration = re.sub(r'\([^)\n]+\)', '', declaration)
+
+    # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
+    # confused with end of enumeration.
+    # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
+    declaration = re.sub(r'\'.\'', '', declaration)
+
+    # Remove comma from comma - possible whitespace - closing brace sequence
+    # since it is legal in GNU C and C99 to have a trailing comma but doesn't
+    # result in an actual enum member
+    declaration = re.sub(r',(\s*})', r'\1', declaration)
+
+    # Prime match after "typedef enum {" declaration
+    header = re.search(r'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration, flags=re.MULTILINE | re.DOTALL)
+    if not header:
+        raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' % declaration)
+
+    logging.debug("public fields in enum: '%s'", declaration)
+
+    # Treat lines in sequence.
+    # Scanning starts right after the opening brace; otherwise the first chunk
+    # would still carry the "typedef enum {" header and the first member would
+    # not be recognized.
+    for m in re.finditer(r'\s*([^,\}]+)([,\}])', declaration[header.end():], flags=re.MULTILINE | re.DOTALL):
+        line = m.group(1)
+        terminator = m.group(2)
+
+        # ignore preprocessor directives
+        line = re.sub(r'^#.*?\n\s*', '', line, flags=re.MULTILINE | re.DOTALL)
+
+        m1 = re.search(r'^(\w+)\s*(=.*)?$', line, flags=re.MULTILINE | re.DOTALL)
+        # Special case for GIOCondition, where the values are specified by
+        # macros which expand to include the equal sign like '=1'.
+        m2 = re.search(r'^(\w+)\s*GLIB_SYSDEF_POLL', line, flags=re.MULTILINE | re.DOTALL)
+        if m1:
+            result.append(m1.group(1))
+        elif m2:
+            result.append(m2.group(1))
+        elif line.strip().startswith('#'):
+            # Special case include of <gdk/gdkcursors.h>, just ignore it
+            # Special case for #ifdef/#else/#endif, just ignore it
+            break
+        else:
+            logging.warning('Cannot parse enumeration member: %s', line)
+
+        if terminator == '}':
+            break
+
+    return result
+
+
+def ParseFunctionDeclaration(declaration, typefunc, namefunc):
+    """Parse a function declaration.
+
+    This function takes a function declaration and breaks it into individual
+    parameter declarations. Every parameter has to be terminated by a comma
+    or a newline to be recognized.
+
+    Args:
+      declaration (str): the declaration to parse
+      typefunc (func): called as typefunc(type, '<type>TYPE</type>'); its
+                       result replaces the type in the formatted parameter
+      namefunc (func): called with the formatted parameter; its result is
+                       what gets stored
+
+    Returns:
+      list: flat list alternating between a parameter name and the formatted
+            declaration of that parameter. Parsing stops with a warning at
+            the first parameter that cannot be matched.
+    """
+
+    result = []
+
+    param_num = 0
+    while declaration:
+        logging.debug('decl=[%s]', declaration)
+
+        # skip whitespace and commas
+        declaration, n = re.subn(r'^[\s,]+', '', declaration)
+        if n:
+            continue
+
+        declaration, n = re.subn(r'^void\s*[,\n]', '', declaration)
+        if n:
+            if param_num != 0:
+                logging.warning('void used as parameter %d in function %s', param_num, declaration)
+            result.append('void')
+            result.append(namefunc('<type>void</type>'))
+            param_num += 1
+            continue
+
+        declaration, n = re.subn(r'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', declaration)
+        if n:
+            result.append('...')
+            result.append(namefunc('...'))
+            param_num += 1
+            continue
+
+        # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
+        # groups: 1 = leading modifiers, 2 = type, 3 = pointer/qualifiers, 4 = name, 5 = array
+        regex = r'^\s*((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*)((?:struct\b|enum\b)?\s*\w+)\s*((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)(\w+)?\s*((?:\[\S*\])*)\s*(?:G_GNUC_[A-Z_]+)?\s*[,\n]'
+        m = re.match(regex, declaration)
+        if m:
+            # The pattern is anchored at the start, so dropping the matched
+            # prefix is all that is needed to advance.
+            declaration = declaration[m.end():]
+
+            pre = m.group(1) or ''
+            ptype = m.group(2)
+            ptr = m.group(3) or ''
+            name = m.group(4) or ''
+            array = m.group(5) or ''
+
+            pre = re.sub(r'\s+', ' ', pre)
+            ptype = re.sub(r'\s+', ' ', ptype)
+            ptr = re.sub(r'\s+', ' ', ptr)
+            ptr = re.sub(r'\s+$', '', ptr)
+            if ptr and not ptr.endswith('*'):
+                ptr += ' '
+
+            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ptype, ptr, name, array)
+
+            # An unnamed "unsigned foo": what was taken for the type is really
+            # the name, and the modifiers are the type.
+            sign_match = re.search(r'^((un)?signed .*)\s?', pre)
+            if name == '' and sign_match:
+                name = ptype
+                ptype = sign_match.group(1)
+                pre = ''
+
+            if name == '':
+                name = 'Param' + str(param_num + 1)
+
+            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ptype, ptr, name, array)
+
+            result.append(name)
+            xref = typefunc(ptype, '<type>%s</type>' % ptype)
+            label = namefunc('%s%s %s%s%s' % (pre, xref, ptr, name, array))
+            result.append(label)
+            param_num += 1
+            continue
+
+        # Try to match parameters which are functions
+        # groups: 1 = modifiers, 2 = struct, 3 = return type, 4 = return pointer,
+        #         5 = const, 6 = function pointer stars, 7 = name, 8 = parameters
+        regex = r'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*(?:restrict\b)?\s*(const\s+)?\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]'
+        m = re.match(regex, declaration)
+        if m:
+            declaration = declaration[m.end():]
+
+            mod1 = m.group(1) or ''
+            if m.group(2):
+                mod1 += m.group(2)
+            ptype = m.group(3)
+            ptr1 = m.group(4)
+            mod2 = m.group(5) or ''
+            func_ptr = m.group(6)
+            name = m.group(7)
+            func_params = m.group(8) or ''
+
+            # Uses ptr1 of this branch; 'ptr' belongs to the standard parameter
+            # branch and is unset when a function pointer comes first.
+            if ptr1 and not ptr1.endswith('*'):
+                ptr1 += ' '
+            func_ptr = re.sub(r'\s+', ' ', func_ptr)
+
+            logging.debug('"%s" "%s" "%s" "%s" "%s"', mod1, ptype, mod2, func_ptr, name)
+
+            result.append(name)
+            xref = typefunc(ptype, '<type>%s</type>' % ptype)
+            label = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1, xref, ptr1, mod2, func_ptr, name, func_params))
+            result.append(label)
+            param_num += 1
+            continue
+
+        logging.warning('Cannnot parse args for function in "%s"', declaration)
+        break
+
+    return result
+
+
+def ParseMacroDeclaration(declaration, namefunc):
+    """Split the parameter list of a function-like macro.
+
+    Only a '#define NAME(...)' at the very start of the declaration is
+    considered; anything else yields an empty list.
+
+    Args:
+      declaration (str): the macro declaration to parse
+      namefunc (func): called with each parameter name, the result is stored
+                       right after the name
+
+    Returns:
+      list: flat list alternating between a parameter name and the value
+            namefunc returned for it; every varargs form is reported as '...'
+    """
+
+    header = re.search(r'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration)
+    if not header:
+        return []
+
+    # Join parameter lists that were continued over several lines.
+    arg_list = re.sub(r'\\\n', '', header.group(1))
+
+    output = []
+    for raw in arg_list.split(','):
+        token = raw.strip()
+
+        # Allow varargs variations
+        if token.endswith('...'):
+            token = '...'
+
+        # Skip empty entries, e.g. from 'NAME()'.
+        if token == '':
+            continue
+
+        output.append(token)
+        output.append(namefunc(token))
+
+    return output


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]