[gtk-doc] scan: break long function that scans headers apart.
- From: Stefan Sauer <stefkost src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtk-doc] scan: break long function that scans headers apart.
- Date: Sun, 18 Nov 2018 21:24:16 +0000 (UTC)
commit e089fa7914d7554bf3eeb6801ca99d29d2d4bd76
Author: Stefan Sauer <ensonic users sf net>
Date: Sun Nov 18 22:22:47 2018 +0100
scan: break long function that scans headers apart.
This will help to add tests for this code and should in turn help with bugs
like #60.
gtkdoc/scan.py | 287 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 153 insertions(+), 134 deletions(-)
---
diff --git a/gtkdoc/scan.py b/gtkdoc/scan.py
index 085aeea..0d99558 100644
--- a/gtkdoc/scan.py
+++ b/gtkdoc/scan.py
@@ -40,10 +40,6 @@ import shutil
from . import common
-# do not read files twice; checking it here permits to give both srcdir and
-# builddir as --source-dir without fear of duplicities
-seen_headers = {}
-
def Run(options):
logging.info('options: %s', str(options.__dict__))
@@ -68,11 +64,15 @@ def Run(options):
decl_list = []
get_types = []
+ # do not read files twice; checking it here allows passing both srcdir and
+ # builddir as --source-dir without fear of duplicates
+ seen_headers = {}
+
for file in options.headers:
- ScanHeader(file, section_list, decl_list, get_types, options)
+ ScanHeader(file, section_list, decl_list, get_types, seen_headers, options)
for dir in options.source_dir:
- ScanHeaders(dir, section_list, decl_list, get_types, options)
+ ScanHeaders(dir, section_list, decl_list, get_types, seen_headers, options)
with open(new_decl_list, 'w', encoding='utf-8') as f:
for section in sorted(section_list.keys()):
@@ -117,11 +117,12 @@ def Run(options):
# Function : ScanHeaders
# Description : This scans a directory tree looking for header files.
#
-# Arguments : $source_dir - the directory to scan.
-# $section_list - a reference to the hashmap of sections.
+# Arguments : source_dir - the directory to scan.
+# section_list - a reference to the hashmap of sections.
+# seen_headers - set to avoid scanning headers twice
#
-def ScanHeaders(source_dir, section_list, decl_list, get_types, options):
+def ScanHeaders(source_dir, section_list, decl_list, get_types, seen_headers, options):
logging.info('Scanning source directory: %s', source_dir)
# This array holds any subdirectories found.
@@ -134,7 +135,8 @@ def ScanHeaders(source_dir, section_list, decl_list, get_types, options):
if os.path.isdir(fullname):
subdirs.append(file)
elif file.endswith('.h'):
- ScanHeader(fullname, section_list, decl_list, get_types, options)
+ ScanHeader(fullname, section_list, decl_list, get_types,
+ seen_headers, options)
# Now recursively scan the subdirectories.
for dir in subdirs:
@@ -142,7 +144,7 @@ def ScanHeaders(source_dir, section_list, decl_list, get_types, options):
if re.search(matchstr, options.ignore_headers):
continue
ScanHeaders(os.path.join(source_dir, dir), section_list, decl_list,
- get_types, options)
+ get_types, seen_headers, options)
#
@@ -150,51 +152,13 @@ def ScanHeaders(source_dir, section_list, decl_list, get_types, options):
# Description : This scans a header file, looking for declarations of
# functions, macros, typedefs, structs and unions, which it
# outputs to the decl_list.
-# Arguments : $input_file - the header file to scan.
-# $section_list - a map of sections.
-# $decl_list - a list of declarations
+# Arguments : input_file - the header file to scan.
+# section_list - a map of sections.
+# decl_list - a list of declarations
+# seen_headers - set to avoid scanning headers twice
# Returns : it adds declarations to the appropriate list.
#
-
-def ScanHeader(input_file, section_list, decl_list, get_types, options):
- global seen_headers
- slist = [] # Holds the resulting list of declarations.
- title = '' # Holds the title of the section
- in_comment = 0 # True if we are in a comment.
- in_declaration = '' # The type of declaration we are in, e.g.
- # 'function' or 'macro'.
- skip_block = 0 # True if we should skip a block.
- symbol = None # The current symbol being declared.
- decl = '' # Holds the declaration of the current symbol.
- ret_type = None # For functions and function typedefs this
- # holds the function's return type.
- pre_previous_line = '' # The pre-previous line read in - some Gnome
- # functions have the return type on one
- # line, the function name on the next,
- # and the rest of the declaration after.
- previous_line = '' # The previous line read in - some Gnome
- # functions have the return type on one line
- # and the rest of the declaration after.
- first_macro = 1 # Used to try to skip the standard #ifdef XXX
- # define XXX at the start of headers.
- level = None # Used to handle structs/unions which contain
- # nested structs or unions.
- internal = 0 # Set to 1 for internal symbols, we need to
- # fully parse, but don't add them to docs
- forward_decls = {} # Dict of forward declarations, we skip
- # them if we find the real declaration
- # later.
- doc_comments = {} # Dict of doc-comments we found.
- # The key is lowercase symbol name, val=1.
-
- file_basename = None
-
- deprecated_conditional_nest = 0
- ignore_conditional_nest = 0
-
- deprecated = ''
- doc_comment = ''
-
+def ScanHeader(input_file, section_list, decl_list, get_types, seen_headers, options):
# Don't scan headers twice
canonical_input_file = os.path.realpath(input_file)
if canonical_input_file in seen_headers:
@@ -223,7 +187,139 @@ def ScanHeader(input_file, section_list, decl_list, get_types, options):
logging.info('Scanning %s', input_file)
- for line in open(input_file, 'r', encoding='utf-8'):
+ with open(input_file, 'r', encoding='utf-8') as hdr:
+ input_lines = hdr.readlines()
+
+ slist, doc_comments = ScanHeaderContent(input_lines, decl_list, get_types, options)
+
+ logging.info("Scanning %s done", input_file)
+
+ # Try to separate the standard macros and functions, placing them at the
+ # end of the current section, in a subsection named 'Standard'.
+ # do this in a loop to catch object, enums and flags
+ klass = lclass = prefix = lprefix = None
+ standard_decl = []
+ liststr = '\n'.join(s for s in slist if s) + '\n'
+ while True:
+ m = re.search(r'^(\S+)_IS_(\S*)_CLASS\n', liststr, flags=re.MULTILINE)
+ m2 = re.search(r'^(\S+)_IS_(\S*)\n', liststr, flags=re.MULTILINE)
+ m3 = re.search(r'^(\S+?)_(\S*)_get_type\n', liststr, flags=re.MULTILINE)
+ if m:
+ prefix = m.group(1)
+ lprefix = prefix.lower()
+ klass = m.group(2)
+ lclass = klass.lower()
+ logging.info("Found gobject type '%s_%s' from is_class macro", prefix, klass)
+ elif m2:
+ prefix = m2.group(1)
+ lprefix = prefix.lower()
+ klass = m2.group(2)
+ lclass = klass.lower()
+ logging.info("Found gobject type '%s_%s' from is_ macro", prefix, klass)
+ elif m3:
+ lprefix = m3.group(1)
+ prefix = lprefix.upper()
+ lclass = m3.group(2)
+ klass = lclass.upper()
+ logging.info("Found gobject type '%s_%s' from get_type function", prefix, klass)
+ else:
+ break
+
+ cclass = lclass
+ cclass = cclass.replace('_', '')
+ mtype = lprefix + cclass
+
+ liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sPrivate\n' % mtype)
+
+ # We only leave XxYy* in the normal section if they have docs
+ if mtype not in doc_comments:
+ logging.info(" Hide instance docs for %s", mtype)
+ liststr, standard_decl = replace_once(liststr, standard_decl, r'^%s\n' % mtype)
+
+ if mtype + 'class' not in doc_comments:
+ logging.info(" Hide class docs for %s", mtype)
+ liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sClass\n' % mtype)
+
+ if mtype + 'interface' not in doc_comments:
+ logging.info(" Hide iface docs for %s", mtype)
+ liststr, standard_decl = replace_once(liststr, standard_decl, r'%sInterface\n' % mtype)
+
+ if mtype + 'iface' not in doc_comments:
+ logging.info(" Hide iface docs for " + mtype)
+ liststr, standard_decl = replace_once(liststr, standard_decl, r'%sIface\n' % mtype)
+
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s\n' % klass)
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_TYPE_%s\n' % klass)
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_get_type\n' % lclass)
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_CLASS\n' % klass)
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s_CLASS\n' % klass)
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_CLASS\n' % klass)
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_IFACE\n' % klass)
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_INTERFACE\n' % klass)
+ # We do this one last, otherwise it tends to be caught by the IS_$class macro
+ liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s\n' % klass)
+
+ logging.info('Decl:%s---', liststr)
+ logging.info('Std :%s---', ''.join(sorted(standard_decl)))
+ if len(standard_decl):
+ # sort the symbols
+ liststr += '<SUBSECTION Standard>\n' + ''.join(sorted(standard_decl))
+
+ if liststr != '':
+ if file_basename not in section_list:
+ section_list[file_basename] = ''
+ section_list[file_basename] += "<SECTION>\n<FILE>%s</FILE>\n%s</SECTION>\n\n" % (file_basename,
liststr)
+
+# Scan the given content lines.
+# Returns: a list of symbols found and a set of symbols for which we have a
+# doc-comment
+
+
+def ScanHeaderContent(input_lines, decl_list, get_types, options):
+ # Holds the resulting list of declarations.
+ slist = []
+ # Holds the title of the section
+ title = ''
+ # True if we are in a comment.
+ in_comment = 0
+ # The type of declaration we are in, e.g. 'function' or 'macro'.
+ in_declaration = ''
+ # True if we should skip a block.
+ skip_block = 0
+ # The current symbol being declared.
+ symbol = None
+ # Holds the declaration of the current symbol.
+ decl = ''
+ # For functions and function typedefs this holds the function's return type.
+ ret_type = None
+ # The pre-previous line read in - some Gnome functions have the return type
+ # on one line, the function name on the next, and the rest of the
+ # declaration after.
+ pre_previous_line = ''
+ # The previous line read in - some Gnome functions have the return type on
+ # one line and the rest of the declaration after.
+ previous_line = ''
+ # Used to try to skip the standard #ifdef XXX #define XXX at the start of
+ # headers.
+ first_macro = 1
+ # Used to handle structs/unions which contain nested structs or unions.
+ level = None
+ # Set to 1 for internal symbols, we need to fully parse, but don't add them
+ # to docs
+ internal = 0
+ # Dict of forward declarations, we skip them if we find the real declaration
+ # later.
+ forward_decls = {}
+ # Dict of doc-comments we found. The key is lowercase symbol name, val=1.
+ doc_comments = {}
+
+ deprecated_conditional_nest = 0
+ ignore_conditional_nest = 0
+
+ deprecated = ''
+ doc_comment = ''
+
+ for line in input_lines:
# If this is a private header, skip it.
if re.search(r'^\s*/\*\s*<\s*private_header\s*>\s*\*/', line):
return
@@ -705,7 +801,7 @@ def ScanHeader(input_file, section_list, decl_list, get_types, options):
# check if this looks like a get_type function and if so remember
if symbol.endswith('_get_type') and 'GType' in ret_type and
re.search(r'^(void|)$', decl):
logging.info(
- "Adding get-type: [%s] [%s] [%s]\tfrom %s", ret_type, symbol, decl,
input_file)
+ "Adding get-type: [%s] [%s] [%s]", ret_type, symbol, decl)
get_types.append(symbol)
else:
internal = 0
@@ -783,84 +879,7 @@ def ScanHeader(input_file, section_list, decl_list, get_types, options):
# add title
slist = [title] + slist
-
- logging.info("Scanning %s done", input_file)
-
- # Try to separate the standard macros and functions, placing them at the
- # end of the current section, in a subsection named 'Standard'.
- # do this in a loop to catch object, enums and flags
- klass = lclass = prefix = lprefix = None
- standard_decl = []
- liststr = '\n'.join(s for s in slist if s) + '\n'
- while True:
- m = re.search(r'^(\S+)_IS_(\S*)_CLASS\n', liststr, flags=re.MULTILINE)
- m2 = re.search(r'^(\S+)_IS_(\S*)\n', liststr, flags=re.MULTILINE)
- m3 = re.search(r'^(\S+?)_(\S*)_get_type\n', liststr, flags=re.MULTILINE)
- if m:
- prefix = m.group(1)
- lprefix = prefix.lower()
- klass = m.group(2)
- lclass = klass.lower()
- logging.info("Found gobject type '%s_%s' from is_class macro", prefix, klass)
- elif m2:
- prefix = m2.group(1)
- lprefix = prefix.lower()
- klass = m2.group(2)
- lclass = klass.lower()
- logging.info("Found gobject type '%s_%s' from is_ macro", prefix, klass)
- elif m3:
- lprefix = m3.group(1)
- prefix = lprefix.upper()
- lclass = m3.group(2)
- klass = lclass.upper()
- logging.info("Found gobject type '%s_%s' from get_type function", prefix, klass)
- else:
- break
-
- cclass = lclass
- cclass = cclass.replace('_', '')
- mtype = lprefix + cclass
-
- liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sPrivate\n' % mtype)
-
- # We only leave XxYy* in the normal section if they have docs
- if mtype not in doc_comments:
- logging.info(" Hide instance docs for %s", mtype)
- liststr, standard_decl = replace_once(liststr, standard_decl, r'^%s\n' % mtype)
-
- if mtype + 'class' not in doc_comments:
- logging.info(" Hide class docs for %s", mtype)
- liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sClass\n' % mtype)
-
- if mtype + 'interface' not in doc_comments:
- logging.info(" Hide iface docs for %s", mtype)
- liststr, standard_decl = replace_once(liststr, standard_decl, r'%sInterface\n' % mtype)
-
- if mtype + 'iface' not in doc_comments:
- logging.info(" Hide iface docs for " + mtype)
- liststr, standard_decl = replace_once(liststr, standard_decl, r'%sIface\n' % mtype)
-
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s\n' % klass)
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_TYPE_%s\n' % klass)
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_get_type\n' % lclass)
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_CLASS\n' % klass)
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s_CLASS\n' % klass)
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_CLASS\n' % klass)
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_IFACE\n' % klass)
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_INTERFACE\n' % klass)
- # We do this one last, otherwise it tends to be caught by the IS_$class macro
- liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s\n' % klass)
-
- logging.info('Decl:%s---', liststr)
- logging.info('Std :%s---', ''.join(sorted(standard_decl)))
- if len(standard_decl):
- # sort the symbols
- liststr += '<SUBSECTION Standard>\n' + ''.join(sorted(standard_decl))
-
- if liststr != '':
- if file_basename not in section_list:
- section_list[file_basename] = ''
- section_list[file_basename] += "<SECTION>\n<FILE>%s</FILE>\n%s</SECTION>\n\n" % (file_basename,
liststr)
+ return slist, doc_comments
def replace_once(liststr, standard_decl, regex):
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]