[mm-common] Add doc_postprocess.py and doc_install.py



commit b022a2ea6694b7b7f2116836945f8fb79db3227e
Author: Kjell Ahlstedt <kjellahlstedt gmail com>
Date:   Mon Aug 16 18:35:55 2021 +0200

    Add doc_postprocess.py and doc_install.py
    
    Generating documentation does not require Perl in Meson builds.
    
    util/build_scripts/doc-reference.py calls doc_postprocess.py and
    doc_install.py instead of doc-postprocess.pl and doc-install.pl.
    
    The Perl scripts are kept. They are used in Autotools builds.
    mm-common-prepare still copies them.

 Makefile.am                         |   2 +
 README                              |   2 +
 meson.build                         |   2 +
 skeletonmm/meson.build              |  15 +-
 skeletonmm/untracked/README         |   4 +-
 util/build_scripts/doc-reference.py |  67 ++++----
 util/doc_install.py                 | 317 ++++++++++++++++++++++++++++++++++++
 util/doc_postprocess.py             | 130 +++++++++++++++
 util/mm-common-get.1.in             |   4 +-
 util/mm-common-get.in               |   2 +-
 10 files changed, 509 insertions(+), 36 deletions(-)
---
diff --git a/Makefile.am b/Makefile.am
index c7511bd..c3182b4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -57,7 +57,9 @@ dist_aclocal_macro_DATA =             \
 doctooldir = $(pkgdatadir)/doctool
 dist_doctool_DATA =                    \
        util/doc-install.pl             \
+       util/doc_install.py             \
        util/doc-postprocess.pl         \
+       util/doc_postprocess.py         \
        util/doxygen.css                \
        util/doxygen-extra.css          \
        util/tagfile-to-devhelp2.xsl
diff --git a/README b/README
index 0cf0f24..326a314 100644
--- a/README
+++ b/README
@@ -242,6 +242,7 @@ that it should copy the documentation utilities into the project's source tree.
 Otherwise the files installed with mm-common will be used automatically.
 
 util/doc-postprocess.pl:
+util/doc_postprocess.py:
   A simple script to post-process the HTML files generated by Doxygen.
   It replaces various code constructs that do not match the coding style
   used throughout the C++ bindings.  For instance, it rewrites function
@@ -249,6 +250,7 @@ util/doc-postprocess.pl:
   instead of the name of the argument.
 
 util/doc-install.pl:
+util/doc_install.py:
   A replacement for the installdox script generated by Doxygen.  Its
   purpose is to translate references to external documentation at the
   time the documentation is installed.  This step is necessary because
diff --git a/meson.build b/meson.build
index b19da8b..71381be 100644
--- a/meson.build
+++ b/meson.build
@@ -126,7 +126,9 @@ endif
 # into projects at Meson setup or configure time.
 doctool_basefiles = [
   'doc-install.pl',
+  'doc_install.py',
   'doc-postprocess.pl',
+  'doc_postprocess.py',
   'doxygen.css',
   'doxygen-extra.css',
   'tagfile-to-devhelp2.xsl',
diff --git a/skeletonmm/meson.build b/skeletonmm/meson.build
index 770ff11..003697a 100644
--- a/skeletonmm/meson.build
+++ b/skeletonmm/meson.build
@@ -114,7 +114,6 @@ if maintainer_mode and not mm_common_get.found()
   mm_common_get = find_program('mm-common-get', required: true)
 endif
 m4 = find_program('m4', required: maintainer_mode) # Used by gmmproc (in glibmm)
-perl = find_program('perl', required: maintainer_mode or build_documentation)
 doxygen = find_program('doxygen', required: build_documentation)
 dot = find_program('dot', required: build_documentation) # Used by Doxygen
 xsltproc = find_program('xsltproc', required: build_documentation)
@@ -146,6 +145,20 @@ sys.exit(os.path.isfile("@0@"))
   endif
 endif
 
+# Check if perl is required and available.
+# Done now, when the doc_reference script is available.
+doc_perl_prop = run_command(
+  python3, doc_reference, 'get_script_property',
+  '', # MMDOCTOOLDIR is not used
+  'requires_perl')
+doc_requires_perl = true
+if doc_perl_prop.returncode() == 0 and doc_perl_prop.stdout() == 'false'
+  doc_requires_perl = false
+endif
+
+perl = find_program('perl', required: maintainer_mode or \
+  (build_documentation and doc_requires_perl))
+
 cpp_compiler = meson.get_compiler('cpp')
 
 # Set compiler warnings.
diff --git a/skeletonmm/untracked/README b/skeletonmm/untracked/README
index b51c2cb..66d4040 100644
--- a/skeletonmm/untracked/README
+++ b/skeletonmm/untracked/README
@@ -15,8 +15,8 @@ or the tarball is created with Meson.
 
 1. Files copied by mm-common-get
 --------------------------------
-untracked/doc/doc-install.pl
-              doc-postprocess.pl
+untracked/doc/doc_install.py
+              doc_postprocess.py
               doxygen-extra.css
               tagfile-to-devhelp2.xsl
 untracked/build_scripts/check-dllexport-usage.py
diff --git a/util/build_scripts/doc-reference.py b/util/build_scripts/doc-reference.py
index 9769688..29cab62 100755
--- a/util/build_scripts/doc-reference.py
+++ b/util/build_scripts/doc-reference.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# External command, intended to be called with custom_target(),
+# External command, intended to be called with run_command(), custom_target(),
 # meson.add_install_script() or meson.add_dist_script() in meson.build.
 
 #                     argv[1]      argv[2]     argv[3:]
@@ -26,6 +26,10 @@ def doxygen():
   doxytagfile = sys.argv[3]
   doc_outdir = os.path.dirname(doxytagfile)
 
+  # Search for doc_postprocess.py first in MMDOCTOOLDIR.
+  sys.path.insert(0, MMDOCTOOLDIR)
+  from doc_postprocess import doc_postprocess
+
   # Export this variable for use in the Doxygen configuration file.
   child_env = os.environ.copy()
   child_env['MMDOCTOOLDIR'] = MMDOCTOOLDIR
@@ -51,13 +55,7 @@ def doxygen():
   if result.returncode:
     return result.returncode
 
-  cmd = [
-    'perl',
-    '--',
-    os.path.join(MMDOCTOOLDIR, 'doc-postprocess.pl'),
-    os.path.join(doc_outdir, 'html', '*.html'),
-  ]
-  return subprocess.run(cmd).returncode
+  return doc_postprocess(os.path.join(doc_outdir, 'html', '*.html'))
 
 # Invoked from custom_target() in meson.build.
 def devhelp():
@@ -96,6 +94,10 @@ def install_doc():
   prefix_htmlrefdir = os.path.join(os.getenv('MESON_INSTALL_PREFIX'), sys.argv[5])
   build_dir = os.path.dirname(devhelpfile)
 
+  # Search for doc_install.py first in MMDOCTOOLDIR.
+  sys.path.insert(0, MMDOCTOOLDIR)
+  from doc_install import doc_install_cmdargs, doc_install_funcargs
+
   # Create the installation directories, if they do not exist.
   os.makedirs(destdir_htmlrefdir, exist_ok=True)
   os.makedirs(destdir_devhelpdir, exist_ok=True)
@@ -105,37 +107,28 @@ def install_doc():
     verbose = ['--verbose']
 
   # Install html files.
-  cmd = [
-    'perl',
-    '--',
-    os.path.join(MMDOCTOOLDIR, 'doc-install.pl'),
-    '--mode=0644',
+  cmdargs = [
+    '--mode=0o644',
   ] + verbose + sys.argv[6:] + [
     '-t', destdir_htmlrefdir,
     '--glob',
     '--',
     os.path.join(build_dir, 'html', '*'),
   ]
-  result1 = subprocess.run(cmd)
+  result1 = doc_install_cmdargs(cmdargs)
 
   # Install the Devhelp file.
   # rstrip('/') means remove trailing /, if any.
-  cmd = [
-    'perl',
-    '--',
-    os.path.join(MMDOCTOOLDIR, 'doc-install.pl'),
-    '--mode=0644',
-  ] + verbose + [
-    '--book-base=' + prefix_htmlrefdir.rstrip('/'),
-    '-t', destdir_devhelpdir,
-    '--',
-    devhelpfile,
-  ]
-  result2 = subprocess.run(cmd)
+  result2 = doc_install_funcargs(
+    sources=[devhelpfile],
+    target=destdir_devhelpdir,
+    target_is_dir=True,
+    mode=0o644,
+    verbose=bool(verbose),
+    book_base=prefix_htmlrefdir.rstrip('/'),
+  )
 
-  if result1.returncode:
-    return result1.returncode
-  return result2.returncode
+  return max(result1, result2)
 
 # Invoked from meson.add_dist_script().
 def dist_doc():
@@ -158,7 +151,7 @@ def dist_doc():
 
   # Distribute files that mm-common-get has copied to MMDOCTOOLDIR.
   # shutil.copy() does not copy timestamps.
-  for file in ['doc-install.pl', 'doc-postprocess.pl', 'doxygen-extra.css', 'tagfile-to-devhelp2.xsl']:
+  for file in ['doc_install.py', 'doc_postprocess.py', 'doxygen-extra.css', 'tagfile-to-devhelp2.xsl']:
     shutil.copy(os.path.join(MMDOCTOOLDIR, file), doctool_dist_dir)
 
   # Distribute built files: tag file, devhelp file, html files.
@@ -169,6 +162,18 @@ def dist_doc():
                   copy_function=shutil.copy)
   return 0
 
+# Invoked from run_command() in meson.build.
+def get_script_property():
+  #  argv[3]
+  # <property>
+  # argv[2] (MMDOCTOOLDIR) is not used.
+  prop = sys.argv[3]
+  if prop == 'requires_perl':
+    print('false', end='') # No newline: meson.build compares stdout with 'false'.
+    return 0
+  print(sys.argv[0], ': unknown property,', prop)
+  return 1 # Nonzero exit status for an unknown property.
+
 # ----- Main -----
 if subcommand == 'doxygen':
   sys.exit(doxygen())
@@ -178,5 +183,7 @@ if subcommand == 'install_doc':
   sys.exit(install_doc())
 if subcommand == 'dist_doc':
   sys.exit(dist_doc())
+if subcommand == 'get_script_property':
+  sys.exit(get_script_property())
 print(sys.argv[0], ': illegal subcommand,', subcommand)
 sys.exit(1)
diff --git a/util/doc_install.py b/util/doc_install.py
new file mode 100755
index 0000000..c1c2922
--- /dev/null
+++ b/util/doc_install.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python3
+
+# doc_install.py [OPTION]... [-T] SOURCE DEST
+# doc_install.py [OPTION]... SOURCE... DIRECTORY
+# doc_install.py [OPTION]... -t DIRECTORY SOURCE...
+
+# Copy SOURCE to DEST or multiple SOURCE files to the existing DIRECTORY,
+# while setting permission modes. For HTML files, translate references to
+# external documentation.
+
+# Mandatory arguments to long options are mandatory for short options, too.
+#       --book-base=BASEPATH          use reference BASEPATH for Devhelp book
+#   -l, --tag-base=TAGFILE\@BASEPATH   use BASEPATH for references from TAGFILE (Doxygen <= 1.8.15)
+#   -l, --tag-base=s\@BASEPUB\@BASEPATH substitute BASEPATH for BASEPUB (Doxygen >= 1.8.16)
+#   -m, --mode=MODE                   override file permission MODE (octal)
+#   -t, --target-directory=DIRECTORY  copy all SOURCE arguments into DIRECTORY
+#   -T, --no-target-directory         treat DEST as normal file
+#       --glob                        expand SOURCE as filename glob pattern
+#   -v, --verbose                     enable informational messages
+#   -h, --help                        display help and exit
+
+import os
+import sys
+import re
+import glob
+
+# Globals
+g_verbose = False # Set by doc_install_funcargs(); read by notice().
+tags_dict = {} # bytes TAGFILE -> bytes BASEPATH; filled by make_dicts().
+subst_dict = {} # bytes BASEPUB -> bytes BASEPATH; filled by make_dicts().
+perm_mode = 0o644 # Passed to os.chmod() for every installed file.
+g_book_base = None # bytes or None; new "base" attribute of the Devhelp <book>.
+html_doxygen_count = 0 # Number of rewritten references in the current HTML file.
+
+message_prefix = os.path.basename(__file__) + ':'
+
+# The installed files are read and written in binary mode.
+# All regular expressions and replacement strings must be bytes objects.
+html_start_pattern = re.compile(rb'\s*(?:<[?!][^<]+)*<html[>\s]')
+html_split1_pattern = re.compile(rb'''
+  \bdoxygen="([^:"]+):([^"]*)"  # doxygen="(TAGFILE):(BASEPATH)"
+  \s+((?:href|src)=")\2([^"]*") # (href="|src=")BASEPATH(RELPATH")
+  ''', re.VERBOSE)
+html_split2_pattern = re.compile(rb'''
+  \b((?:href|src)=")([^"]+") # (href="|src=")(BASEPUB RELPATH")
+  ''', re.VERBOSE)
+
+devhelp_start_pattern = re.compile(rb'\s*(?:<[?!][^<]+)*<book\s')
+devhelp_subst_pattern = re.compile(rb'(<book\s+[^<>]*?\bbase=")[^"]*(?=")')
+
+def notice(*msg): # msg: str pieces that are concatenated without separators.
+  if g_verbose: # Informational messages are printed only in verbose mode.
+    print(message_prefix, ''.join(msg))
+
+def error(*msg): # Never returns normally; msg pieces are str.
+  print(message_prefix, 'Error:', ''.join(msg), file=sys.stderr)
+  raise RuntimeError(''.join(msg)) # The exception carries the same message.
+
+def html_split1_func(group1, group2): # (TAGFILE, BASEPATH), both bytes
+  global html_doxygen_count
+  if group1 in tags_dict: # A new base path was given for this tag file.
+    html_doxygen_count += 1
+    return tags_dict[group1]
+  return group2 # Unknown tag file: keep the BASEPATH found in the html file.
+
+def html_split2_func(group2): # group2: bytes, the value after href=" or src="
+  for key in subst_dict:
+    # Don't use regular expressions here. key may contain characters
+    # that are special in regular expressions.
+    if group2.startswith(key):
+      return subst_dict[key] + group2[len(key):]
+  return None # No BASEPUB matches; install_file() keeps the reference unchanged.
+
+def install_file(in_name, out_name):
+  '''
+  Copy file in_name to out_name while translating references on the fly.
+  '''
+  global html_doxygen_count
+
+  # Some installed files are binary (e.g. .png).
+  # Read and write all files in binary mode, thus avoiding decoding/encoding errors.
+  in_basename = os.path.basename(in_name)
+  with open(in_name, mode='rb') as in_file:
+    # Read the whole file into a bytes buffer.
+    buf = in_file.read()
+
+  if (tags_dict or subst_dict) and html_start_pattern.match(buf):
+    # Probably an html file. Modify it, if appropriate.
+    #
+    # It would be possible to modify with a call to Pattern.sub() or Pattern.subn()
+    # and let a function calculate the replacement string. Example:
+    # (buf, number_of_subs) = html_split2_pattern.subn(html_subst2_func, buf)
+    # A previous Perl script does just that. However, calling a function from
+    # sub() or subn() is a slow operation. Installing doc files for a typical
+    # module such as glibmm or gtkmm takes about 8 times as long as with the
+    # present split+join solution. (Measured with python 3.9.5)
+    html_doxygen_count = 0
+    number_of_subs = 0
+    change = 'no'
+    if tags_dict and b'doxygen="' in buf:
+      # Doxygen 1.8.15 and earlier stores the tag file name and BASEPATH in the html files.
+      split_buf = html_split1_pattern.split(buf)
+      for i in range(0, len(split_buf)-4, 5): # 5 items per match: text + 4 groups
+        basepath = html_split1_func(split_buf[i+1], split_buf[i+2])
+        split_buf[i+1] = b''
+        split_buf[i+2] = b''
+        split_buf[i+3] += basepath # href=" or src=" is followed by the base path.
+      number_of_subs = len(split_buf) // 5
+      if number_of_subs > 0:
+        buf = b''.join(split_buf)
+        change = 'rewrote ' + str(html_doxygen_count) + ' of ' + str(number_of_subs)
+
+    if number_of_subs == 0 and subst_dict:
+      # Doxygen 1.8.16 and later does not store the tag file name and BASEPATH in the html files.
+      # The previous html_split1_pattern.split() won't find anything to substitute.
+      split_buf = html_split2_pattern.split(buf)
+      for i in range(2, len(split_buf), 3): # 3 items per match: text + 2 groups
+        basepath = html_split2_func(split_buf[i])
+        if basepath:
+          split_buf[i] = basepath
+          html_doxygen_count += 1
+      number_of_subs = len(split_buf) // 3
+      if html_doxygen_count > 0:
+        buf = b''.join(split_buf)
+      if number_of_subs > 0:
+        change = 'rewrote ' + str(html_doxygen_count)
+    notice('Translating ', in_basename, ' (', change, ' references)')
+
+  elif g_book_base and devhelp_start_pattern.match(buf):
+    # Probably a devhelp file.
+    # Substitute new value for attribute "base" of element <book>.
+    (buf, number_of_subs) = devhelp_subst_pattern.subn(rb'\1' + g_book_base, buf, 1)
+    change = 'rewrote base path' if number_of_subs else 'base path not set'
+    notice('Translating ', in_basename, ' (', change, ')')
+  else:
+    # A file that shall not be modified.
+    notice('Copying ', in_basename)
+
+  with open(out_name, mode='wb') as out_file:
+    # Write the whole buffer into the target file.
+    out_file.write(buf)
+
+  os.chmod(out_name, perm_mode)
+
+def split_key_value(mapping):
+  '''
+  Split TAGFILE@BASEPATH or s@BASEPUB@BASEPATH argument into key/value pair.
+
+  Returns (name, path, subst). subst is False for the TAGFILE@BASEPATH form
+  and True for the s@BASEPUB@BASEPATH form. Returns (None, None, None) if
+  the argument contains no @BASEPATH part. Calls error(), which raises
+  RuntimeError, if the TAGFILE or BASEPUB part is empty.
+  '''
+  # str.partition() always returns 3 parts; sep is empty if '@' is missing.
+  # Unpacking the result of str.split('@', 1) would raise ValueError then.
+  (name, sep, path) = mapping.partition('@')
+  subst = (name == 's') # True: Doxygen 1.8.16 and later
+  if subst:
+    (name, sep, path) = path.partition('@')
+  if not name:
+    error('Invalid base path mapping: ', mapping)
+  if not sep:
+    notice('Not changing base path for ', '' if subst else 'tag file ', name)
+    return (None, None, None)
+  return (name, path, subst)
+
+def string_to_bytes(s):
+  if isinstance(s, str):
+    return s.encode('utf-8') # The installed files are processed as bytes.
+  return s # E.g. None
+
+def make_dicts(tags): # tags: sequence of --tag-base arguments (str), or None
+  global tags_dict, subst_dict
+
+  tags_dict = {}
+  subst_dict = {}
+  if not tags:
+    return
+
+  for tag in tags:
+    (name, path, subst) = split_key_value(tag)
+    if subst == None: # No base path was given for this name.
+      continue
+    # Translate a local absolute path to URI.
+    path = path.replace('\\', '/').replace(' ', '%20')
+    if path.startswith('/'):
+      path = 'file://' + path
+    path = re.sub(r'^([A-Za-z]:/)', r'file:///\1', path, count=1) # Windows: C:/path
+    if not path.endswith('/'):
+      path += '/'
+    if subst: # s@BASEPUB@BASEPATH
+      notice('Using base path ', path, ' for ', name)
+      subst_dict[string_to_bytes(name)] = string_to_bytes(path)
+    else: # TAGFILE@BASEPATH
+      notice('Using base path ', path, ' for tag file ', name)
+      tags_dict[string_to_bytes(name)] = string_to_bytes(path)
+
+def doc_install_funcargs(sources=[], target=None, book_base=None, tags=[],
+  mode=0o644, target_is_dir=True, expand_glob=False, verbose=False):
+  '''
+  Copy source files to target file or target directory. Returns 0.
+  '''
+  global g_verbose, perm_mode, g_book_base
+
+  g_verbose = verbose
+  perm_mode = mode
+  make_dicts(tags)
+  g_book_base = string_to_bytes(book_base)
+
+  if not target: # error() raises RuntimeError.
+    error('Target file or directory required.')
+  if book_base:
+    notice('Using base path ', book_base, ' for Devhelp book')
+
+  if not target_is_dir:
+    if expand_glob:
+      error('Filename globbing requires target directory.')
+    if len(sources) != 1:
+      error('Only one source file allowed when target is a filename.')
+
+    install_file(sources[0], target)
+    return 0
+
+  if expand_glob:
+    expanded_sources = []
+    for source in sources:
+      expanded_sources += glob.glob(source)
+    sources = expanded_sources
+
+  basename_set = set()
+  for source in sources:
+    basename = os.path.basename(source)
+
+    # If there are multiple files with the same base name in the list, only
+    # the first one will be installed. This behavior makes it very easy to
+    # implement a VPATH search for each individual file.
+    if basename not in basename_set:
+      basename_set.add(basename)
+      out_name = os.path.join(target, basename)
+      install_file(source, out_name)
+  return 0
+
+def doc_install_cmdargs(args=None):
+  '''
+  Parse command line parameters, or a sequence of strings equal to
+  command line parameters. Then copy source files to target file or
+  target directory.
+  '''
+  import argparse
+
+  parser = argparse.ArgumentParser(
+    formatter_class=argparse.RawTextHelpFormatter,
+    prog=os.path.basename(__file__),
+    usage='''
+      %(prog)s [OPTION]... [-T] SOURCE DEST
+      %(prog)s [OPTION]... SOURCE... DIRECTORY
+      %(prog)s [OPTION]... -t DIRECTORY SOURCE...''',
+    description='''
+      Copy SOURCE to DEST or multiple SOURCE files to the existing DIRECTORY,
+      while setting permission modes. For HTML files, translate references to
+      external documentation.'''
+  )
+  parser.add_argument('--book-base', dest='book_base', metavar='BASEPATH',
+    help='use reference BASEPATH for Devhelp book')
+  parser.add_argument('-l', '--tag-base', action='append', dest='tags', metavar='SUBST',
+    help='''TAGFILE@BASEPATH   use BASEPATH for references from TAGFILE (Doxygen <= 1.8.15)
+s@BASEPUB@BASEPATH substitute BASEPATH for BASEPUB (Doxygen >= 1.8.16)'''
+  )
+  parser.add_argument('-m', '--mode', dest='mode', metavar='MODE', default='0o644',
+    help='override file permission MODE (octal)')
+
+  group = parser.add_mutually_exclusive_group()
+  group.add_argument('-t', '--target-directory', dest='target_dir', metavar='DIRECTORY',
+    help='copy all SOURCE arguments into DIRECTORY')
+  group.add_argument('-T', '--no-target-directory', action='store_false', dest='target_is_dir',
+    help='treat DEST as normal file')
+
+  parser.add_argument('--glob', action='store_true', dest='expand_glob',
+    help='expand SOURCE as filename glob pattern')
+  parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
+    help='enable informational messages')
+  parser.add_argument('source_dest', nargs='+',
+    help='''SOURCE DEST
+SOURCE... DIRECTORY
+SOURCE...'''
+  )
+  parsed_args = parser.parse_args(args)
+
+  if not parsed_args.target_is_dir: # -T: exactly SOURCE DEST
+    if len(parsed_args.source_dest) != 2:
+      error('Source and destination filenames expected.')
+    sources = [parsed_args.source_dest[0]]
+    target = parsed_args.source_dest[1]
+  else:
+    target = parsed_args.target_dir
+    if not target: # No -t: the last argument is the directory.
+      if len(parsed_args.source_dest) < 2:
+        error('At least one source file and destination directory expected.')
+      target = parsed_args.source_dest[-1]
+      sources = parsed_args.source_dest[0:-1]
+    else: # -t DIRECTORY: all positional arguments are sources.
+      sources = parsed_args.source_dest
+
+  return doc_install_funcargs(
+    sources=sources,
+    target=target,
+    book_base=parsed_args.book_base,
+    tags=parsed_args.tags,
+    mode=int(parsed_args.mode, base=8), # Accepts e.g. '644', '0644' and '0o644'.
+    target_is_dir=parsed_args.target_is_dir,
+    expand_glob=parsed_args.expand_glob,
+    verbose=parsed_args.verbose
+  )
+
+# ----- Main -----
+if __name__ == '__main__': # Not executed when the file is imported as a module.
+  sys.exit(doc_install_cmdargs()) # args=None: argparse reads sys.argv[1:].
diff --git a/util/doc_postprocess.py b/util/doc_postprocess.py
new file mode 100755
index 0000000..35be171
--- /dev/null
+++ b/util/doc_postprocess.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+
+# doc_postprocess.py [-h|--help] <pattern>...
+
+# Post-process the Doxygen-generated HTML files matching pattern.
+
+import os
+import sys
+import re
+import glob
+
+# Substitutions with regular expressions are somewhat slow in Python 3.9.5.
+# Use str.replace() rather than re.sub() where possible.
+
+# [search string, compiled regular expression or None, substitution string, count]
+class_el_patterns = [
+  # return value
+  [ ' &amp;&nbsp;', re.compile(r' &amp;&nbsp; *'), '&amp;&#160;', 1],
+  [ ' *&nbsp;', re.compile(r' \*&nbsp; *'), '*&#160;', 1],
+  # parameters
+  [ ' &amp;', None, '&amp;', 0],
+  [ '&amp;', re.compile(r'&amp;\b'), '&amp; ', 0],
+  [ ' *', None, '*', 0],
+  [ '*', re.compile(r'\*\b'), '* ', 0],
+  # templates
+  [ 'template&lt;', re.compile(r'\btemplate&lt;'), 'template &lt;', 1]
+]
+
+class_md_patterns = [
+  # left parenthesis
+  [ '(&nbsp;', re.compile(r'\(&nbsp; *'), '(', 1],
+  # return value
+  [ ' &amp; ', None, '&amp; ', 0],
+  [ ' * ', None, '* ', 0],
+  # parameters
+  [ ' &amp;&nbsp;', re.compile(r' &amp;&nbsp; *'), '&amp;&#160;', 0],
+  [ ' *&nbsp;', re.compile(r' \*&nbsp; *'), '*&#160;', 0],
+  # templates
+  [ 'template&lt;', re.compile(r'\btemplate&lt;'), 'template &lt;', 1]
+]
+
+else_patterns = [
+  # template decls
+  [ 'template&lt;', re.compile(r'^(<h\d>|)template&lt;'), '\\1template &lt;', 1]
+]
+
+all_lines_patterns = [
+  # For some reason, some versions of Doxygen output the full path to
+  # referenced tag files. This is bad since it breaks doc_install.py,
+  # and also because it leaks local path names into source tarballs.
+  # Thus, strip the directory prefix here.
+  [ ' doxygen="', re.compile(r' doxygen="[^":]*/([^":]+\.tag):'), ' doxygen="\\1:', 0],
+
+  [ '&copy;', None, '&#169;', 0],
+  [ '&mdash;', None, '&#8212;', 0],
+  [ '&ndash;', None, '&#8211;', 0],
+  [ '&nbsp;', re.compile(r' *&nbsp; *'), '&#160;', 0]
+]
+
+def _apply_substitutions(line, substitutions):
+  '''
+  Apply a list of substitutions to one line and return the modified line.
+
+  Each element of substitutions is a list:
+  [search string, compiled regular expression or None, substitution string, count]
+  count is used as in re.sub(): 0 means that all occurrences are replaced.
+  '''
+  for (search, regex, replacement, count) in substitutions:
+    if search not in line:
+      continue # Cheap test; substitutions with regular expressions are slow.
+    if regex:
+      line = regex.sub(replacement, line, count=count)
+    else:
+      # str.replace() would replace nothing if count is 0. -1 means replace all.
+      line = line.replace(search, replacement, count if count > 0 else -1)
+  return line
+
+def doc_postprocess(patterns):
+  '''
+  Post-process the Doxygen-generated HTML files matching patterns.
+
+  patterns is a filename glob pattern, a list or tuple of such patterns,
+  or None (no files). Each matching file is read, modified line by line
+  and written back to the same path. Returns 0.
+  '''
+  if not isinstance(patterns, (list, tuple)):
+    patterns = [] if patterns is None else [patterns]
+
+  filepaths = []
+  for pattern in patterns:
+    filepaths += glob.glob(pattern)
+
+  for filepath in filepaths:
+    # Assume that the file is UTF-8 encoded.
+    # If illegal UTF-8 bytes in the range 0x80..0xff are encountered, they are
+    # replaced by lone surrogate code points in the range U+DC80..U+DCFF
+    # and restored to their original values when the file is rewritten.
+    with open(filepath, mode='r', encoding='utf-8', errors='surrogateescape') as file:
+      # Read the whole file into a buffer, a list with one line per element.
+      buf = file.readlines()
+
+    for (line_number, line) in enumerate(buf):
+      # Select the substitutions that depend on the kind of line.
+      if '<a class="el"' in line:
+        line_patterns = class_el_patterns
+      elif ('<td class="md"' in line) or ('<td class="mdname"' in line):
+        line_patterns = class_md_patterns
+      else:
+        line_patterns = else_patterns
+
+      line = _apply_substitutions(line, line_patterns)
+      buf[line_number] = _apply_substitutions(line, all_lines_patterns)
+
+    with open(filepath, mode='w', encoding='utf-8', errors='surrogateescape') as file:
+      # Write the whole buffer back into the file.
+      file.writelines(buf)
+
+  return 0
+
+# ----- Main -----
+if __name__ == '__main__':
+  import argparse
+
+  parser = argparse.ArgumentParser(
+    description='Post-process the Doxygen-generated HTML files matching pattern.')
+  parser.add_argument('patterns', nargs='*', metavar='pattern', help='filename pattern')
+  args = parser.parse_args()
+
+  # doc_postprocess() expands the patterns with glob.glob().
+  sys.exit(doc_postprocess(args.patterns))
diff --git a/util/mm-common-get.1.in b/util/mm-common-get.1.in
index 951f7ea..392b022 100644
--- a/util/mm-common-get.1.in
+++ b/util/mm-common-get.1.in
@@ -44,9 +44,9 @@ are listed below.
 .BI "Documentation utilities copied to " DOCTOOL-DIR :
 .PD 0
 .IP
-.I doc-install.pl
+.I doc_install.py
 .IP
-.I doc-postprocess.pl
+.I doc_postprocess.py
 .IP
 .I doxygen.css
 .IP
diff --git a/util/mm-common-get.in b/util/mm-common-get.in
index 8a21f02..e843dd8 100644
--- a/util/mm-common-get.in
+++ b/util/mm-common-get.in
@@ -53,7 +53,7 @@ for file in ['check-dllexport-usage.py', 'dist-build-scripts.py', 'dist-changelo
 
 print(progname + ': putting documentation utilities in ' + doctooldir)
 os.makedirs(doctooldir, exist_ok=True)
-for file in ['doc-install.pl', 'doc-postprocess.pl',
+for file in ['doc_install.py', 'doc_postprocess.py',
              'doxygen.css', 'doxygen-extra.css', 'tagfile-to-devhelp2.xsl']:
   src_file = os.path.join(pkgdatadir, 'doctool', file)
   dest_file = os.path.join(doctooldir, file)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]