[gtk-doc] highlight: also use pygments for highlighting in fixxref



commit 232698c6d21ad263e1d0eced3281e2001d21fada
Author: Stefan Sauer <ensonic users sf net>
Date:   Sun Apr 14 16:06:03 2019 +0200

    highlight: also use pygments for highlighting in fixxref
    
    Extract the pygments highlighter from mkhtml2 and also use it in fixxref. This
    lets us drop the relevant configure options and avoid shelling out to external
    tools. This has been a frequent source of issue reports.
    
    Fixes #78

 Makefile.am         |   1 +
 README              |   1 +
 configure.ac        |  63 ----------------------
 gtkdoc/config.py.in |   2 -
 gtkdoc/fixxref.py   | 149 +++++++++-------------------------------------------
 gtkdoc/highlight.py |  49 +++++++++++++++++
 gtkdoc/mkhtml2.py   |  25 ++-------
 requirements.txt    |   1 +
 style/style.css     |  20 +------
 9 files changed, 82 insertions(+), 229 deletions(-)
---
diff --git a/Makefile.am b/Makefile.am
index debfdfa..134b42a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -38,6 +38,7 @@ pylibdata_DATA = \
   gtkdoc/common.py \
   gtkdoc/config.py \
   gtkdoc/fixxref.py \
+  gtkdoc/highlight.py \
   gtkdoc/md_to_db.py \
   gtkdoc/mkdb.py \
   gtkdoc/mkhtml.py \
diff --git a/README b/README
index 7ff045c..b67ff6c 100644
--- a/README
+++ b/README
@@ -31,6 +31,7 @@ Python 3.x
 Additional python modules:
     For the tests: unittest, parameterized
     For mkhtml2 (experimental): anytree, lxml and pygments
+    For fixxref: pygments
 
 For XML output (recommended):
 
diff --git a/configure.ac b/configure.ac
index 1503ca4..97608d2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -57,66 +57,6 @@ dnl check for DocBook DTD and stylesheets in the local catalog.
 JH_CHECK_XML_CATALOG([-//OASIS//DTD DocBook XML V4.3//EN], [DocBook XML DTD V4.3])
 JH_CHECK_XML_CATALOG([http://docbook.sourceforge.net/release/xsl/current/html/chunk.xsl], [DocBook XSL Stylesheets])
 
-dnl
-dnl Check for syntax highlighters
-dnl
-AC_ARG_WITH([highlight],
-       AS_HELP_STRING([--with-highlight], [Select source code syntax highlighter (no|source-highlight|highlight|vim|auto)]),
-       , [with_highlight=auto])
-
-case $with_highlight in
-       no|source-highlight|highlight|vim|auto) ;;
-       *) AC_MSG_ERROR([Invalid value for syntax highlighting option.]) ;;
-esac
-
-HIGHLIGHT_OPTIONS=""
-if test "$with_highlight" = "auto"; then
-       AC_PATH_PROG([HIGHLIGHT], [source-highlight])
-       if test -n "$HIGHLIGHT"; then
-               HIGHLIGHT_OPTIONS="-t4 -s\$SRC_LANG -cstyle.css --no-doc -i"
-       else
-               AC_PATH_PROG([HIGHLIGHT], [highlight])
-               if test -n "$HIGHLIGHT"; then
-                       HIGHLIGHT_OPTIONS="--syntax=\$SRC_LANG --out-format=xhtml -f --class-name=gtkdoc "
-               else
-                       AC_PATH_PROG([HIGHLIGHT], [vim])
-                       if test -n "$HIGHLIGHT"; then
-                               dnl vim is useless if it does not support syntax highlighting
-                               AC_MSG_CHECKING([whether vim has +syntax feature])
-                               if $HIGHLIGHT --version | grep '+syntax' >/dev/null; then
-                                       AC_MSG_RESULT([yes])
-                               else
-                                       AC_MSG_RESULT([no])
-                                       HIGHLIGHT=
-                               fi
-                       fi
-               fi
-       fi
-else
-       if test "$with_highlight" != "no"; then
-               AC_PATH_PROG([HIGHLIGHT], [$with_highlight], [no])
-       fi
-
-       case $with_highlight in
-               source-highlight) HIGHLIGHT_OPTIONS="-t4 -s\$SRC_LANG -cstyle.css --no-doc -i";;
-               highlight) HIGHLIGHT_OPTIONS="--syntax=\$SRC_LANG --out-format=xhtml -f --class-name=gtkdoc ";;
-               vim)
-                       AC_MSG_CHECKING([whether vim has +syntax feature])
-                       if $HIGHLIGHT --version | grep '+syntax' >/dev/null; then
-                               AC_MSG_RESULT([yes])
-                       else
-                               AC_MSG_RESULT([no])
-                               HIGHLIGHT=no
-                       fi
-               ;;
-       esac
-
-       if test "$HIGHLIGHT" = "no" && test "$with_highlight" != "no"; then
-               AC_MSG_ERROR([Could not find requested syntax highlighter])
-       fi
-fi
-AC_SUBST([HIGHLIGHT_OPTIONS])
-
 dnl
 dnl Set runtime package dirs so we can find the script containing common routines.
 dnl
@@ -265,9 +205,6 @@ gtk-doc was configured with the following options:
 test -n "$DBLATEX$FOP" \
     && AC_MSG_NOTICE([** PDF support enabled, using $DBLATEX$FOP]) \
     || AC_MSG_NOTICE([   PDF support disabled, no dblatex or fop available])
-test -n "$HIGHLIGHT" \
-    && AC_MSG_NOTICE([** Syntax highlighting of examples enabled, using $HIGHLIGHT]) \
-    || AC_MSG_NOTICE([   Syntax highlighting of examples disabled])
 test "x$build_tests" != "xno" \
     && AC_MSG_NOTICE([** Building regression tests]) \
     || AC_MSG_NOTICE([   Skipping regression tests])
diff --git a/gtkdoc/config.py.in b/gtkdoc/config.py.in
index 472c7df..1264374 100644
--- a/gtkdoc/config.py.in
+++ b/gtkdoc/config.py.in
@@ -3,8 +3,6 @@ version = "@VERSION@"
 # tools
 dblatex = '@DBLATEX@'
 fop = '@FOP@'
-highlight = '@HIGHLIGHT@'
-highlight_options = '@HIGHLIGHT_OPTIONS@'
 pkg_config = '@PKG_CONFIG@'
 xsltproc = '@XSLTPROC@'
 
diff --git a/gtkdoc/fixxref.py b/gtkdoc/fixxref.py
index 21ee962..356189c 100755
--- a/gtkdoc/fixxref.py
+++ b/gtkdoc/fixxref.py
@@ -24,12 +24,8 @@
 import logging
 import os
 import re
-import shlex
-import subprocess
-import sys
-import tempfile
 
-from . import common, config
+from . import common, highlight
 
 # This contains all the entities and their relative URLs.
 Links = {}
@@ -55,6 +51,7 @@ def Run(options):
     LoadIndicies(options.module_dir, options.html_dir, options.extra_dir)
     ReadSections(options.module)
     FixCrossReferences(options.module_dir, options.module, options.src_lang)
+    highlight.append_style_defs(os.path.join(options.module_dir, 'style.css'))
 
 
 # TODO(ensonic): try to refactor so that we get a list of path's and then just
@@ -227,33 +224,24 @@ def FixHTMLFile(src_lang, module, file):
 
     content = open(file, 'r', encoding='utf-8').read()
 
-    if config.highlight:
-        # FIXME: ideally we'd pass a clue about the example language to the highligher
-        # unfortunately the "language" attribute is not appearing in the html output
-        # we could patch the customization to have <code class="xxx"> inside of <pre>
-        if config.highlight.endswith('vim'):
-            def repl_func(m):
-                return HighlightSourceVim(src_lang, m.group(1), m.group(2))
-            content = re.sub(
-                r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
-                repl_func, content, flags=re.DOTALL)
-        else:
-            def repl_func(m):
-                return HighlightSource(src_lang, m.group(1), m.group(2))
-            content = re.sub(
-                r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
-                repl_func, content, flags=re.DOTALL)
-
-        content = re.sub(r'\&lt;GTKDOCLINK\s+HREF=\&quot;(.*?)\&quot;\&gt;(.*?)\&lt;/GTKDOCLINK\&gt;',
-                         r'\<GTKDOCLINK\ HREF=\"\1\"\>\2\</GTKDOCLINK\>', content, flags=re.DOTALL)
-
-        # From the highlighter we get all the functions marked up. Now we can turn them into GTKDOCLINK items
-        def repl_func(m):
-            return MakeGtkDocLink(m.group(1), m.group(2), m.group(3))
-        content = re.sub(r'(<span class=\"function\">)(.*?)(</span>)', repl_func, content, flags=re.DOTALL)
-        # We can also try the first item in stuff marked up as 'normal'
-        content = re.sub(
-            r'(<span class=\"normal\">\s*)(.+?)((\s+.+?)?\s*</span>)', repl_func, content, flags=re.DOTALL)
+    # FIXME: ideally we'd pass a clue about the example language to the highligher
+    # unfortunately the "language" attribute is not appearing in the html output
+    # we could patch the customization to have <code class="xxx"> inside of <pre>
+    def repl_func(m):
+        return HighlightSourcePygments(src_lang, m.group(1), m.group(2))
+    content = re.sub(
+        r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
+        repl_func, content, flags=re.DOTALL)
+    content = re.sub(r'\&lt;GTKDOCLINK\s+HREF=\&quot;(.*?)\&quot;\&gt;(.*?)\&lt;/GTKDOCLINK\&gt;',
+                     r'\<GTKDOCLINK\ HREF=\"\1\"\>\2\</GTKDOCLINK\>', content, flags=re.DOTALL)
+
+    # From the highlighter we get all the functions marked up. Now we can turn them into GTKDOCLINK items
+    def repl_func(m):
+        return MakeGtkDocLink(m.group(1), m.group(2), m.group(3))
+    content = re.sub(r'(<span class=\"function\">)(.*?)(</span>)', repl_func, content, flags=re.DOTALL)
+    # We can also try the first item in stuff marked up as 'normal'
+    content = re.sub(
+        r'(<span class=\"normal\">\s*)(.+?)((\s+.+?)?\s*</span>)', repl_func, content, flags=re.DOTALL)
 
     lines = content.rstrip().split('\n')
 
@@ -373,91 +361,7 @@ def MakeGtkDocLink(pre, symbol, post):
     return pre + '<GTKDOCLINK HREF="' + id + '">' + symbol + '</GTKDOCLINK>' + post
 
 
-def HighlightSource(src_lang, type, source):
-    # write source to a temp file
-    # FIXME: use .c for now to hint the language to the highlighter
-    with tempfile.NamedTemporaryFile(mode='w+', suffix='.c') as f:
-        temp_source_file = HighlightSourcePreProcess(f, source)
-        highlight_options = config.highlight_options.replace('$SRC_LANG', src_lang)
-
-        logging.info('running %s %s %s', config.highlight, highlight_options, temp_source_file)
-
-        # format source
-        highlighted_source = subprocess.check_output(
-            [config.highlight] + shlex.split(highlight_options) + [temp_source_file]).decode('utf-8')
-        logging.debug('result: [%s]', highlighted_source)
-        if config.highlight.endswith('/source-highlight'):
-            highlighted_source = re.sub(r'^<\!-- .*? -->', '', highlighted_source, flags=re.MULTILINE | re.DOTALL)
-            highlighted_source = re.sub(
-                r'<pre><tt>(.*?)</tt></pre>', r'\1', highlighted_source, flags=re.MULTILINE | re.DOTALL)
-        elif config.highlight.endswith('/highlight'):
-            # need to rewrite the stylesheet classes
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc com">', '<span class="comment">')
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc dir">', '<span class="preproc">')
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc kwd">', '<span class="function">')
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc kwa">', '<span class="keyword">')
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc line">', '<span class="linenum">')
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc num">', '<span class="number">')
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc str">', '<span class="string">')
-            highlighted_source = highlighted_source.replace('<span class="gtkdoc sym">', '<span class="symbol">')
-            # maybe also do
-            # highlighted_source = re.sub(r'</span>(.+)<span', '</span><span class="normal">\1</span><span')
-
-    return HighlightSourcePostprocess(type, highlighted_source)
-
-
-def HighlightSourceVim(src_lang, type, source):
-    # write source to a temp file
-    f = tempfile.NamedTemporaryFile(mode='w+', suffix='.h', delete=False, encoding='utf-8')
-    try:
-        temp_source_file = HighlightSourcePreProcess(f, source)
-        if os.name == 'nt':
-            temp_source_file = temp_source_file.replace('\\', '/')
-        f.close()
-
-        # format source
-        script = "echo 'let html_number_lines=0|" + \
-                 "let html_use_css=1|" + \
-                 "let html_use_xhtml=1|" + \
-                 "set encoding=utf-8|" \
-                 "e {}|".format(temp_source_file) + \
-                 "syn on|" + \
-                 "set syntax={}|".format(src_lang) + \
-                 "run! plugin/tohtml.vim|" + \
-                 "run! syntax/2html.vim|" + \
-                 "w! {}.html|".format(temp_source_file) + \
-                 "qa' | " + \
-                 "{} -n -e -u NONE -T xterm".format(config.highlight)
-        p = subprocess.Popen([os.getenv('SHELL', 'sh')], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-        p.communicate(script.encode('utf-8'))
-        if p.returncode != 0:
-            raise Exception("Highlighter failed. The command was: {}".format(script))
-
-        html_filename = temp_source_file + ".html"
-        try:
-            with open(html_filename, 'rb') as html_file:
-                highlighted_source = html_file.read().decode('utf-8')
-            highlighted_source = re.sub(r'.*<pre\b[^>]*>\n', '', highlighted_source, flags=re.DOTALL)
-            highlighted_source = re.sub(r'</pre>.*', '', highlighted_source, flags=re.DOTALL)
-
-            # need to rewrite the stylesheet classes
-            highlighted_source = highlighted_source.replace('<span class="Comment">', '<span class="comment">')
-            highlighted_source = highlighted_source.replace('<span class="PreProc">', '<span class="preproc">')
-            highlighted_source = highlighted_source.replace('<span class="Statement">', '<span class="keyword">')
-            highlighted_source = highlighted_source.replace('<span class="Identifier">', '<span class="function">')
-            highlighted_source = highlighted_source.replace('<span class="Constant">', '<span class="number">')
-            highlighted_source = highlighted_source.replace('<span class="Special">', '<span class="symbol">')
-            highlighted_source = highlighted_source.replace('<span class="Type">', '<span class="type">')
-        finally:
-            # remove temp files
-            os.unlink(html_filename)
-    finally:
-        os.unlink(f.name)
-
-    return HighlightSourcePostprocess(type, highlighted_source)
-
-
-def HighlightSourcePreProcess(f, source):
+def HighlightSourcePygments(src_lang, div_class, source):
     # chop of leading and trailing empty lines, leave leading space in first real line
     source = source.strip(' ')
     source = source.strip('\n')
@@ -471,14 +375,11 @@ def HighlightSourcePreProcess(f, source):
     source = source.replace('&lt;', '<')
     source = source.replace('&gt;', '>')
     source = source.replace('&amp;', '&')
-    if sys.version_info < (3,):
-        source = source.encode('utf-8')
-    f.write(source)
-    f.flush()
-    return f.name
 
+    highlighted_source = highlight.highlight_code(source, src_lang)
+    if not highlighted_source:
+        highlighted_source = source
 
-def HighlightSourcePostprocess(type, highlighted_source):
     # chop of leading and trailing empty lines
     highlighted_source = highlighted_source.strip()
 
@@ -501,4 +402,4 @@ def HighlightSourcePostprocess(type, highlighted_source):
     </tbody>
   </table>
 </div>
-""" % (type, source_lines, highlighted_source)
+""" % (div_class, source_lines, highlighted_source)
diff --git a/gtkdoc/highlight.py b/gtkdoc/highlight.py
new file mode 100644
index 0000000..6023d33
--- /dev/null
+++ b/gtkdoc/highlight.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# -*- python; coding: utf-8 -*-
+#
+# gtk-doc - GTK DocBook documentation generator.
+# Copyright (C) 2018  Stefan Sauer
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+
+"""
+Highlight sourcecode snippets.
+"""
+
+from pygments import highlight
+from pygments.lexers import CLexer
+from pygments.lexers import get_lexer_by_name
+from pygments.formatters import HtmlFormatter
+
+# lazily constructed lexer cache
+LEXERS = {
+    'c': CLexer()
+}
+HTML_FORMATTER = HtmlFormatter(nowrap=True)
+
+
+def highlight_code(code, lang='c'):
+    if lang not in LEXERS:
+        LEXERS[lang] = get_lexer_by_name(lang)
+    lexer = LEXERS.get(lang, None)
+    if not lexer:
+        return None
+    return highlight(code, lexer, HTML_FORMATTER)
+
+
+def append_style_defs(css_file_name):
+    with open(css_file_name, 'at', newline='\n', encoding='utf-8') as css:
+        css.write(HTML_FORMATTER.get_style_defs())
diff --git a/gtkdoc/mkhtml2.py b/gtkdoc/mkhtml2.py
index c663202..91860a7 100644
--- a/gtkdoc/mkhtml2.py
+++ b/gtkdoc/mkhtml2.py
@@ -105,20 +105,9 @@ from anytree import Node, PreOrderIter
 from copy import deepcopy
 from glob import glob
 from lxml import etree
-from pygments import highlight
-from pygments.lexers import CLexer
-from pygments.lexers import get_lexer_by_name
-from pygments.formatters import HtmlFormatter
 from timeit import default_timer as timer
 
-from . import config, fixxref
-
-# pygments setup
-# lazily constructed lexer cache
-LEXERS = {
-    'c': CLexer()
-}
-HTML_FORMATTER = HtmlFormatter(nowrap=True)
+from . import config, highlight, fixxref
 
 
 class ChunkParams(object):
@@ -876,12 +865,8 @@ def convert_programlisting(ctx, xml):
     if xml.attrib.get('role', '') == 'example':
         if xml.text:
             lang = xml.attrib.get('language', ctx['src-lang']).lower()
-            if lang not in LEXERS:
-                LEXERS[lang] = get_lexer_by_name(lang)
-            lexer = LEXERS.get(lang, None)
-            if lexer:
-                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
-
+            highlighted = highlight.highlight_code(xml.text, lang)
+            if highlighted:
                 # we do own line-numbering
                 line_count = highlighted.count('\n')
                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
@@ -1805,9 +1790,7 @@ def main(module, index_file, out_dir, uninstalled, src_lang, paths):
     css_file = os.path.join(styledir, 'style.css')
     for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
         shutil.copy(f, out_dir)
-    css_file = os.path.join(out_dir, 'style.css')
-    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
-        css.write(HTML_FORMATTER.get_style_defs())
+    highlight.append_style_defs(os.path.join(out_dir, 'style.css'))
     logging.warning("2: %7.3lf: copy datafiles", timer() - _t)
 
     # 3) load xref targets
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a9f49e0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+pygments
diff --git a/style/style.css b/style/style.css
index 4be4ede..6f0bc25 100644
--- a/style/style.css
+++ b/style/style.css
@@ -293,29 +293,11 @@ h2 .extralinks, h3 .extralinks
   font-weight: normal;
 }
 
-acronym,abbr 
+acronym,abbr
 {
   border-bottom: 1px dotted gray;
 }
 
-/* code listings */
-
-.listing_code .programlisting .normal,
-.listing_code .programlisting .normal a,
-.listing_code .programlisting .number,
-.listing_code .programlisting .cbracket,
-.listing_code .programlisting .symbol     { color: #555753; }
-.listing_code .programlisting .comment,
-.listing_code .programlisting .linenum    { color: #babdb6; } /* tango: aluminium 3 */
-.listing_code .programlisting .function,
-.listing_code .programlisting .function a,
-.listing_code .programlisting .preproc    { color: #204a87; } /* tango: sky blue 3  */
-.listing_code .programlisting .string     { color: #ad7fa8; } /* tango: plum */
-.listing_code .programlisting .keyword,
-.listing_code .programlisting .usertype,
-.listing_code .programlisting .type,
-.listing_code .programlisting .type a     { color: #4e9a06; } /* tango: chameleon 3  */
-
 .listing_frame {
   /* tango:sky blue 1 */
   border: solid 1px #729fcf;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]