[gtk-doc] fixxref: port from perl to python



commit ef06ab98552893f3a74cd72e13709d8ae00cc96e
Author: Stefan Sauer <ensonic users sf net>
Date:   Wed Apr 12 18:26:28 2017 +0200

    fixxref: port from perl to python

 Makefile.am            |    2 +
 gtkdoc-fixxref.in      |  615 +++---------------------------------------------
 gtkdoc/config.py.in    |    2 +
 gtkdoc/fixxref.py      |  440 ++++++++++++++++++++++++++++++++++
 tests/Makefile.am      |    2 +-
 tests/gtkdoc-fixxref.t |   30 ---
 6 files changed, 482 insertions(+), 609 deletions(-)
---
diff --git a/Makefile.am b/Makefile.am
index b69045d..b9f3eb9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -40,6 +40,7 @@ pylibdata_DATA = \
   gtkdoc/check.py \
   gtkdoc/common.py \
   gtkdoc/config.py \
+  gtkdoc/fixxref.py \
   gtkdoc/mkhtml.py \
   gtkdoc/mkman.py \
   gtkdoc/mkpdf.py \
@@ -87,6 +88,7 @@ CLEANFILES = \
   gtkdoc/check.pyc \
   gtkdoc/common.pyc \
   gtkdoc/config.pyc \
+  gtkdoc/fixxref.pyc \
   gtkdoc/mkhtml.pyc \
   gtkdoc/mkman.pyc \
   gtkdoc/mkpdf.pyc \
diff --git a/gtkdoc-fixxref.in b/gtkdoc-fixxref.in
index faeb7f7..eba83f9 100755
--- a/gtkdoc-fixxref.in
+++ b/gtkdoc-fixxref.in
@@ -1,5 +1,5 @@
-#!@PERL@ -w
-# -*- cperl -*-
+#!@PYTHON@
+# -*- python -*-
 #
 # gtk-doc - GTK DocBook documentation generator.
 # Copyright (C) 1998  Damon Chaplin
@@ -20,579 +20,38 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 #
 
-#############################################################################
-# Script      : gtkdoc-fixxref
-# Description : This fixes cross-references in the HTML documentation.
-#############################################################################
-
-use strict;
-use bytes;
-use Getopt::Long;
-
-push @INC, '@PACKAGE_DATA_DIR@';
-require "gtkdoc-common.pl";
-
-# Options
-
-# name of documentation module
-my $MODULE;
-my $MODULE_DIR;
-my $HTML_DIR = "";
-my @EXTRA_DIRS;
-my $PRINT_VERSION;
-my $PRINT_HELP;
-my $SRC_LANG;
-
-# This contains all the entities and their relative URLs.
-my %Links;
-
-# failing link targets we don't warn about even once
-my %NoLinks = (
-    'char' => 1,
-    'double' => 1,
-    'float' => 1,
-    'int' => 1,
-    'long' => 1,
-    'main' => 1,
-    'signed' => 1,
-    'unsigned' => 1,
-    'va-list' => 1,
-    'void' => 1,
-    'GBoxed' => 1,
-    'GEnum' => 1,
-    'GFlags' => 1,
-    'GInterface' => 1
-);
-
-# Cache of dirs we already scanned for index files
-my %DirCache;
-
-Run() unless caller; # Run program unless loaded as a module
-
-
-sub Run {
-    my %optctl = ('module' => \$MODULE,
-                  'module-dir' => \$MODULE_DIR,
-                  'html-dir' => \$HTML_DIR,
-                  'extra-dir' => \@EXTRA_DIRS,
-                  'version' => \$PRINT_VERSION,
-                  'help' => \$PRINT_HELP,
-                  'src-lang' => \$SRC_LANG);
-
-    GetOptions(\%optctl, "module=s", "module-dir=s", "html-dir:s", "extra-dir=s@",
-              "src-lang=s", "version", "help");
-
-    if ($PRINT_VERSION) {
-        print "@VERSION@\n";
-        exit 0;
-    }
-
-    if ($PRINT_HELP) {
-            print <<EOF;
-gtkdoc-fixxref version @VERSION@ - fix cross references in html files
-
---module=MODULE_NAME    Name of the doc module being parsed
---module-dir=MODULE_DIR The directory which contains the generated HTML
---html-dir=HTML_DIR     The directory where gtk-doc generated documentation is
-                        installed
---extra-dir=EXTRA_DIR   Directories to recursively scan for indices (index.sgml)
-                        in addition to HTML_DIR
-                        May be used more than once for multiple directories
---src-lang=SRC_LANG     Programing language used for syntax highlighting. The
-                        available languages depend on the source source
-                        highlighter you use.
---version               Print the version of this program
---help                  Print this help
-EOF
-        exit 0;
-    }
-
-    if (!$SRC_LANG) {
-        $SRC_LANG="c"
-    }
-
-    my $path_prefix="";
-    if ($HTML_DIR =~ m%(.*?)/share/gtk-doc/html%) {
-        $path_prefix=$1;
-        @TRACE@("Path prefix: $path_prefix");
-    }
-
-    if (!defined $MODULE_DIR) {
-        $MODULE_DIR="$HTML_DIR/$MODULE";
-    }
-
-    my $dir;
-
-    # We scan the directory containing GLib and any directories in GNOME2_PATH
-    # first, but these will be overriden by any later scans.
-    $dir = `@PKG_CONFIG@ --variable=prefix glib-2.0`;
-    $dir =~ s/\s+$//;
-    $dir = $dir . "/share/gtk-doc/html";
-    if (-d $dir) {
-        # Some predefined link targets to get links into type hierarchies as these
-        # have no targets. These are always absolute for now.
-        $Links{'GBoxed'} = "$dir/gobject/gobject-Boxed-Types.html";
-        $Links{'GEnum'} = "$dir/gobject/gobject-Enumeration-and-Flag-Types.html";
-        $Links{'GFlags'} = "$dir/gobject/gobject-Enumeration-and-Flag-Types.html";
-        $Links{'GInterface'} = "$dir/gobject/GTypeModule.html";
-
-        if ($dir ne $HTML_DIR) {
-            @TRACE@("Scanning GLib directory: $dir");
-            if ($dir !~ m%^\Q$path_prefix\E/%) {
-                &ScanIndices ($dir, 1);
-            } else {
-                &ScanIndices ($dir, 0);
-            }
-        }
-    }
-
-    if (defined ($ENV{"GNOME2_PATH"})) {
-        foreach $dir (split (/:/, $ENV{"GNOME2_PATH"})) {
-            $dir = $dir . "/share/gtk-doc/html";
-            if (-d $dir && $dir ne $HTML_DIR) {
-                @TRACE@("Scanning GNOME2_PATH directory: $dir");
-                if ($dir !~ m%^\Q$path_prefix\E/%) {
-                    &ScanIndices ($dir, 1);
-                } else {
-                    &ScanIndices ($dir, 0);
-                }
-            }
-            # ubuntu started to compress this as index.sgml.gz :/
-            # https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/77138
-        }
-    }
-
-    @TRACE@("Scanning HTML_DIR directory: $HTML_DIR");
-    &ScanIndices ($HTML_DIR, 0);
-    @TRACE@("Scanning HTML_DIR directory: $MODULE_DIR");
-    &ScanIndices ($MODULE_DIR, 0);
-
-    # check all extra dirs, but skip already scanned dirs or subdirs of those
-    foreach my $dir (@EXTRA_DIRS) {
-        my $vdir;
-
-        $dir =~ s#/$##;
-        @TRACE@("Scanning EXTRA_DIR directory: $dir");
-
-        # If the --extra-dir option is not relative and is not sharing the same
-        # prefix as the target directory of the docs, we need to use absolute
-        # directories for the links
-        if ($dir !~m/^\.\./ &&  $dir !~ m%\Q$path_prefix\E/%) {
-            &ScanIndices ($dir, 1);
-        } else {
-            &ScanIndices ($dir, 0);
-        }
-    }
-
-    &ReadSections ();
-
-    &FixCrossReferences ($MODULE_DIR);
-}
-
-
-sub ScanIndices {
-    my ($scan_dir, $use_absolute_links) = @_;
-
-    if (exists $DirCache{$scan_dir}) {
-        return;
-    }
-    $DirCache{$scan_dir} = 1;
-
-    @TRACE@("Scanning source directory: $scan_dir absolute: $use_absolute_links");
-
-    # This array holds any subdirectories found.
-    my (@subdirs) = ();
-
-    opendir (HTMLDIR, $scan_dir) || return;
-    my $file;
-    foreach $file (readdir (HTMLDIR)) {
-        if ($file eq '.' || $file eq '..') {
-            next;
-        } elsif (-d "$scan_dir/$file") {
-            push (@subdirs, $file);
-            next;
-        }
-        if ($file =~ m/\.devhelp2$/) {
-            # if devhelp-file is good don't read index.sgml
-            &ReadDevhelp ("$scan_dir/$file", $use_absolute_links);
-        }
-        elsif (($file eq "index.sgml.gz") && ! (-e "$scan_dir/index.sgml")) {
-            # debian/ubuntu started to compress this as index.sgml.gz :/
-            print <<EOF;
-Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/77138 . For now run:
-gunzip $scan_dir/$file
-EOF
-        }
-        elsif (($file =~ m/(.*?)\.devhelp2.gz$/) && ! (-e "$scan_dir/$1.devhelp2")) {
-            # debian/ubuntu started to compress this as *devhelp2.gz :/
-            print <<EOF;
-Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/1466210 . For now run:
-gunzip $scan_dir/$file
-EOF
-        }
-        # we could consider supporting: use IO::Zlib;
-    }
-    closedir (HTMLDIR);
-
-    # Now recursively scan the subdirectories.
-    my $dir;
-    foreach $dir (sort(@subdirs)) {
-        &ScanIndices ("$scan_dir/$dir", $use_absolute_links);
-    }
-}
-
-
-sub ReadDevhelp {
-    my ($file, $use_absolute_links) = @_;
-
-    # Determine the absolute directory, to be added to links in $file
-    # if we need to use an absolute link.
-    # $file will be something like /prefix/gnome/share/gtk-doc/html/gtk/$file
-    # We want the part up to 'html/.*' since the links in $file include
-    # the rest.
-    my $dir = "../";
-    if ($use_absolute_links) {
-        # For uninstalled index.sgml files we'd need to map the path to where it
-        # will be installed to
-        if ($file !~ /\.\//) {
-            $file =~ /(.*\/)(.*?)\/.*?\.devhelp2/;
-            $dir = "$1$2/";
-        }
-    } else {
-        if ($file =~ /(.*\/)(.*?)\/.*?\.devhelp2/) {
-            $dir .= "$2/";
-        } else {
-            $dir = "";
-        }
-    }
-
-    @TRACE@("Scanning index file=$file, absolute=$use_absolute_links, dir=$dir");
-
-    open (INDEXFILE, $file)
-        || die "Can't open $file: $!";
-    while (<INDEXFILE>) {
-        if (m/ link="([^#]*)#([^"]*)"/) {
-            @TRACE@("Found id: $2 href: $1#$2");
-            $Links{$2} = "$dir$1#$2";
-        }
-    }
-    close (INDEXFILE);
-}
-
-
-sub ReadSections {
-    if (!defined($MODULE)) {
-        return;
-    }
-
-    open (INPUT, "$MODULE-sections.txt")
-            || die "Can't open $MODULE-sections.txt: $!";
-    my $subsection = "";
-    while (<INPUT>) {
-        if (m/^#/) {
-            next;
-
-        } elsif (m/^<SECTION>/) {
-            $subsection = "";
-        } elsif (m/^<SUBSECTION\s*(.*)>/i) {
-            $subsection = $1;
-        } elsif (m/^<SUBSECTION>/) {
-            next;
-        } elsif (m/^<TITLE>(.*)<\/TITLE>/) {
-            next;
-        } elsif (m/^<FILE>(.*)<\/FILE>/) {
-            next;
-        } elsif (m/^<INCLUDE>(.*)<\/INCLUDE>/) {
-            next;
-        } elsif (m/^<\/SECTION>/) {
-            next;
-        } elsif (m/^(\S+)/) {
-            my $symbol=$1;
-
-            if ($subsection eq "Standard" || $subsection eq "Private") {
-                $NoLinks{CreateValidSGMLID($symbol)} = 1;
-            }
-        }
-    }
-    close (INPUT);
-}
-
-
-sub FixCrossReferences {
-    my ($scan_dir) = @_;
-
-    opendir (HTMLDIR, $scan_dir)
-        || die "Can't open HTML directory $scan_dir: $!";
-    my $file;
-    foreach $file (readdir (HTMLDIR)) {
-        if ($file eq '.' || $file eq '..') {
-            next;
-        } elsif ($file =~ m/.html?$/) {
-            &FixHTMLFile ("$scan_dir/$file");
-        }
-    }
-    closedir (HTMLDIR);
-}
-
-
-sub FixHTMLFile {
-    my ($file) = @_;
-    @TRACE@("Fixing file: $file");
-
-    open (HTMLFILE, $file)
-        || die "Can't open $file: $!";
-    undef $/;
-    my $entire_file = <HTMLFILE>;
-    close (HTMLFILE);
-
-    if ("@HIGHLIGHT@" ne "") {
-        # FIXME: ideally we'd pass a clue about the example language to the highligher
-        # unfortunately the "language" attribute is not appearing in the html output
-        # we could patch the customization to have <code class="xxx"> inside of <pre>
-        if ("@HIGHLIGHT@" =~ m%/vim$%) {
-            $entire_file =~ s%<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>%&HighlightSourceVim($1,$2);%gse;
-        }
-        else {
-            $entire_file =~ s%<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>%&HighlightSource($1,$2);%gse;
-        }
-        # this just broke existing GTKDOCLINK tags
-        # &lt;GTKDOCLINK HREF=&quot;GST-PAD-SINK:CAPS&quot;&gt;GST_PAD_SINK&lt;/GTKDOCLINK&gt;
-        $entire_file =~ s%\&lt;GTKDOCLINK\s+HREF=\&quot;(.*?)\&quot;\&gt;(.*?)\&lt;/GTKDOCLINK\&gt;%\<GTKDOCLINK\ HREF=\"$1\"\>$2\</GTKDOCLINK\>%gs;
-
-        # from the highlighter we get all the functions marked up
-        # now we could turn them into GTKDOCLINK items
-        $entire_file =~ s%(<span class=\"function\">)(.*?)(</span>)%&MakeGtkDocLink($1,$2,$3);%gse;
-        # we could also try the first item in stuff marked up as 'normal'
-        $entire_file =~ s%(<span class=\"normal\">\s*)(.+?)((\s+.+?)?\s*</span>)%&MakeGtkDocLink($1,$2,$3);%gse;
-    }
-
-    my @lines = split(/\n/, $entire_file);
-    for (my $i=0; $i<$#lines; $i++) {
-        $lines[$i] =~ s%<GTKDOCLINK\s+HREF="([^"]*)"\s*>(.*?)</GTKDOCLINK\s*>% &MakeXRef($file,$i+1,$1,$2); %ge;
-        if ($lines[$i] =~ m/GTKDOCLINK/) {
-            @TRACE@("make xref failed for line: ".$lines[$i]);
-        }
-    }
-    $entire_file = join("\n",@lines);
-
-    open (NEWFILE, ">$file.new")
-        || die "Can't open $file: $!";
-    print NEWFILE $entire_file;
-    close (NEWFILE);
-
-    unlink ($file)
-        || die "Can't delete $file: $!";
-    rename ("$file.new", $file)
-        || die "Can't rename $file.new: $!";
-}
-
-sub MakeXRef {
-    my ($file, $line, $id, $text) = @_;
-
-    my $href = $Links{$id};
-
-    # this is a workaround for some inconsistency we have with CreateValidSGMLID
-    if (!$href && $id =~ m/:/) {
-        my $tid = $id;
-        $tid =~ s/:/--/g;
-        $href = $Links{$tid};
-    }
-    # poor mans plural support
-    if (!$href && $id =~ m/s$/) {
-        my $tid = $id;
-        $tid =~ s/s$//g;
-        $href = $Links{$tid};
-        if (!$href && defined $Links{"$tid-struct"}) {
-            $href = $Links{"$tid-struct"};
-        }
-    }
-    if (!$href && defined $Links{"$id-struct"}) {
-        $href = $Links{"$id-struct"};
-    }
-
-    if ($href) {
-        # if it is a link to same module, remove path to make it work
-        # uninstalled
-        if (defined($MODULE) && $href =~ m%^\.\./$MODULE/(.*)$%) {
-            $href=$1;
-            @TRACE@("  Fixing link to uninstalled doc: $id, $href, $text");
-        } else {
-            @TRACE@("  Fixing link: $id, $href, $text");
-        }
-        return "<a href=\"$href\">$text</a>";
-    } else {
-        my $warn = 1;
-        @TRACE@("  no link for: $id, $text");
-
-        # don't warn multiple times and also skip blacklisted (ctypes)
-        $warn = 0 if exists $NoLinks{$id};
-        # if it's a function, don't warn if it does not contain a "_"
-        # (transformed to "-")
-        # - gnome coding style would use '_'
-        # - will avoid wrong warnings for ansi c functions
-        $warn = 0 if ($text =~ m/ class=\"function\"/ && $id !~ m/-/);
-        # if it's a 'return value', don't warn (implicitly created link)
-        $warn = 0 if ($text =~ m/ class=\"returnvalue\"/);
-        # if it's a 'type', don't warn if it starts with lowercase
-        # - gnome coding style would use CamelCase
-        $warn = 0 if ($text =~ m/ class=\"type\"/ && ($id =~ m/^[a-z]/));
-        # don't warn for self links
-        $warn = 0 if ($text eq $id);
-
-        if ($warn == 1) {
-          &LogWarning ($file, $line, "no link for: '$id' -> ($text).");
-          $NoLinks{$id} = 1;
-        }
-        return $text;
-    }
-}
-
-
-sub MakeGtkDocLink {
-    my ($pre,$symbol,$post) = @_;
-
-    my $id=CreateValidSGMLID($symbol);
-
-    # these are implicitely created links in highlighed sources
-    # we don't want warnings for those if the links cannot be resolved.
-    $NoLinks{$id} = 1;
-
-    #return "<span class=\"$type\"><GTKDOCLINK HREF=\"$id\">$symbol</GTKDOCLINK></span>";
-    return "$pre<GTKDOCLINK HREF=\"$id\">$symbol</GTKDOCLINK>$post";
-}
-
-
-sub HighlightSource {
-    my ($type, $source) = @_;
-
-    # chop of leading and trailing empty lines
-    $source =~ s/^\s*\n+//gs;
-    $source =~ s/[\s\n]+$//gs;
-    # cut common indent
-    $source =~ m/^(\s*)/;
-    $source =~ s/^$1//gms;
-    # avoid double entity replacement
-    $source =~ s/&lt;/</g;
-    $source =~ s/&gt;/>/g;
-    $source =~ s/&amp;/&/g;
-
-    # write source to a temp file
-    # FIXME: use .c for now to hint the language to the highlighter
-    my $temp_source_file="$MODULE_DIR/_temp_src.$$.c";
-    open (NEWFILE, ">$temp_source_file") || die "Can't open $temp_source_file: $!";
-    print NEWFILE $source;
-    close (NEWFILE);
-
-    @TRACE@(" running @HIGHLIGHT@ @HIGHLIGHT_OPTIONS@$temp_source_file ");
-
-    # format source
-    my $highlighted_source=`@HIGHLIGHT@ @HIGHLIGHT_OPTIONS@$temp_source_file`;
-    if ("@HIGHLIGHT@" =~ m%/source-highlight$%) {
-        $highlighted_source =~ s%^<\!-- .*? -->%%gs;
-        $highlighted_source =~ s%<pre><tt>(.*?)</tt></pre>%$1%gs;
-    }
-    elsif ("@HIGHLIGHT@" =~ m%/highlight$%) {
-        # need to rewrite the stylesheet classes
-        $highlighted_source =~ s%<span class="gtkdoc com">%<span class="comment">%gs;
-        $highlighted_source =~ s%<span class="gtkdoc dir">%<span class="preproc">%gs;
-        $highlighted_source =~ s%<span class="gtkdoc kwd">%<span class="function">%gs;
-        $highlighted_source =~ s%<span class="gtkdoc kwa">%<span class="keyword">%gs;
-        $highlighted_source =~ s%<span class="gtkdoc line">%<span class="linenum">%gs;
-        $highlighted_source =~ s%<span class="gtkdoc num">%<span class="number">%gs;
-        $highlighted_source =~ s%<span class="gtkdoc str">%<span class="string">%gs;
-        $highlighted_source =~ s%<span class="gtkdoc sym">%<span class="symbol">%gs;
-        # maybe also do
-        # $highlighted_source =~ s%</span>(.+)<span%</span><span class="normal">$1</span><span%gs;
-    }
-    # remove temp file
-    unlink ($temp_source_file)
-        || die "Can't delete $temp_source_file: $!";
-
-    return &HighlightSourcePostprocess($type, $highlighted_source);
-}
-
-
-sub HighlightSourceVim {
-    my ($type, $source) = @_;
-
-    # chop of leading and trailing empty lines
-    $source =~ s/^\s*\n+//gs;
-    $source =~ s/[\s\n]+$//gs;
-    # cut common indent
-    $source =~ m/^(\s*)/;
-    $source =~ s/^$1//gms;
-    # avoid double entity replacement
-    $source =~ s/&lt;/</g;
-    $source =~ s/&gt;/>/g;
-    $source =~ s/&amp;/&/g;
-
-    # write source to a temp file
-    my $temp_source_file="$MODULE_DIR/_temp_src.$$.h";
-    open (NEWFILE, ">$temp_source_file") || die "Can't open $temp_source_file: $!";
-    print NEWFILE $source;
-    close (NEWFILE);
-
-    # format source
-    system "echo 'let html_number_lines=0|let html_use_css=1|let html_use_xhtml=1|e $temp_source_file|syn on|set syntax=$SRC_LANG|run! syntax/2html.vim|w! $temp_source_file.html|qa!' | @HIGHLIGHT@ -n -e -u NONE -T xterm >/dev/null";
-
-    my $highlighted_source;
-    {
-        local $/;
-        open (NEWFILE, "<$temp_source_file.html");
-        $highlighted_source = <NEWFILE>;
-        close (NEWFILE);
-    }
-    $highlighted_source =~ s#.*<pre\b[^>]*>\n##s;
-    $highlighted_source =~ s#</pre>.*##s;
-
-    # need to rewrite the stylesheet classes
-    # FIXME: Vim has somewhat different syntax groups
-    $highlighted_source =~ s%<span class="Comment">%<span class="comment">%gs;
-    $highlighted_source =~ s%<span class="PreProc">%<span class="preproc">%gs;
-    $highlighted_source =~ s%<span class="Statement">%<span class="keyword">%gs;
-    $highlighted_source =~ s%<span class="Identifier">%<span class="function">%gs;
-    $highlighted_source =~ s%<span class="Constant">%<span class="number">%gs;
-    $highlighted_source =~ s%<span class="Special">%<span class="symbol">%gs;
-    $highlighted_source =~ s%<span class="Type">%<span class="type">%gs;
-
-    # remove temp files
-    unlink ($temp_source_file)
-        || die "Can't delete $temp_source_file: $!";
-    unlink ("$temp_source_file.html")
-        || die "Can't delete $temp_source_file.html: $!";
-
-    return &HighlightSourcePostprocess($type, $highlighted_source);
-}
-
-
-sub HighlightSourcePostprocess {
-    my ($type, $highlighted_source) = @_;
-
-    # chop of leading and trailing empty lines
-    $highlighted_source =~ s/^[\s\n]+//gs;
-    $highlighted_source =~ s/[\s\n]+$//gs;
-
-    # turn common urls in comments into links
-    $highlighted_source =~ s%<span class="url">(.*?)</span>%<span class="url"><a href="$1">$1</a></span>%gs;
-
-    # we do own line-numbering
-    my $source_lines="";
-    my $line_count = () = $highlighted_source =~ /\n/gs;
-    for (my $i=1; $i < ($line_count+2); $i++) {
-        $source_lines.="$i\n";
-    }
-    $source_lines =~ s/\n\Z//;
-
-    return <<END_OF_HTML
-<div class="$type">
-  <table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
-    <tbody>
-      <tr>
-        <td class="listing_lines" align="right"><pre>$source_lines</pre></td>
-        <td class="listing_code"><pre class="programlisting">$highlighted_source</pre></td>
-      </tr>
-    </tbody>
-  </table>
-</div>
-END_OF_HTML
-}
+import argparse
+import os
+import sys
+sys.path.append('@PYTHON_PACKAGE_DIR@')
+
+from gtkdoc import config, fixxref
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='gtkdoc-fixxref version %s - fix cross references in html files' % config.version)
+    parser.add_argument('--version', action='version', version=config.version)
+    parser.add_argument('--module', default='', help='Name of the doc module being processed.')
+    parser.add_argument('--module-dir', default='',
+                        help='The directory which contains the generated HTML.')
+    parser.add_argument('--html-dir', default='',  # NB: adjacent literals are joined verbatim, keep the trailing spaces
+                        help='The directory where gtk-doc generated documentation is '
+                        'installed.')
+    parser.add_argument('--extra-dir', default=[], action='append',
+                        help='Directories to recursively scan for indices (index.sgml) '
+                        'in addition to HTML_DIR')
+    parser.add_argument('--src-lang', default='c',
+                        help='Programing language used for syntax highlighting. The '
+                        'available languages depend on the source source '
+                        'highlighter you use.')
+
+    options = parser.parse_args()
+    if options.module == '':  # --module is mandatory although argparse gives it a default
+        print('Error, missing module.')
+        sys.exit(1)
+
+    if not options.module_dir:  # default: HTML_DIR/MODULE
+        options.module_dir = os.path.join(options.html_dir, options.module)
+
+    fixxref.Run(options)
diff --git a/gtkdoc/config.py.in b/gtkdoc/config.py.in
index 1264374..472c7df 100644
--- a/gtkdoc/config.py.in
+++ b/gtkdoc/config.py.in
@@ -3,6 +3,8 @@ version = "@VERSION@"
 # tools
 dblatex = '@DBLATEX@'
 fop = '@FOP@'
+highlight = '@HIGHLIGHT@'
+highlight_options = '@HIGHLIGHT_OPTIONS@'
 pkg_config = '@PKG_CONFIG@'
 xsltproc = '@XSLTPROC@'
 
diff --git a/gtkdoc/fixxref.py b/gtkdoc/fixxref.py
new file mode 100755
index 0000000..cfcde3b
--- /dev/null
+++ b/gtkdoc/fixxref.py
@@ -0,0 +1,440 @@
+# -*- python -*-
+#
+# gtk-doc - GTK DocBook documentation generator.
+# Copyright (C) 1998  Damon Chaplin
+#               2007-2016  Stefan Sauer
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+
+"""Fix cross-references in the HTML documentation."""
+
+import logging
+import os
+import re
+import shlex
+import subprocess
+import tempfile
+
+from . import common, config
+
+# This contains all the entities and their relative URLs.
+Links = {}
+
+# failing link targets we don't warn about even once
+NoLinks = {
+    'char',
+    'double',
+    'float',
+    'int',
+    'long',
+    'main',
+    'signed',
+    'unsigned',
+    'va-list',
+    'void',
+    'GBoxed',
+    'GEnum',
+    'GFlags',
+    'GInterface'
+}
+
+# Cache of dirs we already scanned for index files
+DirCache = {}
+
+
+def Run(options):
+    #logging.basicConfig(level=logging.INFO)
+
+    path_prefix = ''  # part of html_dir in front of '/share/gtk-doc/html'; stays '' if html_dir has no such part
+    m = re.search(r'(.*?)/share/gtk-doc/html', options.html_dir)
+    if m:
+        path_prefix = m.group(1)
+        logging.info('Path prefix: %s', path_prefix)
+    prefix_match = r'^' + re.escape(path_prefix) + r'/'  # matches directories located below path_prefix
+
+    # We scan the directory containing GLib and any directories in GNOME2_PATH
+    # first, but these will be overridden by any later scans.
+    dir = common.GetModuleDocDir('glib-2.0')
+    if os.path.exists(dir):
+        # Some predefined link targets to get links into type hierarchies as these
+        # have no targets. These are always absolute for now.
+        Links['GBoxed'] = dir + '/gobject/gobject-Boxed-Types.html'
+        Links['GEnum'] = dir + '/gobject/gobject-Enumeration-and-Flag-Types.html'
+        Links['GFlags'] = dir + '/gobject/gobject-Enumeration-and-Flag-Types.html'
+        Links['GInterface'] = dir + '/gobject/GTypeModule.html'
+
+        if dir != options.html_dir:
+            logging.info('Scanning GLib directory: %s', dir)
+            ScanIndices(dir, (re.search(prefix_match, dir) is None))  # absolute links if dir is not below path_prefix
+
+    path = os.environ.get('GNOME2_PATH')
+    if path:
+        for dir in path.split(':'):
+            dir += '/share/gtk-doc/html'
+            if os.path.exists(dir) and dir != options.html_dir:
+                logging.info('Scanning GNOME2_PATH directory: %s', dir)
+                ScanIndices(dir, (re.search(prefix_match, dir) is None))  # same rule as for the GLib directory
+
+    logging.info('Scanning HTML_DIR directory: %s', options.html_dir)
+    ScanIndices(options.html_dir, 0)  # 0: use_absolute_links is off
+    logging.info('Scanning MODULE_DIR directory: %s', options.module_dir)
+    ScanIndices(options.module_dir, 0)
+
+    # check all extra dirs, but skip already scanned dirs or subdirs of those
+    for dir in options.extra_dir:
+        dir = dir.rstrip('/')
+        logging.info('Scanning EXTRA_DIR directory: %s', dir)
+
+        # If the --extra-dir option is not relative and is not sharing the same
+        # prefix as the target directory of the docs, we need to use absolute
+        # directories for the links
+        if not dir.startswith('..') and re.search(prefix_match, dir) is None:
+            ScanIndices(dir, 1)
+        else:
+            ScanIndices(dir, 0)
+
+    ReadSections(options)  # fills NoLinks before the HTML files are rewritten
+    FixCrossReferences(options)
+
+
+def ScanIndices(scan_dir, use_absolute_links):
+    """Recursively read every *.devhelp2 index below scan_dir; each directory is visited only once."""
+    if not scan_dir or scan_dir in DirCache or not os.path.isdir(scan_dir):  # missing dir: skip, like perl's opendir || return
+        return
+    DirCache[scan_dir] = 1
+
+    logging.info('Scanning index directory: %s, absolute: %d', scan_dir, use_absolute_links)
+
+    subdirs = []
+
+    # TODO(ensonic): this code is the same as in rebase.py
+    for entry in os.listdir(scan_dir):
+        full_entry = os.path.join(scan_dir, entry)
+        if os.path.isdir(full_entry):
+            subdirs.append(full_entry)
+            continue
+
+        if entry.endswith('.devhelp2'):
+            # if devhelp-file is good don't read index.sgml
+            ReadDevhelp(full_entry, use_absolute_links)
+        elif entry == "index.sgml.gz" and not os.path.exists(os.path.join(scan_dir, 'index.sgml')):
+            # debian/ubuntu started to compress this as index.sgml.gz :/
+            print('''Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/77138 . For now run:
+gunzip %s
+''' % full_entry)
+        elif entry.endswith('.devhelp2.gz') and not os.path.exists(full_entry[:-3]):
+            # debian/ubuntu started to compress this as *devhelp2.gz :/
+            print('''Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/1466210 . For now run:
+gunzip %s
+''' % full_entry)
+        # we could consider supporting: gzip module
+
+    # Now recursively scan the subdirectories (sorted: later scans override earlier links, keep the order stable).
+    for subdir in sorted(subdirs):
+        ScanIndices(subdir, use_absolute_links)
+
+
+def ReadDevhelp(file, use_absolute_links):
+    """Add every link target found in a devhelp2 index file to Links.
+
+    file is something like /prefix/gnome/share/gtk-doc/html/gtk/gtk.devhelp2.
+    Each target is prefixed with '../<module>/' or, for absolute links, with the
+    directory of the index file; a path without two parent parts keeps '../' resp. ''.
+    """
+    dir = "../"
+    path_match = re.search(r'(.*\/)(.*?)\/.*?\.devhelp2', file)
+    if use_absolute_links:
+        # For uninstalled index files we'd need to map the path to where it
+        # will be installed to
+        if path_match and not file.startswith('./'):
+            dir = path_match.group(1) + path_match.group(2) + '/'
+    elif path_match:
+        dir += path_match.group(2) + '/'
+    else:
+        dir = ''
+
+    logging.info('Scanning index file=%s, absolute=%d, dir=%s', file, use_absolute_links, dir)
+
+    with open(file) as index_file:
+        for line in index_file:
+            m = re.search(r' link="([^#]*)#([^"]*)"', line)
+            if m:
+                link = m.group(1) + '#' + m.group(2)
+                logging.debug('Found id: %s href: %s', m.group(2), link)
+                Links[m.group(2)] = dir + link
+
+
+def ReadSections(options):
+    """Add the symbols of all 'Standard' and 'Private' subsections of MODULE-sections.txt to NoLinks."""
+    subsection = ''  # a symbol may appear before the first <SECTION>
+    with open(options.module + '-sections.txt') as sections:
+        for line in sections:
+            m1 = re.search(r'^<SUBSECTION\s*(.*)>', line)
+            if line.startswith('#') or line.strip() == '':
+                continue
+            elif line.startswith('<SECTION>'):
+                subsection = ''
+            elif m1:
+                subsection = m1.group(1)
+            elif line.startswith('<SUBSECTION>') or line.startswith('</SECTION>'):
+                continue
+            # <TITLE>, <FILE> and <INCLUDE> lines carry no symbols either
+            elif re.search(r'^<(TITLE|FILE|INCLUDE)>(.*)</\1>', line):
+                continue
+            else:
+                symbol = line.strip()
+                if subsection == "Standard" or subsection == "Private":
+                    NoLinks.add(common.CreateValidSGMLID(symbol))
+
+
+def FixCrossReferences(options):
+    """Run FixHTMLFile() on each *.html / *.htm file located directly in options.module_dir."""
+    html_dir = options.module_dir
+    # TODO(ensonic): use glob.glob()?
+    for name in os.listdir(html_dir):
+        path = os.path.join(html_dir, name)
+        is_html = name.endswith('.html') or name.endswith('.htm')
+        if is_html and not os.path.isdir(path):
+            FixHTMLFile(options, path)
+
+
+def FixHTMLFile(options, file):
+    """Highlight example listings (if a highlighter is configured), resolve GTKDOCLINK tags via MakeXRef() and rewrite file."""
+    logging.info('Fixing file: %s', file)
+    with open(file) as html_file:
+        content = html_file.read()
+
+    if config.highlight:
+        # FIXME: ideally we'd pass a clue about the example language to the highligher
+        # unfortunately the "language" attribute is not appearing in the html output
+        # we could patch the customization to have <code class="xxx"> inside of <pre>
+        if config.highlight.endswith('vim'):
+            def repl_func(m):
+                return HighlightSourceVim(options, m.group(1), m.group(2))
+            content = re.sub(
+                r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
+                repl_func, content, flags=re.DOTALL)
+        else:
+            def repl_func(m):
+                return HighlightSource(options, m.group(1), m.group(2))
+            content = re.sub(
+                r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
+                repl_func, content, flags=re.DOTALL)
+
+        # Restore escaped GTKDOCLINK tags. The template is plain text: re.sub() would keep the '\' of escapes like '\<'.
+        content = re.sub(r'\&lt;GTKDOCLINK\s+HREF=\&quot;(.*?)\&quot;\&gt;(.*?)\&lt;/GTKDOCLINK\&gt;',
+                         r'<GTKDOCLINK HREF="\1">\2</GTKDOCLINK>', content, flags=re.DOTALL)
+
+        # From the highlighter we get all the functions marked up. Now we can turn them into GTKDOCLINK items
+        def repl_func(m):
+            return MakeGtkDocLink(m.group(1), m.group(2), m.group(3))
+        content = re.sub(r'(<span class=\"function\">)(.*?)(</span>)', repl_func, content, flags=re.DOTALL)
+        # We can also try the first item in stuff marked up as 'normal'
+        content = re.sub(
+            r'(<span class=\"normal\">\s*)(.+?)((\s+.+?)?\s*</span>)', repl_func, content, flags=re.DOTALL)
+
+    lines = content.rstrip().split('\n')
+
+    for i, line in enumerate(lines):
+        def repl_func(m, line_no=i + 1):  # bind the 1-based line number for warnings
+            return MakeXRef(options, file, line_no, m.group(1), m.group(2))
+        lines[i] = re.sub(r'<GTKDOCLINK\s+HREF="([^"]*)"\s*>(.*?)</GTKDOCLINK\s*>', repl_func, line)
+        if 'GTKDOCLINK' in lines[i]:
+            logging.info('make xref failed for line %d: "%s"', i + 1, lines[i])
+
+    new_file = file + '.new'
+    with open(new_file, 'w') as out:  # closed (and flushed) before the original is replaced
+        out.write('\n'.join(lines))
+
+    os.unlink(file)
+    os.rename(new_file, file)
+
+
+def MakeXRef(options, file, line, id, text):
+    """Turn one GTKDOCLINK into an HTML anchor, or return its text unlinked.
+
+    The id is looked up in the module-level Links table, with fallbacks for
+    ':' vs '--', a trailing plural 's' and a '-struct' suffix.
+
+    Args:
+        options: parsed command line options; options.module is used to spot
+            links that point back into the module being processed.
+        file: name of the HTML file, used in the warning only.
+        line: line number within file, used in the warning only.
+        id: the HREF value of the GTKDOCLINK tag.
+        text: the markup enclosed by the GTKDOCLINK tag.
+
+    Returns:
+        '<a href="...">text</a>' when a target was found, otherwise text
+        unchanged.  An unresolved id is reported once through
+        common.LogWarning() and then remembered in NoLinks.
+    """
+    href = Links.get(id)
+
+    # This is a workaround for some inconsistency we have with CreateValidSGMLID
+    if not href and ':' in id:
+        href = Links.get(id.replace(':', '--'))
+    # poor man's plural support
+    if not href and id.endswith('s'):
+        tid = id[:-1]
+        href = Links.get(tid)
+        if not href:
+            href = Links.get(tid + '-struct')
+    if not href:
+        href = Links.get(id + '-struct')
+
+    if href:
+        # if it is a link to same module, remove path to make it work uninstalled
+        # The module name is literal text, so escape it: names containing
+        # regex metacharacters such as '+' or '.' would otherwise mis-match.
+        m = re.search(r'^\.\./' + re.escape(options.module) + '/(.*)$', href)
+        if m:
+            href = m.group(1)
+            logging.info('Fixing link to uninstalled doc: %s, %s, %s', id, href, text)
+        else:
+            logging.info('Fixing link: %s, %s, %s', id, href, text)
+        return "<a href=\"%s\">%s</a>" % (href, text)
+    else:
+        logging.info('no link for: %s, %s', id, text)
+
+        # don't warn multiple times and also skip blacklisted (ctypes)
+        if id in NoLinks:
+            return text
+        # if it's a function, don't warn if it does not contain a "_"
+        # (transformed to "-")
+        # - gnome coding style would use '_'
+        # - will avoid wrong warnings for ansi c functions
+        if re.search(r' class=\"function\"', text) and '-' not in id:
+            return text
+        # if it's a 'return value', don't warn (implicitly created link)
+        if re.search(r' class=\"returnvalue\"', text):
+            return text
+        # if it's a 'type', don't warn if it starts with lowercase
+        # - gnome coding style would use CamelCase
+        # id[:1] instead of id[0]: an empty HREF must not raise IndexError.
+        if re.search(r' class=\"type\"', text) and id[:1].islower():
+            return text
+        # don't warn for self links
+        if text == id:
+            return text
+
+        common.LogWarning(file, line, 'no link for: "%s" -> (%s).' % (id, text))
+        NoLinks.add(id)
+        return text
+
+
+def MakeGtkDocLink(pre, symbol, post):
+    """Wrap symbol in a GTKDOCLINK tag, keeping the surrounding markup.
+
+    pre and post are emitted unchanged before and after the new tag.  The
+    HREF is the id that common.CreateValidSGMLID() derives from symbol.
+    """
+    link_id = common.CreateValidSGMLID(symbol)
+
+    # Links made here are created implicitly from highlighted sources, so
+    # register the id in NoLinks up front: an unresolvable one must not
+    # produce a warning.
+    NoLinks.add(link_id)
+
+    return ''.join([pre, '<GTKDOCLINK HREF="', link_id, '">', symbol, '</GTKDOCLINK>', post])
+
+
+def HighlightSource(options, type, source):
+    """Highlight one program listing with the tool named by config.highlight.
+
+    The pre-processed source is written to a temporary '.c' file and the
+    highlighter is run on it with config.highlight_options, in which
+    '$SRC_LANG' is replaced by options.src_lang.  For 'source-highlight' the
+    leading comment and the <pre><tt> wrapper are removed; for 'highlight'
+    the 'gtkdoc xxx' classes are renamed.  Output of any other tool is passed
+    on unchanged.
+
+    Args:
+        options: parsed command line options (src_lang is read).
+        type: CSS class for the enclosing <div>, forwarded to
+            HighlightSourcePostprocess().
+        source: contents of the original <pre class="programlisting">.
+
+    Returns:
+        The markup built by HighlightSourcePostprocess().
+
+    Raises:
+        subprocess.CalledProcessError: the highlighter exited non-zero.
+    """
+    source = HighlightSourcePreProcess(source)
+
+    # write source to a temp file
+    # FIXME: use .c for now to hint the language to the highlighter
+    with tempfile.NamedTemporaryFile(suffix='.c') as f:
+        f.write(source)
+        # make sure the highlighter sees the data when it opens the file
+        f.flush()
+        temp_source_file = f.name
+        highlight_options = config.highlight_options.replace('$SRC_LANG', options.src_lang)
+
+        logging.info('running %s %s %s', config.highlight, highlight_options, temp_source_file)
+
+        # format source
+        highlighted_source = subprocess.check_output(
+            [config.highlight] + shlex.split(highlight_options) + [temp_source_file])
+        logging.debug('result: [%s]', highlighted_source)
+        if config.highlight.endswith('/source-highlight'):
+            highlighted_source = re.sub(r'^<\!-- .*? -->', '', highlighted_source, flags=re.MULTILINE | re.DOTALL)
+            highlighted_source = re.sub(
+                r'<pre><tt>(.*?)</tt></pre>', r'\1', highlighted_source, flags=re.MULTILINE | re.DOTALL)
+        elif config.highlight.endswith('/highlight'):
+            # need to rewrite the stylesheet classes
+            class_map = (
+                ('com', 'comment'),
+                ('dir', 'preproc'),
+                ('kwd', 'function'),
+                ('kwa', 'keyword'),
+                ('line', 'linenum'),
+                ('num', 'number'),
+                ('str', 'string'),
+                ('sym', 'symbol'),
+            )
+            for old, new in class_map:
+                highlighted_source = highlighted_source.replace(
+                    '<span class="gtkdoc %s">' % old, '<span class="%s">' % new)
+            # maybe also do
+            # highlighted_source = re.sub(r'</span>(.+)<span', '</span><span class="normal">\1</span><span')
+
+    return HighlightSourcePostprocess(type, highlighted_source)
+
+
+def HighlightSourceVim(options, type, source):
+    """Highlight one program listing by running vim's syntax/2html.vim.
+
+    The pre-processed source is written to a temporary '.h' file, vim
+    (config.highlight) converts it to '<tempfile>.html' using
+    options.src_lang as the syntax name, and the contents of the generated
+    <pre> element are extracted and their classes renamed.
+
+    Args:
+        options: parsed command line options (src_lang is read).
+        type: CSS class for the enclosing <div>, forwarded to
+            HighlightSourcePostprocess().
+        source: contents of the original <pre class="programlisting">.
+
+    Returns:
+        The markup built by HighlightSourcePostprocess().
+
+    Raises:
+        subprocess.CalledProcessError: the vim pipeline exited non-zero.
+    """
+    source = HighlightSourcePreProcess(source)
+
+    # write source to a temp file
+    with tempfile.NamedTemporaryFile(suffix='.h') as f:
+        f.write(source)
+        f.flush()
+        temp_source_file = f.name
+        html_file = temp_source_file + '.html'
+
+        # format source
+        # TODO(ensonic): use p.communicate()
+        # NOTE(review): file name and src_lang are interpolated into a shell
+        # command line unquoted - confirm neither can contain shell
+        # metacharacters.
+        script = "echo 'let html_number_lines=0|let html_use_css=1|let html_use_xhtml=1|e %s|syn on|set syntax=%s|run! syntax/2html.vim|w! %s.html|qa!' | " % (
+            temp_source_file, options.src_lang, temp_source_file)
+        script += "%s -n -e -u NONE -T xterm >/dev/null" % config.highlight
+        subprocess.check_call([script], shell=True)
+
+        with open(html_file) as html:
+            highlighted_source = html.read()
+
+        # remove temp files
+        os.unlink(html_file)
+
+        # Keep only what is inside <pre>...</pre>.  This needs DOTALL so that
+        # '.*' spans the whole multi-line header and footer of the generated
+        # page; with MULTILINE only the lines holding the tags are removed.
+        highlighted_source = re.sub(r'.*<pre\b[^>]*>\n', '', highlighted_source, flags=re.DOTALL)
+        highlighted_source = re.sub(r'</pre>.*', '', highlighted_source, flags=re.DOTALL)
+
+        # need to rewrite the stylesheet classes
+        class_map = (
+            ('Comment', 'comment'),
+            ('PreProc', 'preproc'),
+            ('Statement', 'keyword'),
+            ('Identifier', 'function'),
+            ('Constant', 'number'),
+            ('Special', 'symbol'),
+            ('Type', 'type'),
+        )
+        for old, new in class_map:
+            highlighted_source = highlighted_source.replace(
+                '<span class="%s">' % old, '<span class="%s">' % new)
+
+    return HighlightSourcePostprocess(type, highlighted_source)
+
+
+def HighlightSourcePreProcess(source):
+    """Prepare the text of a program listing for the external highlighter.
+
+    Leading blank lines and trailing whitespace are removed, the indentation
+    of the first line is removed from every line that starts with it, and the
+    entities &lt; &gt; &amp; are turned back into plain characters so the
+    highlighter does not escape them a second time.
+
+    Args:
+        source: contents of a <pre class="programlisting"> element.
+
+    Returns:
+        The cleaned-up source text.
+    """
+    # Chop off leading and trailing empty lines, but leave the leading space
+    # in the first real line: it is needed below to detect the common indent.
+    # (A plain strip(' ') would eat it whenever no blank line precedes it.)
+    source = re.sub(r'^\s*\n', '', source)
+    source = source.rstrip()
+
+    # cut common indent
+    m = re.search(r'^(\s+)', source)
+    if m:
+        # the indent is literal text, not a pattern
+        source = re.sub(r'^' + re.escape(m.group(1)), '', source, flags=re.MULTILINE)
+    # avoid double entity replacement; '&amp;' must come last so that
+    # '&amp;lt;' ends up as '&lt;' and not as '<'
+    source = source.replace('&lt;', '<')
+    source = source.replace('&gt;', '>')
+    source = source.replace('&amp;', '&')
+    return source
+
+
+def HighlightSourcePostprocess(type, highlighted_source):
+    """Wrap highlighted code in the two-column listing table.
+
+    Surrounding whitespace is stripped, every <span class="url"> gets its
+    text turned into a hyperlink, and a column of line numbers (one per line
+    of code, starting at 1) is generated next to the code.
+
+    Args:
+        type: value for the class attribute of the outer <div>.
+        highlighted_source: markup produced by the highlighter.
+
+    Returns:
+        The HTML fragment for the whole listing, ending in a newline.
+    """
+    # chop off leading and trailing empty lines
+    code = highlighted_source.strip()
+
+    # turn common urls in comments into links
+    code = re.sub(r'<span class="url">(.*?)</span>',
+                  r'<span class="url"><a href="\1">\1</a></span>',
+                  code, flags=re.DOTALL)
+
+    # we do our own line-numbering: n newlines separate n + 1 lines
+    last_line = code.count('\n') + 1
+    numbers = '\n'.join(str(n) for n in range(1, last_line + 1))
+
+    template = """<div class="%s">
+  <table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
+    <tbody>
+      <tr>
+        <td class="listing_lines" align="right"><pre>%s</pre></td>
+        <td class="listing_code"><pre class="programlisting">%s</pre></td>
+      </tr>
+    </tbody>
+  </table>
+</div>
+"""
+    return template % (type, numbers, code)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 904e19a..fea2c88 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,7 +5,7 @@ SUBDIRS = gobject bugs annotations fail empty program .
 if BUILD_TESTS
 
 TESTS = \
-  gtkdoc-common.t gtkdoc-fixxref.t gtkdoc-mkdb.t \
+  gtkdoc-common.t gtkdoc-mkdb.t \
   gtkdoc-check.py gtkdoc-common.py \
   tools.sh gobject.sh bugs.sh annotations.sh fail.sh empty.sh sanity.sh \
   program.sh



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]