[library-web] move sitemap.xml and robots.txt generation to their own xsl files



commit 678924fcec43ae353fa36f5fe0d917c4a1ac926f
Author: Frédéric Péters <fpeters 0d be>
Date:   Sat Mar 19 19:40:20 2011 +0100

    move sitemap.xml and robots.txt generation to their own xsl files

 data/xslt/indexes.xsl |   85 -----------------------------------------------
 data/xslt/robots.xsl  |   58 ++++++++++++++++++++++++++++++++
 data/xslt/sitemap.xsl |   88 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/lgo.py            |   24 ++++++++++---
 4 files changed, 165 insertions(+), 90 deletions(-)
---
diff --git a/data/xslt/indexes.xsl b/data/xslt/indexes.xsl
index 9c59d67..c8a7934 100644
--- a/data/xslt/indexes.xsl
+++ b/data/xslt/indexes.xsl
@@ -23,7 +23,6 @@ along with libgo; if not, write to the Free Software Foundation, Inc.,
                 xmlns:exsl="http://exslt.org/common"
                 xmlns:html="http://www.w3.org/1999/xhtml"
                 extension-element-prefixes="exsl"
-                xsl:exclude-result-prefixes="sitemap"
                 version="1.0">
 
 
@@ -826,88 +825,6 @@ div.body-sidebar { width: 100%; }
 
   </xsl:template>
 
-  <!-- Google sitemap stuff -->
-  <xsl:template match="document" mode="sitemap">
-    <xsl:if test="@path"> <!-- only local documents -->
-      <url xmlns="http://www.google.com/schemas/sitemap/0.84">
-        <loc>http://library.gnome.org<xsl:value-of select="@path"
-          />index.html.<xsl:value-of select="@lang"/></loc>
-        <changefreq>daily</changefreq>
-	<priority>0.7</priority>
-      </url>
-    </xsl:if>
-  </xsl:template>
-
-  <xsl:template match="index" mode="sitemap">
-    <xsl:param name="channel" select="@channel"/>
-    <xsl:param name="lang" select="@lang"/>
-    <xsl:param name="filename">
-      <xsl:choose>
-        <xsl:when test="@id"><xsl:value-of select="@id"/></xsl:when>
-        <xsl:otherwise>index</xsl:otherwise>
-      </xsl:choose>
-    </xsl:param>
-
-    <url xmlns="http://www.google.com/schemas/sitemap/0.84">
-      <loc>http://library.gnome.org/<xsl:value-of select="@channel"
-        />/<xsl:value-of select="$filename"
-        />.html.<xsl:value-of select="@lang"/></loc>
-      <priority>0.9</priority>
-      <changefreq>daily</changefreq>
-    </url>
-    <xsl:apply-templates select="index" mode="sitemap"/>
-    <xsl:apply-templates select="section/document[@lang = $lang]" mode="sitemap"/>
-  </xsl:template>
-
-  <xsl:template match="home" mode="sitemap">
-    <xsl:param name="lang" select="@lang"/>
-    <url xmlns="http://www.google.com/schemas/sitemap/0.84">
-      <loc>http://library.gnome.org/index.html.<xsl:value-of select="$lang"/></loc>
-      <priority>1.0</priority>
-      <changefreq>daily</changefreq>
-    </url>
-    <xsl:apply-templates select="../index[@lang = $lang]" mode="sitemap"/>
-  </xsl:template>
-
-  <xsl:template match="indexes" mode="sitemap">
-    <exsl:document href="sitemap.xml" method="xml" indent="yes" encoding="UTF-8">
-      <urlset xmlns="http://www.google.com/schemas/sitemap/0.84"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-        xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84
-        http://www.google.com/schemas/sitemap/0.84/sitemap.xsd">
-          <xsl:apply-templates select="home" mode="sitemap"/>
-      </urlset>
-    </exsl:document>
-  </xsl:template>
-
-
-  <!-- robots.txt stuff -->
-
-  <xsl:template match="document" mode="robotstxt">
-    <xsl:if test="versions">
-      <xsl:variable name="basedir">/<xsl:value-of  select="@channel"
-      />/<xsl:value-of select="@modulename" />/</xsl:variable>
-      <xsl:for-each select="versions/version">
-        <xsl:text>Disallow: </xsl:text>
-	<xsl:value-of select="$basedir"/><xsl:value-of select="@href"/>
-	<xsl:text>/
-</xsl:text>
-      </xsl:for-each>
-    </xsl:if>
-  </xsl:template>
-
-  <xsl:template match="indexes" mode="robotstxt">
-    <xsl:param name="onelang"><xsl:value-of select="index[position() = 1]/@lang"/></xsl:param>
-    <xsl:message>Writing robots.txt</xsl:message>
-    <exsl:document href="robots.txt" method="text" encoding="UTF-8">
-      <xsl:text># don't let robot index all versions of documents
-# see http://bugzilla.gnome.org/show_bug.cgi?id=509424
-User-agent: *
-</xsl:text>
-      <xsl:apply-templates select="index[@lang = $onelang and @channel != 'misc']//document" mode="robotstxt"/>
-    </exsl:document>
-  </xsl:template>
-
   <!-- JavaScript related translations -->
   <xsl:template match="home" mode="javascript">
     <xsl:param name="lang" select="@lang"/>
@@ -938,8 +855,6 @@ var remove_cookie_text = "<xsl:value-of select="$remove_cookie"/>";
       </xsl:apply-templates>
     </xsl:for-each>
     <xsl:apply-templates select="." mode="nightly"/>
-    <xsl:apply-templates select="." mode="sitemap"/>
-    <xsl:apply-templates select="." mode="robotstxt"/>
     <xsl:apply-templates select="home" mode="javascript"/>
   </xsl:template>
 
diff --git a/data/xslt/robots.xsl b/data/xslt/robots.xsl
new file mode 100644
index 0000000..ce7da42
--- /dev/null
+++ b/data/xslt/robots.xsl
@@ -0,0 +1,58 @@
+<?xml version='1.0' encoding='UTF-8'?><!-- -*- indent-tabs-mode: nil -*- -->
+<!--
+Copyright (c) 2011 Frederic Peters <fpeters 0d be>
+
+This file is part of libgo.
+
+libgo is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+libgo is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with libgo; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-->
+
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+                xmlns:exsl="http://exslt.org/common"
+                extension-element-prefixes="exsl"
+                version="1.0">
+
+  <!-- robots.txt stuff -->
+
+  <xsl:template match="document" mode="robotstxt">
+    <xsl:if test="versions">
+      <xsl:variable name="basedir">/<xsl:value-of  select="@channel"
+      />/<xsl:value-of select="@modulename" />/</xsl:variable>
+      <xsl:for-each select="versions/version">
+        <xsl:text>Disallow: </xsl:text>
+	<xsl:value-of select="$basedir"/><xsl:value-of select="@href"/>
+	<xsl:text>/
+</xsl:text>
+      </xsl:for-each>
+    </xsl:if>
+  </xsl:template>
+
+  <xsl:template match="indexes" mode="robotstxt">
+    <xsl:param name="onelang"><xsl:value-of select="index[position() = 1]/@lang"/></xsl:param>
+    <xsl:message>Writing robots.txt</xsl:message>
+    <exsl:document href="robots.txt" method="text" encoding="UTF-8">
+      <xsl:text># don't let robot index all versions of documents
+# see http://bugzilla.gnome.org/show_bug.cgi?id=509424
+User-agent: *
+</xsl:text>
+      <xsl:apply-templates select="index[@lang = $onelang and @channel != 'misc']//document" mode="robotstxt"/>
+    </exsl:document>
+  </xsl:template>
+
+  <xsl:template match="indexes">
+    <xsl:apply-templates select="." mode="robotstxt"/>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/data/xslt/sitemap.xsl b/data/xslt/sitemap.xsl
new file mode 100644
index 0000000..a218649
--- /dev/null
+++ b/data/xslt/sitemap.xsl
@@ -0,0 +1,88 @@
+<?xml version='1.0' encoding='UTF-8'?><!-- -*- indent-tabs-mode: nil -*- -->
+<!--
+Copyright (c) 2011 Frederic Peters <fpeters 0d be>
+
+This file is part of libgo.
+
+libgo is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+libgo is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with libgo; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-->
+
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+                xmlns:exsl="http://exslt.org/common"
+                xmlns:html="http://www.w3.org/1999/xhtml"
+                extension-element-prefixes="exsl"
+                xsl:exclude-result-prefixes="sitemap"
+                version="1.0">
+
+
+  <!-- Google sitemap stuff -->
+  <xsl:template match="document" mode="sitemap">
+    <xsl:if test="@path"> <!-- only local documents -->
+      <url xmlns="http://www.google.com/schemas/sitemap/0.84">
+        <loc>http://library.gnome.org<xsl:value-of select="@path"
+          />index.html.<xsl:value-of select="@lang"/></loc>
+        <changefreq>daily</changefreq>
+	<priority>0.7</priority>
+      </url>
+    </xsl:if>
+  </xsl:template>
+
+  <xsl:template match="index" mode="sitemap">
+    <xsl:param name="channel" select="@channel"/>
+    <xsl:param name="lang" select="@lang"/>
+    <xsl:param name="filename">
+      <xsl:choose>
+        <xsl:when test="@id"><xsl:value-of select="@id"/></xsl:when>
+        <xsl:otherwise>index</xsl:otherwise>
+      </xsl:choose>
+    </xsl:param>
+
+    <url xmlns="http://www.google.com/schemas/sitemap/0.84">
+      <loc>http://library.gnome.org/<xsl:value-of select="@channel"
+        />/<xsl:value-of select="$filename"
+        />.html.<xsl:value-of select="@lang"/></loc>
+      <priority>0.9</priority>
+      <changefreq>daily</changefreq>
+    </url>
+    <xsl:apply-templates select="index" mode="sitemap"/>
+    <xsl:apply-templates select="section/document[@lang = $lang]" mode="sitemap"/>
+  </xsl:template>
+
+  <xsl:template match="home" mode="sitemap">
+    <xsl:param name="lang" select="@lang"/>
+    <url xmlns="http://www.google.com/schemas/sitemap/0.84">
+      <loc>http://library.gnome.org/index.html.<xsl:value-of select="$lang"/></loc>
+      <priority>1.0</priority>
+      <changefreq>daily</changefreq>
+    </url>
+    <xsl:apply-templates select="../index[@lang = $lang]" mode="sitemap"/>
+  </xsl:template>
+
+  <xsl:template match="indexes" mode="sitemap">
+    <exsl:document href="sitemap.xml" method="xml" indent="yes" encoding="UTF-8">
+      <urlset xmlns="http://www.google.com/schemas/sitemap/0.84"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84
+        http://www.google.com/schemas/sitemap/0.84/sitemap.xsd">
+          <xsl:apply-templates select="home" mode="sitemap"/>
+      </urlset>
+    </exsl:document>
+  </xsl:template>
+
+  <xsl:template match="indexes">
+    <xsl:apply-templates select="." mode="sitemap"/>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/src/lgo.py b/src/lgo.py
index 9a31b24..876ebad 100755
--- a/src/lgo.py
+++ b/src/lgo.py
@@ -146,6 +146,9 @@ class Lgo(App):
             self.process_nightly_tarballs()
         self.apply_overlay()
         self.generate_indexes()
+        self.generate_html_indexes()
+        self.generate_robots()
+        self.generate_sitemap()
         self.generate_symbols_files()
         self.generate_static_pages()
 
@@ -641,8 +644,6 @@ class Lgo(App):
         tree = ET.ElementTree(indexes)
         tree.write(os.path.join(idx_dirname, 'indexes.xml'))
 
-        self.generate_html_indexes()
-
     def create_section(self, index, section, section_docs, lang):
         section_node = ET.SubElement(index, 'section')
         section_node.set('toc_id', section)
@@ -686,12 +687,11 @@ class Lgo(App):
                 doc.create_element(parent_elem, doc_lang,
                         original_language = lang)
 
-    def generate_html_indexes(self):
+    def generate_from_indexes(self, xsl_filename):
         idx_filename = os.path.join(self.config.private_dir, 'indexes', 'indexes.xml')
 
         cmd = ['xsltproc', '--output', self.config.output_dir,
-                '--nonet', '--xinclude',
-                self.indexes_xsl_file, idx_filename]
+                '--nonet', '--xinclude', xsl_filename, idx_filename]
 
         if self.debug:
             cmd.insert(-2, '--param')
@@ -707,6 +707,20 @@ class Lgo(App):
         if rc != 0:
             logging.warn('%s failed with error %d' % (' '.join(cmd), rc))
 
+    def generate_html_indexes(self):
+        logging.info('generating index.html files')
+        self.generate_from_indexes(self.indexes_xsl_file)
+
+    def generate_robots(self):
+        logging.info('generating robots.txt file')
+        robots_xsl_file = os.path.join(data_dir, 'xslt', 'robots.xsl')
+        self.generate_from_indexes(robots_xsl_file)
+
+    def generate_sitemap(self):
+        logging.info('generating sitemap file')
+        sitemap_xsl_file = os.path.join(data_dir, 'xslt', 'sitemap.xsl')
+        self.generate_from_indexes(sitemap_xsl_file)
+
     def generate_static_pages(self):
         try:
             doc_linguas = re.findall(r'DOC_LINGUAS\s+=[\t ](.*)',



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]