[devdocsgjs/main: 1326/1867] Update Perl documentation (5.32)




commit 3af0bbe37baad3ee9a6e70f1d7e4330c9d7e5302
Author: Phil Scherer <pnscher evoforge org>
Date:   Wed Nov 25 18:36:16 2020 +0000

    Update Perl documentation (5.32)

 .../javascripts/templates/pages/about_tmpl.coffee  |  2 +-
 assets/stylesheets/pages/_perl.scss                |  6 +-
 docs/file-scrapers.md                              |  2 -
 lib/docs/filters/perl/clean_html.rb                | 44 ++--------
 lib/docs/filters/perl/entries.rb                   | 99 ++++++++++++++++------
 lib/docs/filters/perl/pre_clean_html.rb            | 17 ++++
 lib/docs/scrapers/perl.rb                          | 44 +++++++---
 7 files changed, 134 insertions(+), 80 deletions(-)
---
diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee
index 913e00dd..4f1ea1aa 100644
--- a/assets/javascripts/templates/pages/about_tmpl.coffee
+++ b/assets/javascripts/templates/pages/about_tmpl.coffee
@@ -578,7 +578,7 @@ credits = [
     'https://raw.githubusercontent.com/pydata/pandas/master/LICENSE'
   ], [
     'Perl',
-    '1993-2016 Larry Wall and others',
+    '1993-2020 Larry Wall and others',
     'GPLv1',
     'https://perldoc.perl.org/index-licence.html'
   ], [
diff --git a/assets/stylesheets/pages/_perl.scss b/assets/stylesheets/pages/_perl.scss
index 57be0c84..777eac8e 100644
--- a/assets/stylesheets/pages/_perl.scss
+++ b/assets/stylesheets/pages/_perl.scss
@@ -1,5 +1,9 @@
 ._perl {
   @extend %simple;
 
-  > h4 { @extend %block-label; }
+  dt + dt { margin-top: 1em; }
+
+  > dl > dt  { @extend %block-label; }
+  > dl > dt.function { @extend %label-blue; }
+  > dl > dt.variable { @extend %label-green; }
 }
diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md
index 4ad4fff0..33a58145 100644
--- a/docs/file-scrapers.md
+++ b/docs/file-scrapers.md
@@ -128,8 +128,6 @@ bsdtar --extract --to-stdout --file openjdk-8-doc_8u272-b10-1_all.deb data.tar.x
 bsdtar --extract --xz --file - --strip-components=6 --directory=docs/openjdk\~8/ ./usr/share/doc/openjdk-8-jre-headless/api/
 ```
 
-## Perl
-
 ## PHP
 
 ## Python
diff --git a/lib/docs/filters/perl/clean_html.rb b/lib/docs/filters/perl/clean_html.rb
index 11ae9b15..4230f661 100644
--- a/lib/docs/filters/perl/clean_html.rb
+++ b/lib/docs/filters/perl/clean_html.rb
@@ -2,49 +2,21 @@ module Docs
   class Perl
     class CleanHtmlFilter < Filter
       def call
-        root_page? ? root : other
-        doc
-      end
-
-      def root
-        doc.inner_html = '<h1>Perl 5 Documentation</h1>'
-      end
-
-      def other
-        @doc = at_css('#content_body')
-
-        css('noscript', '#recent_pages', '#from_search', '#page_index', '.mod_az_list').remove
-
         css('h1, h2, h3, h4').each do |node|
           node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
         end
 
-        at_css('h2').name = 'h1'
-
-        css('a[name] + h2', 'a[name] + h3', 'a[name] + h4', 'a[name] + h5').each do |node|
-          node['id'] = node.previous_element['name']
-        end
-
-        css('li > a[name]').each do |node|
-          node.parent['id'] = node['name']
-        end
-
-        css('pre').each do |node|
-          node.css('li').each do |li|
-            li.content = li.content + "\n"
-          end
+        css('pre > code').each do |node|
+          node.parent['data-language'] = 'perl'
           node.content = node.content
-          node.inner_html = node.inner_html.strip_heredoc
-          node['data-language'] = 'perl'
         end
 
-        if slug =~ /functions/ || slug == 'perlvar'
-          css('ul > li[id]').each do |node|
-            heading = node.at_css('b')
-            heading.name = 'h2'
-            heading['id'] = node['id']
-            node.parent.before(node.children)
-            node.remove
+        css('dl > dt').each do |node|
+          case slug
+          when 'perlfunc'
+            node['class'] = 'function'
+          when 'perlvar'
+            node['class'] = 'variable'
           end
         end
 
diff --git a/lib/docs/filters/perl/entries.rb b/lib/docs/filters/perl/entries.rb
index 505e9b15..1fbf6637 100644
--- a/lib/docs/filters/perl/entries.rb
+++ b/lib/docs/filters/perl/entries.rb
@@ -2,54 +2,101 @@ module Docs
   class Perl
     class EntriesFilter < Docs::EntriesFilter
       REPLACE_TYPES = {
-        'Platform specific' => 'Platform Specific',
-        'Internals and C language interface' => 'Internals',
+        'Platform-Specific' => 'Platform Specific',
+        'Internals and C Language Interface' => 'Internals',
+        'Tutorials' => 'Manual: Tutorials',
+        'Overview' => 'Manual: Overview'
+      }
+
+      # Individual pages within the Perl documentation are missing all context
+      # for anything even resembling a 'type'. So we're going to grab it
+      # elsewhere with a neat trick: dynamically generate a map from a few
+      # ~index~ pages at runtime which is then referenced on future pages.
+      # Prepopulate w/ edge cases
+      TYPES = {
+        'pod2man' => 'Utilities',
+        'pod2text' => 'Utilities',
+        'encguess' => 'Utilities',
+        'streamzip' => 'Utilities',
+        'pl2pm' => 'Utilities',
 
+        'perl' => 'Manual: Overview',
+        'perldoc' => 'Manual: Overview',
+        'perlintro' => 'Manual: Overview',
         'perlop' => 'Operators',
         'perlvar' => 'Variables',
-        'Functions' => 'Functions'
+        'perlref' => 'Reference Manual',
+        'modules' => 'Standard Modules',
+        'perlutil' => 'Utilities',
+
+        'warnings' => 'Pragmas',
+        'strict' => 'Pragmas',
+
+        'Pod::Text::Overstrike' => 'Standard Modules',
+        'Test2::EventFacet::Hub' => 'Standard Modules'
       }
 
-      MANUAL_TYPES = %w(Overview Tutorials FAQs)
+      def call
+        case slug
+        when 'perl'
+          css('h2').each do |heading|
+            heading.next_element.css('a').each do |node|
+              TYPES[node.content] = heading.content
+            end
+          end
 
-      def breadcrumbs
-        @breadcrumbs ||= at_css('#breadcrumbs').content.split('>').each { |s| s.strip! }
-      end
+        when 'modules'
+          node = at_css('#Pragmatic-Modules')
+          node = node.next_element while node.name != 'ul'
+          node.css('li').each do |n|
+            TYPES[n.at_css('a').content] = 'Pragmas'
+          end
 
-      def include_default_entry?
-        slug !~ /\Aindex/
+          node = at_css('#Standard-Modules')
+          node = node.next_element while node.name != 'ul'
+          node.css('li').each do |n|
+            TYPES[n.at_css('a').content] = 'Standard Modules'
+          end
+
+        when 'perlutil'
+          css('dl > dt').each do |node|
+            TYPES[node['id']] = "Utilities"
+          end
+        end
+
+        super
       end
 
       def get_name
-        at_css('h1').content.strip
+        slug
       end
 
       def get_type
-        case breadcrumbs[1]
-        when 'Language reference'
-          REPLACE_TYPES[breadcrumbs[2]] || 'Language'
-        when /\ACore modules/
-          'Core Modules'
+        case slug
+        when /perl.*faq/
+          'Manual: FAQs'
         else
-          type = REPLACE_TYPES[breadcrumbs[1]] || breadcrumbs[1]
-          type.prepend 'Manual: ' if MANUAL_TYPES.include?(type)
-          type
+          if TYPES.key? name
+            REPLACE_TYPES[TYPES[name]] || TYPES[name]
+          else
+            'Other'
+          end
         end
       end
 
       def additional_entries
         case slug
+        when 'perlfunc'
+          css(':not(p) + dl > dt').each_with_object [] do |node, entries|
+            entries << [node.content, node['id'], 'Functions']
+          end
         when 'perlop'
-          css('h2').map do |node|
-            name = node.content
-            id = node.previous_element['name']
-            [name, id]
+          css('h2').each_with_object [] do |node, entries|
+            entries << [node.content, node['id'], 'Operators']
           end
         when 'perlvar'
-          css('#content_body > ul > li > b').map do |node|
-            name = node.content
-            id = node.previous_element['name']
-            [name, id]
+          css('> dl > dt').each_with_object [] do |node, entries|
+            entries << [node.content, node['id'], 'Variables']
           end
         else
           []
diff --git a/lib/docs/filters/perl/pre_clean_html.rb b/lib/docs/filters/perl/pre_clean_html.rb
new file mode 100755
index 00000000..957bd431
--- /dev/null
+++ b/lib/docs/filters/perl/pre_clean_html.rb
@@ -0,0 +1,17 @@
+module Docs
+  class Perl
+    class PreCleanHtmlFilter < Filter
+      def call
+        css('#links', '.leading-notice', '.permalink').remove
+
+        # Bug somewhere prevents these two ids from loading
+        if slug == 'perlvar'
+          at_css('#\$\"')['id'] = '$ls'
+          at_css('#\$\#')['id'] = '$hash'
+        end
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/perl.rb b/lib/docs/scrapers/perl.rb
index 8c0462e5..90844c47 100644
--- a/lib/docs/scrapers/perl.rb
+++ b/lib/docs/scrapers/perl.rb
@@ -1,46 +1,62 @@
 module Docs
-  class Perl < FileScraper
+  class Perl < UrlScraper
     self.name = 'Perl'
     self.type = 'perl'
-    self.root_path = 'index.html'
+#    self.root_path = 'index.html'
+    self.initial_paths = ['modules.html', 'perlutil.html', 'perl.html']
     self.links = {
       home: 'https://www.perl.org/'
     }
 
-    html_filters.push 'perl/entries', 'perl/clean_html'
+    html_filters.push 'perl/pre_clean_html', 'perl/entries', 'perl/clean_html', 'title'
+
+    options[:container] = '#perldocdiv'
 
     options[:skip] = %w(
-      preferences.html
-      perlartistic.html
-      perlgpl.html
-      perlhist.html
-      perltodo.html )
+      perlbook perlcommunity perlexperiment perlartistic perlgpl perlhist
+      perlcn perljp perlko perltw
+      perlboot perlbot perlrepository perltodo perltooc perltoot )
 
-    options[:skip_patterns] = [/\.pdf/, /delta\.html/]
+    options[:skip_patterns] = [/\Afunctions/, /\Avariables/, /\.pdf/, /delta/]
 
     options[:attribution] = <<-HTML
-      &copy; 1993&ndash;2016 Larry Wall and others<br>
+      &copy; 1993&ndash;2020 Larry Wall and others<br>
       Licensed under the GNU General Public License version 1 or later, or the Artistic License.<br>
       The Perl logo is a trademark of the Perl Foundation.
     HTML
 
+    version '5.32' do
+      self.release = '5.32.0'
+      self.base_url = "https://perldoc.perl.org/#{self.release}/";
+    end
+
+    version '5.30' do
+      self.release = '5.30.3'
+      self.base_url = "https://perldoc.perl.org/#{self.release}/";
+    end
+
+    version '5.28' do
+      self.release = '5.28.3'
+      self.base_url = "https://perldoc.perl.org/#{self.release}/";
+    end
+
     version '5.26' do
-      self.release = '5.26.0'
+      self.release = '5.26.3'
       self.base_url = "https://perldoc.perl.org/#{self.release}/";
     end
 
     version '5.24' do
-      self.release = '5.24.0'
+      self.release = '5.24.4'
       self.base_url = "https://perldoc.perl.org/#{self.release}/";
     end
 
     version '5.22' do
-      self.release = '5.22.0'
+      self.release = '5.22.4'
       self.base_url = "https://perldoc.perl.org/#{self.release}/";
     end
 
     version '5.20' do
-      self.release = '5.20.2'
+      self.release = '5.20.3'
       self.base_url = "https://perldoc.perl.org/#{self.release}/";
     end
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]