[devdocsgjs/wip/andyholmes/gjs-repository-docs] DevDocs: add scraper for GJS's in-tree documentation




commit 775ba6b4e587291e6e4026c9347aa6a37a9ee407
Author: Andy Holmes <andrew g r holmes gmail com>
Date:   Sun Aug 14 15:12:43 2022 -0700

    DevDocs: add scraper for GJS's in-tree documentation
    
    This adds a scraper that piggy-backs on GitHub's Markdown-to-HTML rendering
    to add GJS's in-tree documentation to DevDocs.

 Dockerfile                                 |  3 ++
 lib/docs/filters/gjs_scraper/clean_html.rb | 11 +++++++
 lib/docs/filters/gjs_scraper/entries.rb    | 53 ++++++++++++++++++++++++++++++
 lib/docs/scrapers/gnome/gjs_scraper.rb     | 43 ++++++++++++++++++++++++
 4 files changed, 110 insertions(+)
---
diff --git a/Dockerfile b/Dockerfile
index 8ecaad7c..ff49509b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -56,6 +56,9 @@ WORKDIR /opt/devdocs
 RUN bundle config set --local deployment 'true' && \
     bundle install
 
+# GJS documentation
+RUN bundle exec thor docs:generate gjs_scraper --force
+
 # Generate scrapers
 RUN bundle exec thor gir:generate_all /usr/share/gir-1.0 && \
     bundle exec thor gir:generate_all /usr/lib64/mutter-3 && \
diff --git a/lib/docs/filters/gjs_scraper/clean_html.rb b/lib/docs/filters/gjs_scraper/clean_html.rb
new file mode 100644
index 00000000..857c191c
--- /dev/null
+++ b/lib/docs/filters/gjs_scraper/clean_html.rb
@@ -0,0 +1,11 @@
+module Docs
+  class GjsScraper
+    # CleanHtmlFilter for GJS's in-tree documentation. Currently a no-op that
+    # returns `doc` as-is; extend #call to make adjustments to the HTML.
+    class CleanHtmlFilter < Filter
+      def call
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/gjs_scraper/entries.rb b/lib/docs/filters/gjs_scraper/entries.rb
new file mode 100644
index 00000000..34eba637
--- /dev/null
+++ b/lib/docs/filters/gjs_scraper/entries.rb
@@ -0,0 +1,53 @@
+module Docs
+  class GjsScraper
+    class EntriesFilter < Docs::EntriesFilter
+      SKIP_SYMBOLS = [
+        'ECMAScript Modules'
+      ]
+
+      # "name" is the title of the entry in the sidebar.
+      def get_name
+        node = at_css('h1')
+        name = node.content.strip
+      end
+
+      # "type" is the expandable group the "name" is under.
+      def get_type
+        node = at_css('h1')
+        type = node.content.strip
+      end
+
+      def strip_symbol (node, entry)
+        # If this is a function (e.g. "baz()"), strip down to the member
+        # name to get a reasonable sidebar entry and fragment link
+        func_match = /(.*)(?=\(.*\))/.match(entry[0])
+        if func_match
+          entry[0] = func_match[1] + '()'
+          entry[1] = node['id'] = func_match[1].parameterize
+        end
+      end
+
+      def additional_entries
+        # skip README.md
+        return [] if root_page?
+
+        css('h2, h3').each_with_object [] do |node, entries|
+          # skip `###` entries for pages that don't have symbols
+          next if node.name == 'h3' and SKIP_SYMBOLS.include? type
+          
+          # common filtering
+          name = node.content.strip
+          id = node['id'] = name.parameterize
+          type = self.get_type
+          entry = [name, id, type]
+            
+          if node.name == 'h3'
+            strip_symbol node, entry
+          end
+          
+          entries << entry
+        end
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/gnome/gjs_scraper.rb b/lib/docs/scrapers/gnome/gjs_scraper.rb
new file mode 100644
index 00000000..4be5b590
--- /dev/null
+++ b/lib/docs/scrapers/gnome/gjs_scraper.rb
@@ -0,0 +1,43 @@
+module Docs
+  class GjsScraper < Github
+    self.name = 'GJS'
+    self.base_url = 'https://github.com/GNOME/gjs/blob/master/doc/'
+    self.root_path = 'README.md'
+    self.initial_paths = %w[
+      ByteArray
+      cairo
+      Console
+      Encoding
+      Environment
+      ESModules
+      Format
+      Gettext
+      Lang
+      Logging
+      Mainloop
+      Mapping
+      Overrides
+      Profiling
+      Signals
+      System
+      Testing
+      Timers
+    ].map { |name| name + '.md' }
+
+    html_filters.push 'gjs_scraper/entries', 'gjs_scraper/clean_html'
+
+    options[:container] = '.markdown-body'
+    options[:title] = 'GJS'
+    options[:skip_links] = true
+
+    options[:attribution] = <<-HTML
+      &copy; 2022 GJS Contributors<br>
+      Licensed under the MIT License.
+    HTML
+
+    def get_latest_version(opts)
+      tags = get_gitlab_tags('gitlab.gnome.org', 'GNOME', 'gjs', opts)
+      tags[0]['name']
+    end
+  end
+end


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]