[devdocsgjs/main: 409/1867] Implement working crawling and section building for salt




commit b7075dd51ac8c3c21046fbf293b744e75a19aa60
Author: Boris Bera <bera boris gmail com>
Date:   Tue Sep 25 23:46:15 2018 -0400

    Implement working crawling and section building for salt

 lib/docs/filters/salt_stack/clean_html.rb |  2 ++
 lib/docs/filters/salt_stack/entries.rb    | 19 +++++++++++++++++--
 lib/docs/scrapers/salt_stack.rb           |  7 ++++++-
 3 files changed, 25 insertions(+), 3 deletions(-)
---
diff --git a/lib/docs/filters/salt_stack/clean_html.rb b/lib/docs/filters/salt_stack/clean_html.rb
index ac53a94c..0f084519 100644
--- a/lib/docs/filters/salt_stack/clean_html.rb
+++ b/lib/docs/filters/salt_stack/clean_html.rb
@@ -2,6 +2,8 @@ module Docs
   class SaltStack
     class CleanHtmlFilter < Filter
       def call
+        css('.headerlink').remove
+
         doc
       end
     end
diff --git a/lib/docs/filters/salt_stack/entries.rb b/lib/docs/filters/salt_stack/entries.rb
index dda9871d..51bf17d2 100644
--- a/lib/docs/filters/salt_stack/entries.rb
+++ b/lib/docs/filters/salt_stack/entries.rb
@@ -1,12 +1,27 @@
 module Docs
   class SaltStack
     class EntriesFilter < Docs::EntriesFilter
+      SALT_REF_RGX = /salt\.([^\.]+)\.([^\s]+)/
+
       def get_name
-        at_css('h1').content
+        header = at_css('h1').content
+
+        ref_match = SALT_REF_RGX.match(header)
+        if ref_match
+          ns, mod = ref_match.captures
+          "#{ns}.#{mod}"
+        else
+          header
+        end
       end
 
       def get_type
-        'TODO'
+        type, _ = slug.split('/', 2)
+        type
+      end
+
+      def include_default_entry?
+        !subpath.end_with?('index.html')
       end
     end
   end
diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb
index 3196a18a..a4c974b2 100644
--- a/lib/docs/scrapers/salt_stack.rb
+++ b/lib/docs/scrapers/salt_stack.rb
@@ -1,9 +1,14 @@
 module Docs
   class SaltStack < UrlScraper
+    self.type = 'salt_stack'
     self.release = '2018.3.2'
     self.base_url = 'https://docs.saltstack.com/en/latest/ref/'
 
-    html_filters.push 'salt_stack/entries', 'salt_stack/clean_html'
+    html_filters.push 'salt_stack/clean_html', 'salt_stack/entries'
+
+    options[:only_patterns] = [
+      %r{[^/]+/all/}
+    ]
 
     options[:container] = '.body-content'
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]