[devdocsgjs/main: 1642/1867] Fix mdn scrapers




commit 3cf987da8ba70d252af6384df283fbdf0f631212
Author: Enoc <brianhernandez222 hotmail com>
Date:   Wed May 5 21:43:58 2021 -0600

    Fix mdn scrapers
    
    - xslt_xpath had an error with redirection.

 lib/docs/filters/xslt_xpath/entries.rb | 5 +++--
 lib/docs/scrapers/mdn/css.rb           | 2 +-
 lib/docs/scrapers/mdn/dom.rb           | 2 +-
 lib/docs/scrapers/mdn/javascript.rb    | 2 +-
 lib/docs/scrapers/mdn/svg.rb           | 2 +-
 lib/docs/scrapers/mdn/xslt_xpath.rb    | 6 +-----
 6 files changed, 8 insertions(+), 11 deletions(-)
---
diff --git a/lib/docs/filters/xslt_xpath/entries.rb b/lib/docs/filters/xslt_xpath/entries.rb
index 5151246f..3480547f 100644
--- a/lib/docs/filters/xslt_xpath/entries.rb
+++ b/lib/docs/filters/xslt_xpath/entries.rb
@@ -6,13 +6,14 @@ module Docs
         name.remove! 'XPath.'
         name.remove! 'XSLT.'
         name.remove! 'Axes.'
-        name.prepend 'xsl:' if slug =~ /\AXSLT\/[a-z]/
+        name.remove! 'Element.'
+        name.prepend 'xsl:' if slug =~ /XSLT\/Element/
         name << '()' if name.gsub!('Functions.', '')
         name
       end
 
       def get_type
-        if slug =~ /\AXSLT\/[a-z]/
+        if slug =~ /XSLT\/Element/
           'XSLT Elements'
         elsif slug.start_with?('XPath/Axes')
           'XPath Axes'
diff --git a/lib/docs/scrapers/mdn/css.rb b/lib/docs/scrapers/mdn/css.rb
index 4c44f1f1..98a126ac 100644
--- a/lib/docs/scrapers/mdn/css.rb
+++ b/lib/docs/scrapers/mdn/css.rb
@@ -4,7 +4,7 @@ module Docs
     self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/CSS'
     self.root_path = '/Reference'
 
-    html_filters.push 'css/clean_html', 'css/entries', 'title'
+    html_filters.push 'css/clean_html', 'css/entries'
 
     options[:root_title] = 'CSS'
 
diff --git a/lib/docs/scrapers/mdn/dom.rb b/lib/docs/scrapers/mdn/dom.rb
index 943aeb5b..57828df3 100644
--- a/lib/docs/scrapers/mdn/dom.rb
+++ b/lib/docs/scrapers/mdn/dom.rb
@@ -4,7 +4,7 @@ module Docs
     self.name = 'DOM'
     self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/API'
 
-    html_filters.push 'dom/clean_html', 'dom/entries', 'title'
+    html_filters.push 'dom/clean_html', 'dom/entries'
 
     options[:root_title] = 'DOM'
 
diff --git a/lib/docs/scrapers/mdn/javascript.rb b/lib/docs/scrapers/mdn/javascript.rb
index 935df61c..8238d3b4 100644
--- a/lib/docs/scrapers/mdn/javascript.rb
+++ b/lib/docs/scrapers/mdn/javascript.rb
@@ -6,7 +6,7 @@ module Docs
     self.name = 'JavaScript'
     self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference'
 
-    html_filters.push 'javascript/clean_html', 'javascript/entries', 'title'
+    html_filters.push 'javascript/clean_html', 'javascript/entries'
 
     options[:root_title] = 'JavaScript'
 
diff --git a/lib/docs/scrapers/mdn/svg.rb b/lib/docs/scrapers/mdn/svg.rb
index db9de7a1..4a9de544 100644
--- a/lib/docs/scrapers/mdn/svg.rb
+++ b/lib/docs/scrapers/mdn/svg.rb
@@ -6,7 +6,7 @@ module Docs
     self.name = 'SVG'
     self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/SVG'
 
-    html_filters.push 'svg/clean_html', 'svg/entries', 'title'
+    html_filters.push 'svg/clean_html', 'svg/entries'
 
     options[:root_title] = 'SVG'
 
diff --git a/lib/docs/scrapers/mdn/xslt_xpath.rb b/lib/docs/scrapers/mdn/xslt_xpath.rb
index 2407f0c0..286c48f4 100644
--- a/lib/docs/scrapers/mdn/xslt_xpath.rb
+++ b/lib/docs/scrapers/mdn/xslt_xpath.rb
@@ -6,15 +6,11 @@ module Docs
     self.root_path = '/XSLT'
     self.initial_paths = %w(/XPath)
 
-    html_filters.push 'xslt_xpath/clean_html', 'xslt_xpath/entries', 'title'
+    html_filters.push 'xslt_xpath/clean_html', 'xslt_xpath/entries'
 
     options[:root_title] = 'XSLT'
 
     options[:only_patterns] = [/\A\/XSLT/, /\A\/XPath/]
 
-    options[:fix_urls] = ->(url) do
-      url.sub! 'https://developer.mozilla.org/en-US/docs/Web/XSLT/Element', "#{XsltXpath.base_url}/XSLT"
-      url
-    end
   end
 end


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]