[devdocsgjs/main: 1600/1867] mdn: update scraper to mdn/yari




commit 4dc9557032d0052f0e6248375f262af2dae5ca93
Author: Simon Legner <Simon Legner gmail com>
Date:   Fri Jan 29 23:49:35 2021 +0100

    mdn: update scraper to mdn/yari

 assets/stylesheets/pages/_mdn.scss |  1 +
 lib/docs/filters/mdn/clean_html.rb | 15 +++++++++++++++
 lib/docs/filters/svg/clean_html.rb |  2 +-
 lib/docs/scrapers/mdn/mdn.rb       | 12 +-----------
 4 files changed, 18 insertions(+), 12 deletions(-)
---
diff --git a/assets/stylesheets/pages/_mdn.scss b/assets/stylesheets/pages/_mdn.scss
index fb2cce38..10e144f3 100644
--- a/assets/stylesheets/pages/_mdn.scss
+++ b/assets/stylesheets/pages/_mdn.scss
@@ -27,6 +27,7 @@
   p > code, li > code { @extend %label; }
 
   > .note,
+  .notecard, // MDN 2021
   .notice,
   .warning,
   .overheadIndicator,
diff --git a/lib/docs/filters/mdn/clean_html.rb b/lib/docs/filters/mdn/clean_html.rb
index 540be3e1..b78f1def 100644
--- a/lib/docs/filters/mdn/clean_html.rb
+++ b/lib/docs/filters/mdn/clean_html.rb
@@ -41,6 +41,18 @@ module Docs
           node.parent['id'] = node['name']
           node.before(node.content).remove
         end
+        css('h2 > a, h3 > a').each do |node|
+          node.parent.content = node.content
+        end
+
+        css('.notecard > h4').each do |node|
+          node.name = 'strong'
+        end
+
+        css('svg.deprecated').each do |node|
+          node.name = 'span'
+          node.content = node.content
+        end
 
         css('dt > a[id]').each do |node|
           next if node['href']
@@ -64,6 +76,9 @@ module Docs
         end
 
         # New compatibility tables
+        # FIXME(2021):
+        # - fetched from external JSON: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/alignment-baseline/bcd.json
+        # - https://github.com/mdn/yari/blob/master/build/bcd-urls.js
 
         css('.bc-data #Legend + dl', '.bc-data #Legend', '.bc-data #Legend_2 + dl', '.bc-data #Legend_2', '.bc-browser-name').remove
 
diff --git a/lib/docs/filters/svg/clean_html.rb b/lib/docs/filters/svg/clean_html.rb
index 4494e875..3468cfb7 100644
--- a/lib/docs/filters/svg/clean_html.rb
+++ b/lib/docs/filters/svg/clean_html.rb
@@ -11,7 +11,7 @@ module Docs
       end
 
       def other
-        css('.prevnext').remove
+        css('.prev-next').remove
 
         if at_css('p').content.include?("\u{00AB}")
           at_css('p').remove
diff --git a/lib/docs/scrapers/mdn/mdn.rb b/lib/docs/scrapers/mdn/mdn.rb
index 1ed97605..04e39e39 100644
--- a/lib/docs/scrapers/mdn/mdn.rb
+++ b/lib/docs/scrapers/mdn/mdn.rb
@@ -3,12 +3,9 @@ module Docs
     self.abstract = true
     self.type = 'mdn'
 
-    params[:raw] = 1
-    params[:macros] = 1
-
     html_filters.push 'mdn/clean_html'
 
-    options[:rate_limit] = 200
+    options[:container] = '#content'
     options[:trailing_slash] = false
 
     options[:skip_link] = ->(link) {
@@ -23,12 +20,5 @@ module Docs
     def get_latest_version(opts)
       get_latest_github_commit_date('mdn', 'content', opts)
     end
-
-    private
-
-    def process_response?(response)
-      response.effective_url.host = 'developer.mozilla.org' if response.effective_url.host == 'wiki.developer.mozilla.org'
-      super && response.effective_url.query == 'raw=1&macros=1'
-    end
   end
 end


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]