[devdocsgjs/main: 829/1867] rxjs: finish scraper and filters




commit 17528d984575fe6d7a586987a7936a06d695c588
Author: Jasper van Merle <jaspervmerle gmail com>
Date:   Fri Aug 16 16:49:12 2019 +0200

    rxjs: finish scraper and filters

 .../javascripts/templates/pages/about_tmpl.coffee  |  20 ++--
 lib/docs/filters/rxjs/clean_html.rb                |  54 +++++++++--
 lib/docs/filters/rxjs/entries.rb                   |  12 ++-
 lib/docs/scrapers/rxjs.rb                          | 102 ++++++++++-----------
 public/icons/docs/rxjs/16.png                      | Bin 5356 -> 1521 bytes
 public/icons/docs/rxjs/SOURCE                      |   2 +-
 6 files changed, 117 insertions(+), 73 deletions(-)
---
diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee
index ac86b701..892c4284 100644
--- a/assets/javascripts/templates/pages/about_tmpl.coffee
+++ b/assets/javascripts/templates/pages/about_tmpl.coffee
@@ -211,8 +211,7 @@ credits = [
     '2017 Cypress.io',
     'MIT',
     'https://raw.githubusercontent.com/cypress-io/cypress-documentation/develop/LICENSE.md'
-  ],
-  [
+  ], [
     'D',
     '1999-2018 The D Language Foundation',
     'Boost',
@@ -572,8 +571,7 @@ credits = [
     '2016-2018, The Pony Developers & 2014-2015, Causality Ltd.',
     'BSD',
     'https://raw.githubusercontent.com/ponylang/ponyc/master/LICENSE'
-  ],
-  [
+  ], [
     'PostgreSQL',
     '1996-2018 The PostgreSQL Global Development Group<br>&copy; 1994 The Regents of the University of California',
     'PostgreSQL',
@@ -648,13 +646,17 @@ credits = [
     '2010 The Rust Project Developers',
     'MIT',
     'https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT'
+  ], [
+    'RxJS',
+    '2015-2018 Google, Inc., Netflix, Inc., Microsoft Corp. and contributors',
+    'Apache',
+    'https://raw.githubusercontent.com/ReactiveX/rxjs/master/LICENSE.txt'
   ], [
     'Salt Stack',
     '2019 SaltStack',
     'Apache',
     'https://raw.githubusercontent.com/saltstack/salt/develop/LICENSE'
-  ],
-  [
+  ], [
     'Sass',
     '2006-2016 Hampton Catlin, Nathan Weizenbaum, and Chris Eppstein',
     'MIT',
@@ -664,8 +666,7 @@ credits = [
     '2002-2019 EPFL, with contributions from Lightbend',
     'Apache',
     'https://raw.githubusercontent.com/scala/scala-lang/master/license.md'
-  ],
-  [
+  ], [
     'scikit-image',
     '2011 the scikit-image team',
     'BSD',
@@ -765,8 +766,7 @@ credits = [
     '2003-2019 WordPress Foundation',
     'GPLv2+',
     'https://wordpress.org/about/license/'
-  ],
-  [
+  ], [
     'Yarn',
     '2016-present Yarn Contributors',
     'BSD',
diff --git a/lib/docs/filters/rxjs/clean_html.rb b/lib/docs/filters/rxjs/clean_html.rb
index 1056b1a6..864c201b 100644
--- a/lib/docs/filters/rxjs/clean_html.rb
+++ b/lib/docs/filters/rxjs/clean_html.rb
@@ -7,6 +7,11 @@ module Docs
           at_css('h1').content = 'RxJS Documentation'
         end
 
+        if at_css('h1').nil?
+          title = subpath.rpartition('/').last.titleize
+          doc.prepend_child("<h1>#{title}</h1>")
+        end
+
         css('br', 'hr', '.material-icons', '.header-link', '.breadcrumb').remove
 
         css('.content', 'article', '.api-header', 'section', '.instance-member').each do |node|
@@ -65,6 +70,16 @@ module Docs
 
           if node['class'] && node['class'].include?('api-heading')
             node.name = 'h3'
+
+            unless node.ancestors('.instance-method').empty?
+              matches = node.inner_html.scan(/([^(& ]+)[(&]/)
+
+              unless matches.empty? || matches[0][0] == 'constructor'
+                node['name'] = matches[0][0]
+                node['id'] = node['name'].downcase + '-'
+              end
+            end
+
             node.inner_html = "<code>#{node.inner_html}</code>"
           end
 
@@ -77,25 +92,48 @@ module Docs
           node.remove_attribute('class')
         end
 
-        css('h1[class]').remove_attr('class')
-        css('table[class]').remove_attr('class')
-        css('table[width]').remove_attr('width')
-        css('tr[style]').remove_attr('style')
+        css('td > .overloads').each do |node|
+          node.replace node.at_css('.detail-contents')
+        end
+
+        css('td.short-description p').each do |node|
+          signature = node.parent.parent.next_element.at_css('h3[id]')
+          signature.after(node) unless signature.nil?
+        end
 
-        if at_css('.api-type-label.module')
-          at_css('h1').content = subpath.remove('api/')
+        css('.method-table').each do |node|
+          node.replace node.at_css('tbody')
         end
 
-        css('th h3').each do |node|
-          node.name = 'span'
+        css('.api-body > table > caption').each do |node|
+          node.name = 'center'
+          lift_out_of_table node
         end
 
+        css('.api-body > table > tbody > tr:not([class]) > td > *').each do |node|
+          lift_out_of_table node
+        end
+
+        css('.api-body > table').each do |node|
+          node.remove if node.content.strip.blank?
+        end
+
+        css('h1[class]').remove_attr('class')
+        css('table[class]').remove_attr('class')
+        css('table[width]').remove_attr('width')
+        css('tr[style]').remove_attr('style')
+
         css('code code').each do |node|
           node.before(node.children).remove
         end
 
         doc
       end
+
+      def lift_out_of_table(node)
+        table = node.ancestors('table').first
+        table.previous_element.after(node)
+      end
     end
   end
 end
diff --git a/lib/docs/filters/rxjs/entries.rb b/lib/docs/filters/rxjs/entries.rb
index 020ce1eb..c6e488fb 100644
--- a/lib/docs/filters/rxjs/entries.rb
+++ b/lib/docs/filters/rxjs/entries.rb
@@ -2,22 +2,28 @@ module Docs
   class Rxjs
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        name = at_css('h1').content
+        title = at_css('h1')
+        name = title.nil? ? subpath.rpartition('/').last.titleize : title.content
         name.prepend "#{$1}. " if subpath =~ /\-pt(\d+)/
+        name += '()' unless at_css('.api-type-label.function').nil?
         name
       end
 
       def get_type
         if slug.start_with?('guide')
           'Guide'
-        elsif at_css('.api-type-label.module')
-          name.split('/').first
         elsif slug.start_with?('api/')
           slug.split('/').second
         else
           'Miscellaneous'
         end
       end
+
+      def additional_entries
+        css('h3[id]').map do |node|
+          ["#{name}.#{node['name']}()", node['id']]
+        end
+      end
     end
   end
 end
diff --git a/lib/docs/scrapers/rxjs.rb b/lib/docs/scrapers/rxjs.rb
index 1825fc80..e5ea1051 100644
--- a/lib/docs/scrapers/rxjs.rb
+++ b/lib/docs/scrapers/rxjs.rb
@@ -4,11 +4,26 @@ module Docs
   class Rxjs < UrlScraper
     self.name = 'RxJS'
     self.type = 'rxjs'
+    self.release = '6.5.2'
+    self.base_url = 'https://rxjs.dev/'
+    self.root_path = 'guide/overview'
     self.links = {
       home: 'https://rxjs.dev/',
       code: 'https://github.com/ReactiveX/rxjs'
     }
 
+    html_filters.push 'rxjs/clean_html', 'rxjs/entries'
+
+    options[:follow_links] = false
+    options[:only_patterns] = [/guide\//, /api\//]
+    options[:skip_patterns] = [/api\/([^\/]+)\.json/]
+    options[:fix_urls_before_parse] = ->(url) do
+      url.sub! %r{\Aguide/}, '/guide/'
+      url.sub! %r{\Aapi/}, '/api/'
+      url.sub! %r{\Agenerated/}, '/generated/'
+      url
+    end
+
     options[:max_image_size] = 256_000
 
     options[:attribution] = <<-HTML
@@ -16,69 +31,54 @@ module Docs
       Code licensed under an Apache-2.0 License. Documentation licensed under CC BY 4.0.
     HTML
 
-    module Common
-      private
+    def get_latest_version(opts)
+      json = fetch_json('https://rxjs.dev/generated/navigation.json', opts)
+      json['__versionInfo']['raw']
+    end
 
-      def initial_urls
-        initial_urls = []
+    private
 
-        Request.run "#{self.class.base_url}generated/navigation.json" do |response|
-          data = JSON.parse(response.body)
-          dig = ->(entry) do
-            initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api'
-            entry['children'].each(&dig) if entry['children']
-          end
-          data['SideNav'].each(&dig)
-        end
+    def initial_urls
+      initial_urls = []
 
-        Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response|
-          data = JSON.parse(response.body)
-          dig = ->(entry) do
-            initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path']
-            initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path']
-            entry['items'].each(&dig) if entry['items']
-          end
-          data.each(&dig)
+      Request.run "#{self.class.base_url}generated/navigation.json" do |response|
+        data = JSON.parse(response.body)
+        dig = ->(entry) do
+          initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api'
+          entry['children'].each(&dig) if entry['children']
         end
-
-        initial_urls
+        data['SideNav'].each(&dig)
       end
 
-      def handle_response(response)
-        if response.mime_type.include?('json')
-          begin
-            response.options[:response_body] = JSON.parse(response.body)['contents']
-          rescue JSON::ParserError
-            response.options[:response_body] = ''
-          end
-          response.headers['Content-Type'] = 'text/html'
-          response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json')
-          response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json')
+      Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response|
+        data = JSON.parse(response.body)
+        dig = ->(entry) do
+          initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path']
+          initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path']
+          entry['items'].each(&dig) if entry['items']
         end
-        super
+        data.each(&dig)
       end
-    end
 
-    version do
-      self.release = '6.3.3'
-      self.base_url = 'https://rxjs.dev/'
-      self.root_path = 'guide/overview'
-
-      html_filters.push 'rxjs/clean_html', 'rxjs/entries'
-
-      options[:follow_links] = false
-      options[:only_patterns] = [/\Aguide/, /\Aapi/]
-      options[:fix_urls_before_parse] = ->(url) do
-        url.sub! %r{\Aguide/}, '/guide/'
-        url.sub! %r{\Aapi/}, '/api/'
-        url.sub! %r{\Agenerated/}, '/generated/'
-        url
+      initial_urls.select do |url|
+        options[:only_patterns].any? { |pattern| url =~ pattern } &&
+          options[:skip_patterns].none? { |pattern| url =~ pattern }
       end
-
-      include Docs::Rxjs::Common
     end
 
-    private
+    def handle_response(response)
+      if response.mime_type.include?('json')
+        begin
+          response.options[:response_body] = JSON.parse(response.body)['contents']
+        rescue JSON::ParserError
+          response.options[:response_body] = ''
+        end
+        response.headers['Content-Type'] = 'text/html'
+        response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json')
+          response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json')
+      end
+      super
+    end
 
     def parse(response)
       response.body.gsub! '<code-example', '<pre'
diff --git a/public/icons/docs/rxjs/16.png b/public/icons/docs/rxjs/16.png
index 7db53a8a..790f8390 100644
Binary files a/public/icons/docs/rxjs/16.png and b/public/icons/docs/rxjs/16.png differ
diff --git a/public/icons/docs/rxjs/SOURCE b/public/icons/docs/rxjs/SOURCE
index 536eb88a..2a3b3084 100644
--- a/public/icons/docs/rxjs/SOURCE
+++ b/public/icons/docs/rxjs/SOURCE
@@ -1 +1 @@
-http://reactivex.io/
+https://github.com/ReactiveX/reactivex.github.io/blob/develop/favicon.ico


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]