[devdocsgjs/main: 829/1867] rxjs: finish scraper and filters
- From: Andy Holmes <andyholmes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [devdocsgjs/main: 829/1867] rxjs: finish scraper and filters
- Date: Fri, 19 Nov 2021 23:47:31 +0000 (UTC)
commit 17528d984575fe6d7a586987a7936a06d695c588
Author: Jasper van Merle <jaspervmerle gmail com>
Date: Fri Aug 16 16:49:12 2019 +0200
rxjs: finish scraper and filters
.../javascripts/templates/pages/about_tmpl.coffee | 20 ++--
lib/docs/filters/rxjs/clean_html.rb | 54 +++++++++--
lib/docs/filters/rxjs/entries.rb | 12 ++-
lib/docs/scrapers/rxjs.rb | 102 ++++++++++-----------
public/icons/docs/rxjs/16.png | Bin 5356 -> 1521 bytes
public/icons/docs/rxjs/SOURCE | 2 +-
6 files changed, 117 insertions(+), 73 deletions(-)
---
diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee
index ac86b701..892c4284 100644
--- a/assets/javascripts/templates/pages/about_tmpl.coffee
+++ b/assets/javascripts/templates/pages/about_tmpl.coffee
@@ -211,8 +211,7 @@ credits = [
'2017 Cypress.io',
'MIT',
'https://raw.githubusercontent.com/cypress-io/cypress-documentation/develop/LICENSE.md'
- ],
- [
+ ], [
'D',
'1999-2018 The D Language Foundation',
'Boost',
@@ -572,8 +571,7 @@ credits = [
'2016-2018, The Pony Developers & 2014-2015, Causality Ltd.',
'BSD',
'https://raw.githubusercontent.com/ponylang/ponyc/master/LICENSE'
- ],
- [
+ ], [
'PostgreSQL',
'1996-2018 The PostgreSQL Global Development Group<br>© 1994 The Regents of the University of California',
'PostgreSQL',
@@ -648,13 +646,17 @@ credits = [
'2010 The Rust Project Developers',
'MIT',
'https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT'
+ ], [
+ 'RxJS',
+ '2015-2018 Google, Inc., Netflix, Inc., Microsoft Corp. and contributors',
+ 'Apache',
+ 'https://raw.githubusercontent.com/ReactiveX/rxjs/master/LICENSE.txt'
], [
'Salt Stack',
'2019 SaltStack',
'Apache',
'https://raw.githubusercontent.com/saltstack/salt/develop/LICENSE'
- ],
- [
+ ], [
'Sass',
'2006-2016 Hampton Catlin, Nathan Weizenbaum, and Chris Eppstein',
'MIT',
@@ -664,8 +666,7 @@ credits = [
'2002-2019 EPFL, with contributions from Lightbend',
'Apache',
'https://raw.githubusercontent.com/scala/scala-lang/master/license.md'
- ],
- [
+ ], [
'scikit-image',
'2011 the scikit-image team',
'BSD',
@@ -765,8 +766,7 @@ credits = [
'2003-2019 WordPress Foundation',
'GPLv2+',
'https://wordpress.org/about/license/'
- ],
- [
+ ], [
'Yarn',
'2016-present Yarn Contributors',
'BSD',
diff --git a/lib/docs/filters/rxjs/clean_html.rb b/lib/docs/filters/rxjs/clean_html.rb
index 1056b1a6..864c201b 100644
--- a/lib/docs/filters/rxjs/clean_html.rb
+++ b/lib/docs/filters/rxjs/clean_html.rb
@@ -7,6 +7,11 @@ module Docs
at_css('h1').content = 'RxJS Documentation'
end
+ if at_css('h1').nil?
+ title = subpath.rpartition('/').last.titleize
+ doc.prepend_child("<h1>#{title}</h1>")
+ end
+
css('br', 'hr', '.material-icons', '.header-link', '.breadcrumb').remove
css('.content', 'article', '.api-header', 'section', '.instance-member').each do |node|
@@ -65,6 +70,16 @@ module Docs
if node['class'] && node['class'].include?('api-heading')
node.name = 'h3'
+
+ unless node.ancestors('.instance-method').empty?
+ matches = node.inner_html.scan(/([^(& ]+)[(&]/)
+
+ unless matches.empty? || matches[0][0] == 'constructor'
+ node['name'] = matches[0][0]
+ node['id'] = node['name'].downcase + '-'
+ end
+ end
+
node.inner_html = "<code>#{node.inner_html}</code>"
end
@@ -77,25 +92,48 @@ module Docs
node.remove_attribute('class')
end
- css('h1[class]').remove_attr('class')
- css('table[class]').remove_attr('class')
- css('table[width]').remove_attr('width')
- css('tr[style]').remove_attr('style')
+ css('td > .overloads').each do |node|
+ node.replace node.at_css('.detail-contents')
+ end
+
+ css('td.short-description p').each do |node|
+ signature = node.parent.parent.next_element.at_css('h3[id]')
+ signature.after(node) unless signature.nil?
+ end
- if at_css('.api-type-label.module')
- at_css('h1').content = subpath.remove('api/')
+ css('.method-table').each do |node|
+ node.replace node.at_css('tbody')
end
- css('th h3').each do |node|
- node.name = 'span'
+ css('.api-body > table > caption').each do |node|
+ node.name = 'center'
+ lift_out_of_table node
end
+ css('.api-body > table > tbody > tr:not([class]) > td > *').each do |node|
+ lift_out_of_table node
+ end
+
+ css('.api-body > table').each do |node|
+ node.remove if node.content.strip.blank?
+ end
+
+ css('h1[class]').remove_attr('class')
+ css('table[class]').remove_attr('class')
+ css('table[width]').remove_attr('width')
+ css('tr[style]').remove_attr('style')
+
css('code code').each do |node|
node.before(node.children).remove
end
doc
end
+
+ def lift_out_of_table(node)
+ table = node.ancestors('table').first
+ table.previous_element.after(node)
+ end
end
end
end
diff --git a/lib/docs/filters/rxjs/entries.rb b/lib/docs/filters/rxjs/entries.rb
index 020ce1eb..c6e488fb 100644
--- a/lib/docs/filters/rxjs/entries.rb
+++ b/lib/docs/filters/rxjs/entries.rb
@@ -2,22 +2,28 @@ module Docs
class Rxjs
class EntriesFilter < Docs::EntriesFilter
def get_name
- name = at_css('h1').content
+ title = at_css('h1')
+ name = title.nil? ? subpath.rpartition('/').last.titleize : title.content
name.prepend "#{$1}. " if subpath =~ /\-pt(\d+)/
+ name += '()' unless at_css('.api-type-label.function').nil?
name
end
def get_type
if slug.start_with?('guide')
'Guide'
- elsif at_css('.api-type-label.module')
- name.split('/').first
elsif slug.start_with?('api/')
slug.split('/').second
else
'Miscellaneous'
end
end
+
+ def additional_entries
+ css('h3[id]').map do |node|
+ ["#{name}.#{node['name']}()", node['id']]
+ end
+ end
end
end
end
diff --git a/lib/docs/scrapers/rxjs.rb b/lib/docs/scrapers/rxjs.rb
index 1825fc80..e5ea1051 100644
--- a/lib/docs/scrapers/rxjs.rb
+++ b/lib/docs/scrapers/rxjs.rb
@@ -4,11 +4,26 @@ module Docs
class Rxjs < UrlScraper
self.name = 'RxJS'
self.type = 'rxjs'
+ self.release = '6.5.2'
+ self.base_url = 'https://rxjs.dev/'
+ self.root_path = 'guide/overview'
self.links = {
home: 'https://rxjs.dev/',
code: 'https://github.com/ReactiveX/rxjs'
}
+ html_filters.push 'rxjs/clean_html', 'rxjs/entries'
+
+ options[:follow_links] = false
+ options[:only_patterns] = [/guide\//, /api\//]
+ options[:skip_patterns] = [/api\/([^\/]+)\.json/]
+ options[:fix_urls_before_parse] = ->(url) do
+ url.sub! %r{\Aguide/}, '/guide/'
+ url.sub! %r{\Aapi/}, '/api/'
+ url.sub! %r{\Agenerated/}, '/generated/'
+ url
+ end
+
options[:max_image_size] = 256_000
options[:attribution] = <<-HTML
@@ -16,69 +31,54 @@ module Docs
Code licensed under an Apache-2.0 License. Documentation licensed under CC BY 4.0.
HTML
- module Common
- private
+ def get_latest_version(opts)
+ json = fetch_json('https://rxjs.dev/generated/navigation.json', opts)
+ json['__versionInfo']['raw']
+ end
- def initial_urls
- initial_urls = []
+ private
- Request.run "#{self.class.base_url}generated/navigation.json" do |response|
- data = JSON.parse(response.body)
- dig = ->(entry) do
- initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api'
- entry['children'].each(&dig) if entry['children']
- end
- data['SideNav'].each(&dig)
- end
+ def initial_urls
+ initial_urls = []
- Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response|
- data = JSON.parse(response.body)
- dig = ->(entry) do
- initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path']
- initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path']
- entry['items'].each(&dig) if entry['items']
- end
- data.each(&dig)
+ Request.run "#{self.class.base_url}generated/navigation.json" do |response|
+ data = JSON.parse(response.body)
+ dig = ->(entry) do
+ initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api'
+ entry['children'].each(&dig) if entry['children']
end
-
- initial_urls
+ data['SideNav'].each(&dig)
end
- def handle_response(response)
- if response.mime_type.include?('json')
- begin
- response.options[:response_body] = JSON.parse(response.body)['contents']
- rescue JSON::ParserError
- response.options[:response_body] = ''
- end
- response.headers['Content-Type'] = 'text/html'
- response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json')
- response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json')
+ Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response|
+ data = JSON.parse(response.body)
+ dig = ->(entry) do
+ initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path']
+ initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path']
+ entry['items'].each(&dig) if entry['items']
end
- super
+ data.each(&dig)
end
- end
- version do
- self.release = '6.3.3'
- self.base_url = 'https://rxjs.dev/'
- self.root_path = 'guide/overview'
-
- html_filters.push 'rxjs/clean_html', 'rxjs/entries'
-
- options[:follow_links] = false
- options[:only_patterns] = [/\Aguide/, /\Aapi/]
- options[:fix_urls_before_parse] = ->(url) do
- url.sub! %r{\Aguide/}, '/guide/'
- url.sub! %r{\Aapi/}, '/api/'
- url.sub! %r{\Agenerated/}, '/generated/'
- url
+ initial_urls.select do |url|
+ options[:only_patterns].any? { |pattern| url =~ pattern } &&
+ options[:skip_patterns].none? { |pattern| url =~ pattern }
end
-
- include Docs::Rxjs::Common
end
- private
+ def handle_response(response)
+ if response.mime_type.include?('json')
+ begin
+ response.options[:response_body] = JSON.parse(response.body)['contents']
+ rescue JSON::ParserError
+ response.options[:response_body] = ''
+ end
+ response.headers['Content-Type'] = 'text/html'
+ response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json')
+ response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json')
+ end
+ super
+ end
def parse(response)
response.body.gsub! '<code-example', '<pre'
diff --git a/public/icons/docs/rxjs/16.png b/public/icons/docs/rxjs/16.png
index 7db53a8a..790f8390 100644
Binary files a/public/icons/docs/rxjs/16.png and b/public/icons/docs/rxjs/16.png differ
diff --git a/public/icons/docs/rxjs/SOURCE b/public/icons/docs/rxjs/SOURCE
index 536eb88a..2a3b3084 100644
--- a/public/icons/docs/rxjs/SOURCE
+++ b/public/icons/docs/rxjs/SOURCE
@@ -1 +1 @@
-http://reactivex.io/
+https://github.com/ReactiveX/reactivex.github.io/blob/develop/favicon.ico
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]