[devdocsgjs/main: 1390/1867] Fix scraper and improve readability
- From: Andy Holmes <andyholmes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [devdocsgjs/main: 1390/1867] Fix scraper and improve readability
- Date: Fri, 19 Nov 2021 23:47:56 +0000 (UTC)
commit a8c5c31712dd1bcdb870e7bf2e18033ee8dba406
Author: MasterEnoc <brianhernandez222 hotmail com>
Date: Tue Dec 15 14:32:49 2020 -0600
Fix scraper and improve readability
lib/docs/filters/rethinkdb/entries.rb | 11 +++++-
lib/docs/scrapers/rethinkdb.rb | 72 +++++++++++++++++++++++++++--------
2 files changed, 66 insertions(+), 17 deletions(-)
---
diff --git a/lib/docs/filters/rethinkdb/entries.rb b/lib/docs/filters/rethinkdb/entries.rb
index 5e49386b..a0239454 100644
--- a/lib/docs/filters/rethinkdb/entries.rb
+++ b/lib/docs/filters/rethinkdb/entries.rb
@@ -1,9 +1,17 @@
module Docs
class Rethinkdb
class EntriesFilter < Docs::EntriesFilter
+
def get_name
if subpath.start_with?('api')
- at_css('.title').content.remove('ReQL command:').split(', ').first
+ name = at_css('.title').content.remove('ReQL command:').split(', ').first
+
+ if name.strip.empty?
+ 'lt'
+ else
+ name
+ end
+
else
at_css('.docs-nav .active').content
end
@@ -30,6 +38,7 @@ module Docs
def include_default_entry?
at_css('.docs-article p').try(:content) != 'Choose your language:'
end
+
end
end
end
diff --git a/lib/docs/scrapers/rethinkdb.rb b/lib/docs/scrapers/rethinkdb.rb
index 4db5c464..b98e2cea 100644
--- a/lib/docs/scrapers/rethinkdb.rb
+++ b/lib/docs/scrapers/rethinkdb.rb
@@ -13,14 +13,18 @@ module Docs
html_filters.push 'rethinkdb/entries', 'rethinkdb/clean_html'
options[:trailing_slash] = true
+
options[:container] = '.documentation'
options[:only_patterns] = [/\Adocs/]
+
options[:skip_patterns] = [/docs\/install(\-drivers)?\/./]
+
options[:skip] = %w(
docs/build/
docs/tutorials/elections/
- docs/tutorials/superheroes/)
+ docs/tutorials/superheroes/
+ )
MULTILANG_DOCS = %w(
changefeeds
@@ -33,31 +37,66 @@ module Docs
rabbitmq
secondary-indexes
sql-to-reql
- storing-binary)
+ storing-binary
+ )
options[:attribution] = <<-HTML
© RethinkDB contributors<br>
Licensed under the Creative Commons Attribution-ShareAlike 3.0 Unported License.
HTML
- %w(JavaScript Ruby Python Java).each do |name|
- path = name.downcase
- instance_eval <<-CODE
- version '#{name}' do
- self.initial_paths = %w(api/#{path}/)
+ version 'javascript' do
+ self.initial_paths = %w(api/javascript/)
- options[:only_patterns] += [/\\Aapi\\/#{path}\\//]
+ options[:only_patterns] += [/\Aapi\/javascript\//]
- options[:fix_urls] = ->(url) do
- url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})\\z}, 'rethinkdb.com/docs/\\1/#{path}/'
- url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})/(?!#{path}/).*}, 'rethinkdb.com/docs/\\1/#{path}/'
- url.sub! %r{rethinkdb.com/api/(?!javascript|ruby|python|java)}, 'rethinkdb.com/api/#{path}/'
- url
- end
- end
- CODE
+ options[:fix_urls] = ->(url) do
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})\\z}, 'rethinkdb.com/docs/\\1/javascript/'
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})/(?!javascript/).*}, 'rethinkdb.com/docs/\\1/javascript/'
+ url.sub! %r{rethinkdb.com/api/(?!javascript|ruby|python|java)}, 'rethinkdb.com/api/javascript/'
+ url
+ end
end
+ version 'ruby' do
+ self.initial_paths = %w(api/ruby/)
+
+ options[:only_patterns] += [/\Aapi\/ruby\//]
+
+ options[:fix_urls] = ->(url) do
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})\\z}, 'rethinkdb.com/docs/\\1/ruby/'
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})/(?!ruby/).*}, 'rethinkdb.com/docs/\\1/ruby/'
+ url.sub! %r{rethinkdb.com/api/(?!javascript|ruby|python|java)}, 'rethinkdb.com/api/ruby/'
+ url
+ end
+ end
+
+ version 'python' do
+ self.initial_paths = %w(api/python/)
+
+ options[:only_patterns] += [/\Aapi\/python\//]
+
+ options[:fix_urls] = ->(url) do
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})\\z}, 'rethinkdb.com/docs/\\1/python/'
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})/(?!python/).*}, 'rethinkdb.com/docs/\\1/python/'
+ url.sub! %r{rethinkdb.com/api/(?!javascript|ruby|python|java)}, 'rethinkdb.com/api/python/'
+ url
+ end
+ end
+
+ version 'java' do
+ self.initial_paths = %w(api/java/)
+
+ options[:only_patterns] += [/\Aapi\/java\//]
+
+ options[:fix_urls] = ->(url) do
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})\\z}, 'rethinkdb.com/docs/\\1/java/'
+ url.sub! %r{rethinkdb.com/docs/(#{MULTILANG_DOCS.join('|')})/(?!java/).*}, 'rethinkdb.com/docs/\\1/java/'
+ url.sub! %r{rethinkdb.com/api/(?!javascript|ruby|python|java)}, 'rethinkdb.com/api/java/'
+ url
+ end
+ end
+
def get_latest_version(opts)
get_latest_github_release('rethinkdb', 'rethinkdb', opts)
end
@@ -68,5 +107,6 @@ module Docs
return false unless super
response.body !~ /http-equiv="refresh"/i
end
+
end
end
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]