[devdocsgjs/main: 4/76] Migrate c scraper from filescraper to urlscraper
- From: Andy Holmes <andyholmes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [devdocsgjs/main: 4/76] Migrate c scraper from filescraper to urlscraper
- Date: Fri, 15 Jul 2022 21:39:45 +0000 (UTC)
commit fdfcf3d9174d7e386af42412c3a2394d6e1eafca
Author: Enoc <brianhernandez222 hotmail com>
Date: Fri Sep 10 11:14:54 2021 -0600
Migrate c scraper from filescraper to urlscraper
assets/stylesheets/application.css.scss | 2 +-
assets/stylesheets/pages/{_c.scss => _cppref.scss} | 0
lib/docs/filters/c/clean_html.rb | 116 ---------------------
lib/docs/filters/c/entries.rb | 3 +
lib/docs/filters/c/fix_code.rb | 21 ----
lib/docs/filters/c/fix_urls.rb | 11 --
lib/docs/scrapers/c.rb | 42 --------
lib/docs/scrapers/cppref/c.rb | 12 +++
lib/docs/scrapers/cppref/cpp.rb | 9 --
lib/docs/scrapers/cppref/cppref.rb | 12 ++-
10 files changed, 24 insertions(+), 204 deletions(-)
---
diff --git a/assets/stylesheets/application.css.scss b/assets/stylesheets/application.css.scss
index 0243afeb..542e1510 100644
--- a/assets/stylesheets/application.css.scss
+++ b/assets/stylesheets/application.css.scss
@@ -39,7 +39,7 @@
'pages/async',
'pages/bash',
'pages/bootstrap',
- 'pages/c',
+ 'pages/cppref',
'pages/cakephp',
'pages/clojure',
'pages/codeception',
diff --git a/assets/stylesheets/pages/_c.scss b/assets/stylesheets/pages/_cppref.scss
similarity index 100%
rename from assets/stylesheets/pages/_c.scss
rename to assets/stylesheets/pages/_cppref.scss
diff --git a/lib/docs/filters/c/entries.rb b/lib/docs/filters/c/entries.rb
index 6c9f1565..63cfec61 100644
--- a/lib/docs/filters/c/entries.rb
+++ b/lib/docs/filters/c/entries.rb
@@ -22,6 +22,9 @@ module Docs
end
def get_type
+
+ return "C keywords" if slug =~ /keyword/
+
type = at_css('.t-navbar > div:nth-child(4) > :first-child').try(:content)
type.strip!
type.remove! ' library'
diff --git a/lib/docs/scrapers/cppref/c.rb b/lib/docs/scrapers/cppref/c.rb
new file mode 100644
index 00000000..faa48fb3
--- /dev/null
+++ b/lib/docs/scrapers/cppref/c.rb
@@ -0,0 +1,12 @@
+module Docs
+ class C < Cppref
+ self.name = 'c'
+ self.slug = 'c'
+ self.base_url = 'https://en.cppreference.com/w/c/'
+
+ html_filters.insert_before 'cppref/clean_html', 'c/entries'
+
+ options[:root_title] = 'C Programming Language'
+
+ end
+end
diff --git a/lib/docs/scrapers/cppref/cpp.rb b/lib/docs/scrapers/cppref/cpp.rb
index bfc87c62..4f259729 100644
--- a/lib/docs/scrapers/cppref/cpp.rb
+++ b/lib/docs/scrapers/cppref/cpp.rb
@@ -2,7 +2,6 @@ module Docs
class Cpp < Cppref
self.name = 'C++'
self.slug = 'cpp'
- self.type = 'c'
self.base_url = 'https://en.cppreference.com/w/cpp/'
html_filters.insert_before 'cppref/clean_html', 'cpp/entries'
@@ -16,13 +15,5 @@ module Docs
regex/regex_token_iterator/operator_cmp.html
)
- # Same as get_latest_version in lib/docs/scrapers/c.rb
- def get_latest_version(opts)
- doc = fetch_doc('https://en.cppreference.com/w/Cppreference:Archives', opts)
- link = doc.at_css('a[title^="File:"]')
- date = link.content.scan(/(\d+)\./)[0][0]
- DateTime.strptime(date, '%Y%m%d').to_time.to_i
- end
-
end
end
diff --git a/lib/docs/scrapers/cppref/cppref.rb b/lib/docs/scrapers/cppref/cppref.rb
index b91751ef..85bbc771 100644
--- a/lib/docs/scrapers/cppref/cppref.rb
+++ b/lib/docs/scrapers/cppref/cppref.rb
@@ -6,7 +6,7 @@ module Docs
html_filters.insert_before 'clean_html', 'cppref/fix_code'
html_filters.push 'cppref/clean_html', 'title'
- # 'cpp20/entries',
+
options[:decode_and_clean_paths] = true
options[:container] = '#content'
options[:title] = false
@@ -21,9 +21,13 @@ module Docs
Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
HTML
- # def get_latest_version
-
- # end
+ # Check if the 'headers' page has changed
+ def get_latest_version(opts)
+ doc = fetch_doc(self.base_url + self.root_path, opts)
+ date = doc.at_css('#footer-info-lastmod').content
+ date = date.match(/[[:digit:]]{1,2} .* [[:digit:]]{4}/).to_s
+ date = DateTime.strptime(date, '%e %B %Y').to_time.to_i
+ end
end
end
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]