[devdocsgjs/main: 1691/1867] Rewrite links by generating scraper :replace_paths from entries filter
- From: Andy Holmes <andyholmes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [devdocsgjs/main: 1691/1867] Rewrite links by generating scraper :replace_paths from entries filter
- Date: Fri, 19 Nov 2021 23:48:15 +0000 (UTC)
commit 005db388cec113f8956c56ef5787d8deb50c02b4
Author: Cimbali <me cimba li>
Date: Wed Jun 2 00:20:51 2021 +0200
Rewrite links by generating scraper :replace_paths from entries filter
lib/docs/filters/r/entries.rb | 9 ++++++---
lib/docs/scrapers/r.rb | 14 ++++++--------
2 files changed, 12 insertions(+), 11 deletions(-)
---
diff --git a/lib/docs/filters/r/entries.rb b/lib/docs/filters/r/entries.rb
index a9793e07..ed09345d 100644
--- a/lib/docs/filters/r/entries.rb
+++ b/lib/docs/filters/r/entries.rb
@@ -4,14 +4,17 @@ module Docs
PKG_INDEX_ENTRIES = Hash.new []
- def initialize(*)
- super
-
+ def call
if slug_parts[-1] == '00Index'
+ dir = File.dirname(result[:subpath])
css('tr a').each do |link|
PKG_INDEX_ENTRIES[link['href']] += [link.text]
+ next if link['href'] == link.text
+ context[:replace_paths][File.join(dir, "#{link.text}.html")] = File.join(dir, "#{link['href']}.html")
end
end
+
+ super
end
def slug_parts
diff --git a/lib/docs/scrapers/r.rb b/lib/docs/scrapers/r.rb
index e0e43355..308d1a6b 100644
--- a/lib/docs/scrapers/r.rb
+++ b/lib/docs/scrapers/r.rb
@@ -29,14 +29,12 @@ module Docs
/\.pdf$/
]
- ## We want to fix links like so − but only if the targets don’t exist,
- ## as these target packages or keywords that do not have their own file,
- ## but exist on another page, and we properly record it.
- #
- #options[:fix_urls] = ->(url) do
- # url.sub!(%r'/library/([^/]+)/doc/index.html$') { |m| "/r-#{$1.parameterize.downcase}/" }
- # url.sub!(%r'/library/([^/]+)/html/([^/]+).html$') { |m| "/library/#{$1.parameterize.downcase}/html/#{$2.parameterize.downcase}" }
- #end
+ options[:replace_paths] = {
+ ## We want to fix links like so − but only if the targets don’t exist:
+ # 'library/MASS/html/cov.mve.html' => 'library/MASS/html/cov.rob.html'
+ ## Paths for target packages or keywords that do not have their own file
+ ## are generated in the entries filter from 00Index.html files
+ }
options[:skip] = %w(
doc/html/packages-head-utf8.html
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]