[devdocsgjs/main: 1454/1867] ocaml: polish scraper
- From: Andy Holmes <andyholmes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [devdocsgjs/main: 1454/1867] ocaml: polish scraper
- Date: Fri, 19 Nov 2021 23:48:03 +0000 (UTC)
commit 7128d2d98878bed36c524cd60c7d80e3744e3e20
Author: Simon Legner <Simon Legner gmail com>
Date: Sun Jan 3 11:55:30 2021 +0100
ocaml: polish scraper
lib/docs/filters/ocaml/clean_html.rb | 21 ++++++++++++++++-----
lib/docs/filters/ocaml/entries.rb | 12 +++---------
2 files changed, 19 insertions(+), 14 deletions(-)
---
diff --git a/lib/docs/filters/ocaml/clean_html.rb b/lib/docs/filters/ocaml/clean_html.rb
index dea7a59a..a68284a5 100644
--- a/lib/docs/filters/ocaml/clean_html.rb
+++ b/lib/docs/filters/ocaml/clean_html.rb
@@ -2,21 +2,32 @@ module Docs
class Ocaml
class CleanHtmlFilter < Filter
def call
- css('pre').each do |node|
+
+ css('pre, .caml-example').each do |node|
+ span = node.at_css('span[id]')
+ node['id'] = span['id'] if span
+ node['data-type'] = "#{span.content} [#{at_css('h1').content}]" if span
node['data-language'] = 'ocaml'
+ node.name = 'pre'
+ node.content = node.content
end
css('.caml-input').each do |node|
node.content = '# ' + node.content.strip
end
- css('.caml-example').each do |node|
- node.name = 'pre'
- node.traverse { |n| n.remove if n.text? && n.text !~ /\S/ }
+ css('.maintitle *[style]').each do |node|
+ node.remove_attribute 'style'
+ end
- node['data-language'] = 'ocaml'
+ css('h1').each do |node|
+ node.content = node.content
+ table = node.ancestors('table.center')
+ table.first.before(node).remove if table.present?
end
+ css('.navbar').remove
+
doc
end
end
diff --git a/lib/docs/filters/ocaml/entries.rb b/lib/docs/filters/ocaml/entries.rb
index a85edd52..ebd3cb27 100644
--- a/lib/docs/filters/ocaml/entries.rb
+++ b/lib/docs/filters/ocaml/entries.rb
@@ -37,11 +37,8 @@ module Docs
module_node = css('h1').at_css('span')
- css('pre').each do |node|
- next unless span = node.at_css('span')
- if span['id'].nil?
- next
- elsif span['id'].start_with?('VAL')
+ css('pre > span[id]').each do |span|
+ if span['id'].start_with?('VAL')
entry_type = 'Values'
elsif span['id'].start_with?('MODULE')
entry_type = 'Modules'
@@ -52,12 +49,9 @@ module Docs
end
name = span.content
- if not module_node.nil?
- name = "#{name} [#{module_node.content}]"
- end
+ name += " [#{module_node.content}]" unless module_node.nil?
entries << [name, span['id'], entry_type]
end
-
entries
end
end
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]