[devdocsgjs/main: 1454/1867] ocaml: polish scraper




commit 7128d2d98878bed36c524cd60c7d80e3744e3e20
Author: Simon Legner <Simon Legner gmail com>
Date:   Sun Jan 3 11:55:30 2021 +0100

    ocaml: polish scraper

 lib/docs/filters/ocaml/clean_html.rb | 21 ++++++++++++++++-----
 lib/docs/filters/ocaml/entries.rb    | 12 +++---------
 2 files changed, 19 insertions(+), 14 deletions(-)
---
diff --git a/lib/docs/filters/ocaml/clean_html.rb b/lib/docs/filters/ocaml/clean_html.rb
index dea7a59a..a68284a5 100644
--- a/lib/docs/filters/ocaml/clean_html.rb
+++ b/lib/docs/filters/ocaml/clean_html.rb
@@ -2,21 +2,32 @@ module Docs
   class Ocaml
     class CleanHtmlFilter < Filter
       def call
-        css('pre').each do |node|
+
+        css('pre, .caml-example').each do |node|
+          span = node.at_css('span[id]')
+          node['id'] = span['id'] if span
+          node['data-type'] = "#{span.content} [#{at_css('h1').content}]" if span
           node['data-language'] = 'ocaml'
+          node.name = 'pre'
+          node.content = node.content
         end
 
         css('.caml-input').each do |node|
           node.content = '# ' + node.content.strip
         end
 
-        css('.caml-example').each do |node|
-          node.name = 'pre'
-          node.traverse { |n| n.remove if n.text? && n.text !~ /\S/ }
+        css('.maintitle *[style]').each do |node|
+          node.remove_attribute 'style'
+        end
 
-          node['data-language'] = 'ocaml'
+        css('h1').each do |node|
+          node.content = node.content
+          table = node.ancestors('table.center')
+          table.first.before(node).remove if table.present?
         end
 
+        css('.navbar').remove
+
         doc
       end
     end
diff --git a/lib/docs/filters/ocaml/entries.rb b/lib/docs/filters/ocaml/entries.rb
index a85edd52..ebd3cb27 100644
--- a/lib/docs/filters/ocaml/entries.rb
+++ b/lib/docs/filters/ocaml/entries.rb
@@ -37,11 +37,8 @@ module Docs
 
         module_node = css('h1').at_css('span')
 
-        css('pre').each do |node|
-          next unless span = node.at_css('span')
-          if span['id'].nil?
-            next
-          elsif span['id'].start_with?('VAL')
+        css('pre > span[id]').each do |span|
+          if span['id'].start_with?('VAL')
             entry_type = 'Values'
           elsif span['id'].start_with?('MODULE')
             entry_type = 'Modules'
@@ -52,12 +49,9 @@ module Docs
           end
 
           name = span.content
-          if not module_node.nil?
-            name = "#{name} [#{module_node.content}]"
-          end
+          name += " [#{module_node.content}]" unless module_node.nil?
           entries << [name, span['id'], entry_type]
         end
-
         entries
       end
     end


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]