[devdocsgjs/main: 8/15] Update OCaml documentation (newline in code snippets)




commit 00c643a2b727eb538a41dafc9338554962e89372
Author: Simon Legner <Simon Legner gmail com>
Date:   Sun Jul 31 08:44:31 2022 +0200

    Update OCaml documentation (newline in code snippets)
    
    Fixes #1783.

 docs/file-scrapers.md                |  4 ++--
 lib/docs/filters/ocaml/clean_html.rb | 15 ++++++++++-----
 lib/docs/scrapers/ocaml.rb           | 12 +++++++++---
 3 files changed, 21 insertions(+), 10 deletions(-)
---
diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md
index 56025456..80b5032b 100644
--- a/docs/file-scrapers.md
+++ b/docs/file-scrapers.md
@@ -151,11 +151,11 @@ bsdtar --extract --file=- --directory=docs/numpy~$VERSION/
 ## OCaml
 
 Download from https://www.ocaml.org/docs/ the HTML reference:
-https://ocaml.org/releases/4.11/ocaml-4.11-refman-html.tar.gz
+https://v2.ocaml.org/releases/4.14/ocaml-4.14-refman-html.tar.gz
 and extract it as `/path/to/devdocs/docs/ocaml`:
 
 ```sh
-curl https://ocaml.org/releases/$VERSION/ocaml-$VERSION-refman-html.tar.gz | \
+curl https://v2.ocaml.org/releases/$VERSION/ocaml-$VERSION-refman-html.tar.gz | \
 tar xz --transform 's/htmlman/ocaml/' --directory docs/
 ```
 
diff --git a/lib/docs/filters/ocaml/clean_html.rb b/lib/docs/filters/ocaml/clean_html.rb
index a68284a5..70bac845 100644
--- a/lib/docs/filters/ocaml/clean_html.rb
+++ b/lib/docs/filters/ocaml/clean_html.rb
@@ -3,17 +3,19 @@ module Docs
     class CleanHtmlFilter < Filter
       def call
 
-        css('pre, .caml-example').each do |node|
+        css('pre').each do |node|
           span = node.at_css('span[id]')
           node['id'] = span['id'] if span
           node['data-type'] = "#{span.content} [#{at_css('h1').content}]" if span
           node['data-language'] = 'ocaml'
-          node.name = 'pre'
           node.content = node.content
         end
 
-        css('.caml-input').each do |node|
-          node.content = '# ' + node.content.strip
+        css('.caml-input ~ .caml-output').each do |node|
+          node.previous_element << "\n\n"
+          node.previous_element << node.content
+          node.previous_element.remove_class('caml-input')
+          node.remove
         end
 
         css('.maintitle *[style]').each do |node|
@@ -26,7 +28,10 @@ module Docs
           table.first.before(node).remove if table.present?
         end
 
-        css('.navbar').remove
+        css('.navbar', '#sidebar-button', 'hr').remove
+        css('img[alt="Previous"]', 'img[alt="Up"]', 'img[alt="Next"]').each do |node|
+          node.parent.remove
+        end
 
         doc
       end
diff --git a/lib/docs/scrapers/ocaml.rb b/lib/docs/scrapers/ocaml.rb
index 72de98f8..0ab64a90 100644
--- a/lib/docs/scrapers/ocaml.rb
+++ b/lib/docs/scrapers/ocaml.rb
@@ -4,7 +4,7 @@ module Docs
     self.type = 'ocaml'
     self.root_path = 'index.html'
     self.release = '4.14'
-    self.base_url = "https://www.ocaml.org/releases/#{self.release}/htmlman/";
+    self.base_url = "https://v2.ocaml.org/releases/#{self.release}/htmlman/";
     self.links = {
       home: 'https://ocaml.org/',
       code: 'https://github.com/ocaml/ocaml'
@@ -27,8 +27,14 @@ module Docs
     HTML
 
     def get_latest_version(opts)
-      doc = fetch_doc('https://www.ocaml.org/releases/', opts)
-      doc.css('#main-contents li > a').first.content
+      get_latest_github_release('ocaml', 'ocaml', opts)
+    end
+
+    private
+
+    def parse(response) # Hook here because Nokogiri removes whitespace from code fragments
+      response.body.gsub! %r{<div\ class="pre([^"]*)"[^>]*>([\W\w]+?)</div>}, '<pre class="\1">\2</pre>'
+      super
     end
 
   end


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]