[devdocsgjs/main: 1062/1867] Update Elixir's scraper




commit 83256157ab91dc16703cc6fa87c2d655b9456703
Author: David Chen <cgl david gmail com>
Date:   Sat May 23 14:24:23 2020 -0700

    Update Elixir's scraper

 lib/docs/filters/elixir/clean_html.rb | 47 ++++++++++++++++-------------------
 lib/docs/filters/elixir/entries.rb    | 14 +++++++----
 2 files changed, 31 insertions(+), 30 deletions(-)
---
diff --git a/lib/docs/filters/elixir/clean_html.rb b/lib/docs/filters/elixir/clean_html.rb
index cf703389..47cec84c 100644
--- a/lib/docs/filters/elixir/clean_html.rb
+++ b/lib/docs/filters/elixir/clean_html.rb
@@ -25,41 +25,38 @@ module Docs
       end
 
       def api
-        css('footer', '.view-source', 'h1 .visible-xs').remove
+        css('.hover-link', '.view-source', 'footer').remove
 
-        css('section section.docstring h2').each do |node|
-          node.name = 'h4'
+        css('.summary').each do |node|
+          node.name = 'dl'
         end
 
-        css('h1 .hover-link', '.detail-link').each do |node|
-          node.parent['id'] = node['href'].remove('#')
-          node.remove
+        css('.summary h2').each do |node|
+          node.content = node.inner_text
+          node.parent.before(node)
         end
 
-        css('.details-list').each do |list|
-          type = list['id'].remove(/s\z/) if list['id']
-          list.css('.detail-header').each do |node|
-            node.name = 'h3'
-            node['class'] += " #{type}" if type
-          end
+        css('.summary-signature').each do |node|
+          node.name = 'dt'
         end
 
-        css('.summary h2').each { |node| node.parent.before(node) }
-        css('.summary').each { |node| node.name = 'dl' }
-        css('.summary-signature').each { |node| node.name = 'dt' }
-        css('.summary-synopsis').each { |node| node.name = 'dd' }
-
-        css('section', 'div:not(.type-detail)', 'h2 a').each do |node|
-          node.before(node.children).remove
+        css('.summary-synopsis').each do |node|
+          node.name = 'dd'
         end
 
-        css('.detail-header > pre').each do |node|
-          node.parent.after(node)
-        end
+        css('section.detail').each do |detail|
+          id = detail['id']
+          detail.remove_attribute('id')
 
-        css('.signature').each do |node|
-          non_text_children = node.xpath('node()[not(self::text())]')
-          non_text_children.to_a.reverse.each { |child| node.parent.add_next_sibling(child) }
+          detail.css('.detail-header').each do |node|
+            node.name = 'h3'
+            node['id'] = id
+            node.content = node.at_css('.signature').inner_text
+          end
+
+          detail.css('.docstring h2').each do |node|
+            node.name = 'h4'
+          end
         end
 
         css('pre').each do |node|
diff --git a/lib/docs/filters/elixir/entries.rb b/lib/docs/filters/elixir/entries.rb
index 72794f87..24fd9415 100644
--- a/lib/docs/filters/elixir/entries.rb
+++ b/lib/docs/filters/elixir/entries.rb
@@ -41,21 +41,25 @@ module Docs
       end
 
       def additional_entries
-        return [] if type == 'Exceptions' || type == 'Guide'
+        return [] if type == 'Exceptions' || type == 'Guide' || root_page?
 
-        css('.detail-header .signature').map do |node|
-          id = node.parent['id']
+        css('.detail-header').map do |node|
+          id = node['id']
           name = node.content.strip
+
           name.remove! %r{\(.*\)}
           name.remove! 'left '
           name.remove! ' right'
           name.sub! 'sigil_', '~'
 
-          unless node.parent['class'].end_with?('macro') || self.name.start_with?('Kernel')
+          if self.name && !self.name.start_with?('Kernel')
             name.prepend "#{self.name}."
           end
 
-          name << " (#{id.split('/').last})" if id =~ /\/\d+\z/
+          if id =~ %r{/\d+\z}
+            arity = id.split('/').last
+            name << " (#{arity})"
+          end
 
           [name, id]
         end


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]