[devdocsgjs/main: 1263/1867] PyTorch 1.6+ scraper code cleanup




commit 5afcd785d79d94c6f35d57a1a35b9066a85a5eb8
Author: Phil Scherer <pnscher evoforge org>
Date:   Sat Nov 21 01:40:58 2020 +0000

    PyTorch 1.6+ scraper code cleanup

 lib/docs/filters/pytorch/entries.rb | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)
---
diff --git a/lib/docs/filters/pytorch/entries.rb b/lib/docs/filters/pytorch/entries.rb
index 4a4580da..c7168f0e 100644
--- a/lib/docs/filters/pytorch/entries.rb
+++ b/lib/docs/filters/pytorch/entries.rb
@@ -1,27 +1,23 @@
 module Docs
   class Pytorch
     class EntriesFilter < Docs::EntriesFilter
-      def get_name
-        breadcrumbs = at_css('.pytorch-breadcrumbs')
-        name_in_breadcrumb = breadcrumbs.css('li')[1].content
-
-        article = at_css('.pytorch-article')
+      NAME_REPLACEMENTS = {
+        "Distributed communication package - torch.distributed" => "torch.distributed"
+      }
 
-        # hard-coded name replacements, for better presentation.
-        name_replacements = {
-          "Distributed communication package - torch.distributed" => "torch.distributed"
-        }
+      def get_breadcrumbs()
+        css('.pytorch-breadcrumbs > li').map { |node| node.content.delete_suffix(' >') }
+      end
 
+      def get_name
         # The id of the container `div.section` indicates the page type.
         # If the id starts with `module-`, then it's an API reference,
         # otherwise it is a note or design doc.
-        article_id = article.at_css('div.section')['id']
-        if article_id.starts_with? 'module-'
+        if at_css('.section')['id'].starts_with? 'module-'
           /\Amodule-(.*)/.match(article_id)[1]
         else
-          name_in_breadcrumb = name_in_breadcrumb.delete_suffix(' >')
-          name_in_breadcrumb = name_replacements.fetch(name_in_breadcrumb, name_in_breadcrumb)
-          name_in_breadcrumb
+          name = get_breadcrumbs()[1]
+          NAME_REPLACEMENTS.fetch(name, name)
         end
       end
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]