[devdocsgjs/main: 618/1867] Add Django Rest Framework scraper




commit 14623be6285120a78c9d4f33cd831e48600efa6f
Author: Emil Maruszczak <emilekm gmail com>
Date:   Mon Apr 29 01:02:25 2019 +0200

    Add Django Rest Framework scraper

 lib/docs/filters/rest_framework/clean_html.rb |  36 ++++++++++++++++
 lib/docs/filters/rest_framework/entries.rb    |  60 ++++++++++++++++++++++++++
 lib/docs/scrapers/rest_framework.rb           |  37 ++++++++++++++++
 public/icons/docs/rest_framework/16.png       | Bin 0 -> 1166 bytes
 public/icons/docs/rest_framework/16@2x.png    | Bin 0 -> 4254 bytes
 public/icons/docs/rest_framework/SOURCE       |   1 +
 6 files changed, 134 insertions(+)
---
diff --git a/lib/docs/filters/rest_framework/clean_html.rb b/lib/docs/filters/rest_framework/clean_html.rb
new file mode 100644
index 00000000..007b33b2
--- /dev/null
+++ b/lib/docs/filters/rest_framework/clean_html.rb
@@ -0,0 +1,36 @@
+module Docs
+  class RestFramework
+    # Tidies Django REST Framework pages for DevDocs.
+    class CleanHtmlFilter < Docs::Filter
+      # Strips rulers and badges, tags code blocks as Python and appends a
+      # "_links" paragraph holding the page's GitHub links. Returns the doc.
+      def call
+        css('hr, .badges').remove
+
+        css('pre').each do |node|
+          node['data-language'] = 'python'
+        end
+
+        css('h1').each do |node|
+          node.remove_attribute('style')
+        end
+
+        # Translate source file links to DevDocs links
+        links = Nokogiri::XML::Node.new('p', doc)
+        links['class'] = '_links'
+
+        css('a.github').each do |node|
+          # Assigning content replaces every child node, so the span needs no
+          # separate removal; a link without a span keeps its own text.
+          node.content = (node.at_css('span') || node).content
+          node['class'] = '_links-link'
+          links.add_child(node)
+        end
+        # Don't leave an empty paragraph on pages without source links
+        doc.add_child(links) unless links.children.empty?
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/rest_framework/entries.rb b/lib/docs/filters/rest_framework/entries.rb
new file mode 100644
index 00000000..53b2fce0
--- /dev/null
+++ b/lib/docs/filters/rest_framework/entries.rb
@@ -0,0 +1,60 @@
+module Docs
+  class RestFramework
+    # Builds the DevDocs index entries for Django REST Framework pages.
+    class EntriesFilter < Docs::EntriesFilter
+      # H1 titles whose following H2s are all framework classes
+      CATEGORY_HEADERS = ['API Reference', 'API Guide'].freeze
+      # Suffixes identifying class headers (H1 or H2) outside those categories
+      CLASS_ENDINGS = %w[Validator Field View Mixin Default Serializer].freeze
+      # Category with entries that could be matched (and shouldn't be)
+      THIRD_PARTY_HEADER = 'Third party packages'.freeze
+
+      # First H1 of the page without the "Tutorial " prefix; the Quickstart
+      # gets a "0: " prefix (presumably to sort before the numbered pages).
+      def get_name
+        title = at_css('h1').content.sub('Tutorial ', '')
+        title.include?('Quickstart') ? "0: #{title}" : title
+      end
+
+      # 'Tutorial' or 'API Guide' depending on the subpath, nil otherwise.
+      def get_type
+        case subpath
+        when /\Atutorial/
+          'Tutorial'
+        when /\Aapi-guide/
+          'API Guide'
+        end
+      end
+
+      # Returns [name, id, type] triples for the classes found on a page
+      # that is neither untyped nor a tutorial. To avoid writing down all
+      # the endings and to ensure all entries in API categories are matched,
+      # two ways of finding them are used: every H2 inside a category is
+      # taken, and elsewhere headers are matched by their ending.
+      def additional_entries
+        return [] if type.nil? || type == 'Tutorial'
+
+        entries = []
+        local_type = "Ref: #{name}"
+        in_category = false
+
+        css('h1, h2').each do |node|
+          title = node.content
+          break if title == THIRD_PARTY_HEADER
+
+          if in_category && node.name == 'h1'
+            in_category = false # an H1 ends the category and is not indexed
+          elsif in_category
+            entries << [title, node['id'], local_type]
+          elsif CATEGORY_HEADERS.include?(title)
+            in_category = true
+          elsif title.end_with?(*CLASS_ENDINGS)
+            entries << [title, node['id'], local_type]
+          end
+        end
+
+        entries
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/rest_framework.rb b/lib/docs/scrapers/rest_framework.rb
new file mode 100644
index 00000000..fa64b080
--- /dev/null
+++ b/lib/docs/scrapers/rest_framework.rb
@@ -0,0 +1,37 @@
+module Docs
+  class RestFramework < UrlScraper
+    self.name = 'Django REST Framework'
+    self.release = '3.9.2'
+    self.slug = 'rest_framework'
+    self.type = 'mkdocs'
+    self.base_url = 'https://www.django-rest-framework.org/'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://www.django-rest-framework.org/',
+      code: 'https://github.com/encode/django-rest-framework'
+    }
+
+    html_filters.push 'mkdocs/clean_html', 'rest_framework/clean_html', 'rest_framework/entries'
+
+    options[:skip_patterns] = [
+      /\Atopics\//,
+      /\Acommunity\//,
+    ]
+
+    options[:attribution] = <<-HTML
+      Copyright 2011&ndash;present Encode OSS Ltd<br>
+      Licensed under the BSD License.
+    HTML
+
+    private
+
+    # Some scraped URLs lack the trailing slash, which leads to page
+    # duplication, so add it unless the path ends with '/' or index.html.
+    def handle_response(response)
+      path = response.url.path
+      path << '/' unless path.end_with?('/', 'index.html')
+      super
+    end
+  end
+end
diff --git a/public/icons/docs/rest_framework/16.png b/public/icons/docs/rest_framework/16.png
new file mode 100644
index 00000000..e2e33539
Binary files /dev/null and b/public/icons/docs/rest_framework/16.png differ
diff --git a/public/icons/docs/rest_framework/16@2x.png b/public/icons/docs/rest_framework/16@2x.png
new file mode 100644
index 00000000..e4ea76ba
Binary files /dev/null and b/public/icons/docs/rest_framework/16@2x.png differ
diff --git a/public/icons/docs/rest_framework/SOURCE b/public/icons/docs/rest_framework/SOURCE
new file mode 100644
index 00000000..96950eed
--- /dev/null
+++ b/public/icons/docs/rest_framework/SOURCE
@@ -0,0 +1 @@
+https://github.com/encode/django-rest-framework/blob/master/docs_theme/img/favicon.ico


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]