[devdocsgjs/main: 964/1867] sequelize: finish scraper and filters




commit 089aa6158aa9997da4f56eb40d2b81b6b91bf8f9
Author: Jasper van Merle <jaspervmerle gmail com>
Date:   Sat Oct 26 03:30:48 2019 +0200

    sequelize: finish scraper and filters

 lib/docs/filters/sequelize/clean_html.rb | 25 +++++++++++++++++++++++--
 lib/docs/filters/sequelize/entries.rb    | 25 ++++++++++++++++---------
 lib/docs/scrapers/sequelize.rb           |  8 ++++----
 3 files changed, 43 insertions(+), 15 deletions(-)
---
diff --git a/lib/docs/filters/sequelize/clean_html.rb b/lib/docs/filters/sequelize/clean_html.rb
index 269e28ff..27c4d552 100644
--- a/lib/docs/filters/sequelize/clean_html.rb
+++ b/lib/docs/filters/sequelize/clean_html.rb
@@ -2,16 +2,37 @@ module Docs
   class Sequelize
     class CleanHtmlFilter < Filter
       def call
+        @doc = at_css('.content')
+
         # Clean up the home page
-        if root_page?
+        if root_page? || subpath == "index.html"
           # Remove logo
           css('.manual-user-index > div > div.logo').remove
-          # Convert title to proper H1 element
+
+          # Convert title to proper h1 element
           at_css('.manual-user-index > div > div.sequelize').name = 'h1'
+
           # Remove badges (NPM, Travis, test coverage, etc.)
           css('.manual-user-index > p:nth-child(4)').remove
+
           # Remove image cards pointing to entries of the manual
           css('.manual-cards').remove
+
+          # Pull the header out of its container
+          header = at_css('h1')
+          header.parent.parent.parent.add_previous_sibling header
+        else
+          # Pull the header out of its container
+          header = at_css('h1')
+          header.parent.add_previous_sibling header
+        end
+
+        # Remove header notice
+        css('.header-notice').remove
+
+        # Change td in thead to th
+        css('table > thead > tr > td').each do |node|
+          node.name = 'th'
         end
 
         # Add syntax highlighting to code blocks
diff --git a/lib/docs/filters/sequelize/entries.rb b/lib/docs/filters/sequelize/entries.rb
index a3866386..6cf7ed0e 100644
--- a/lib/docs/filters/sequelize/entries.rb
+++ b/lib/docs/filters/sequelize/entries.rb
@@ -9,17 +9,24 @@ module Docs
       # Assign the pages to main categories
       def get_type
         if path.start_with?('manual/')
-          type = 'Manual'
-        elsif path.start_with?('file/lib/')
-          type = 'Source files'
+          'Manual'
+        elsif path.include?('lib/data-types')
+          'datatypes'
+        elsif path.include?('lib/errors/validation')
+          'errors/validation'
+        elsif path.include?('lib/errors/database')
+          'errors/database'
+        elsif path.include?('lib/errors/connection')
+          'errors/connection'
+        elsif path.include?('lib/errors')
+          'errors'
+        elsif path.include?('lib/associations')
+          'associations'
+        elsif path.include?('master/variable')
+          'variables'
         else
-          # API Reference pages. The `path` for most of these starts with 'class/lib/',
-          # but there's also 'variable/index' (pseudo-classes), and 'identifiers' (the main index)
-          # so we use an unqualified `else` as a catch-all.
-          type = 'Reference'
+          'classes'
         end
-
-        type
       end
     end
   end
diff --git a/lib/docs/scrapers/sequelize.rb b/lib/docs/scrapers/sequelize.rb
index e5bc4e80..9e49ca50 100644
--- a/lib/docs/scrapers/sequelize.rb
+++ b/lib/docs/scrapers/sequelize.rb
@@ -3,18 +3,18 @@ module Docs
     self.name = 'Sequelize'
     self.slug = 'sequelize'
     self.type = 'simple'
-    self.release = '5.19.6'
+    self.release = '5.21.1'
     self.base_url = 'https://sequelize.org/master/'
     self.links = {
       home: 'https://sequelize.org/',
-      code: 'https://github.com/sequelize/sequelize/'
+      code: 'https://github.com/sequelize/sequelize'
     }
 
     # List of content filters (to be applied sequentially)
     html_filters.push 'sequelize/entries', 'sequelize/clean_html'
 
-    # Wrapper element that holds the main content
-    options[:container] = '.content'
+    # Skip the source files, the license page and the "Who's using Sequelize" page
+    options[:skip_patterns] = [/\.js\.html/, /manual\/legal\.html/, /manual\/whos-using\.html/]
 
     # License information that appears appears at the bottom of the entry page
     options[:attribution] = <<-HTML


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]