[devdocsgjs/main: 583/1867] Add better logging and get_latest_version implementations for 10 scrapers




commit 2d1e8aa00cce957358cb741f180c3ff8dabfbefa
Author: Jasper van Merle <jaspervmerle gmail com>
Date:   Fri Mar 8 15:13:11 2019 +0100

    Add better logging and get_latest_version implementations for 10 scrapers

 lib/docs/core/scraper.rb        | 34 +++++++++++++++++++++++-
 lib/docs/scrapers/angular.rb    |  4 +++
 lib/docs/scrapers/angularjs.rb  |  4 +++
 lib/docs/scrapers/ansible.rb    |  6 +++++
 lib/docs/scrapers/apache.rb     |  6 +++++
 lib/docs/scrapers/apache_pig.rb |  6 +++++
 lib/docs/scrapers/async.rb      |  7 +++++
 lib/docs/scrapers/babel.rb      |  6 +++++
 lib/docs/scrapers/backbone.rb   |  7 +++++
 lib/docs/scrapers/bash.rb       |  7 +++++
 lib/docs/scrapers/bluebird.rb   |  4 +++
 lib/docs/scrapers/pygame.rb     |  1 +
 lib/tasks/updates.thor          | 57 ++++++++++++++++++++++++++---------------
 13 files changed, 128 insertions(+), 21 deletions(-)
---
diff --git a/lib/docs/core/scraper.rb b/lib/docs/core/scraper.rb
index 89191e48..a7e388a8 100644
--- a/lib/docs/core/scraper.rb
+++ b/lib/docs/core/scraper.rb
@@ -132,7 +132,7 @@ module Docs
       end
     end
 
-    def get_latest_version
+    def get_latest_version(&block)
       raise NotImplementedError
     end
 
@@ -231,6 +231,38 @@ module Docs
       {}
     end
 
+    # Utility methods for get_latest_version
+
+    def fetch(url, &block)
+      Request.run(url) do |response|
+        if response.success?
+          block.call response.body
+        else
+          block.call nil
+        end
+      end
+    end
+
+    def fetch_doc(url, &block)
+      fetch(url) do |body|
+        parser = Parser.new(body)
+        block.call parser.html
+      end
+    end
+
+    def fetch_json(url, &block)
+      fetch(url) do |body|
+        json = JSON.parse(body)
+        block.call json
+      end
+    end
+
+    def get_npm_version(package, &block)
+      fetch_json("https://registry.npmjs.com/#{package}") do |json|
+        block.call json['dist-tags']['latest']
+      end
+    end
+
     module FixInternalUrlsBehavior
       def self.included(base)
         base.extend ClassMethods
diff --git a/lib/docs/scrapers/angular.rb b/lib/docs/scrapers/angular.rb
index c318ce25..fa03eb36 100644
--- a/lib/docs/scrapers/angular.rb
+++ b/lib/docs/scrapers/angular.rb
@@ -155,6 +155,10 @@ module Docs
       end
     end
 
+    def get_latest_version(&block)
+      get_npm_version('@angular/core', &block)
+    end
+
     private
 
     def parse(response)
diff --git a/lib/docs/scrapers/angularjs.rb b/lib/docs/scrapers/angularjs.rb
index b8ff08b9..aa74ca1c 100644
--- a/lib/docs/scrapers/angularjs.rb
+++ b/lib/docs/scrapers/angularjs.rb
@@ -69,5 +69,9 @@ module Docs
       self.release = '1.2.32'
       self.base_url = "https://code.angularjs.org/#{release}/docs/partials/"
     end
+
+    def get_latest_version(&block)
+      get_npm_version('angular', &block)
+    end
   end
 end
diff --git a/lib/docs/scrapers/ansible.rb b/lib/docs/scrapers/ansible.rb
index 2d62909a..60fb1953 100644
--- a/lib/docs/scrapers/ansible.rb
+++ b/lib/docs/scrapers/ansible.rb
@@ -87,5 +87,11 @@ module Docs
         quickstart.html
         list_of_all_modules.html)
     end
+
+    def get_latest_version(&block)
+      fetch_doc('https://docs.ansible.com/ansible/latest/index.html') do |doc|
+        block.call doc.at_css('.DocSiteProduct-CurrentVersion').content.strip
+      end
+    end
   end
 end
diff --git a/lib/docs/scrapers/apache.rb b/lib/docs/scrapers/apache.rb
index 9ee82f12..5eca041e 100644
--- a/lib/docs/scrapers/apache.rb
+++ b/lib/docs/scrapers/apache.rb
@@ -33,5 +33,11 @@ module Docs
       &copy; 2018 The Apache Software Foundation<br>
       Licensed under the Apache License, Version 2.0.
     HTML
+
+    def get_latest_version(&block)
+      fetch_doc('http://httpd.apache.org/docs/') do |doc|
+        block.call doc.at_css('#apcontents > ul a')['href'][0...-1]
+      end
+    end
   end
 end
diff --git a/lib/docs/scrapers/apache_pig.rb b/lib/docs/scrapers/apache_pig.rb
index 65897a78..15c477bf 100644
--- a/lib/docs/scrapers/apache_pig.rb
+++ b/lib/docs/scrapers/apache_pig.rb
@@ -43,5 +43,11 @@ module Docs
       self.base_url = "https://pig.apache.org/docs/r#{release}/"
     end
 
+    def get_latest_version(&block)
+      fetch_doc('https://pig.apache.org/') do |doc|
+        item = doc.at_css('div[id="menu_1.2"] > .menuitem:last-child')
+        block.call item.content.strip.sub(/Release /, '')
+      end
+    end
   end
 end
diff --git a/lib/docs/scrapers/async.rb b/lib/docs/scrapers/async.rb
index 40022f19..930820b4 100644
--- a/lib/docs/scrapers/async.rb
+++ b/lib/docs/scrapers/async.rb
@@ -17,5 +17,12 @@ module Docs
       &copy; 2010&ndash;2018 Caolan McMahon<br>
       Licensed under the MIT License.
     HTML
+
+    def get_latest_version(&block)
+      fetch_doc('https://caolan.github.io/async/') do |doc|
+        version = doc.at_css('#version-dropdown > a').content.strip[1..-1]
+        block.call version
+      end
+    end
   end
 end
diff --git a/lib/docs/scrapers/babel.rb b/lib/docs/scrapers/babel.rb
index c9e40212..cc8bec6d 100644
--- a/lib/docs/scrapers/babel.rb
+++ b/lib/docs/scrapers/babel.rb
@@ -22,5 +22,11 @@ module Docs
     stub '' do
       '<div></div>'
     end
+
+    def get_latest_version(&block)
+      fetch_doc('https://babeljs.io/docs/en/') do |doc|
+        block.call doc.at_css('a[href="/versions"] > h3').content
+      end
+    end
   end
 end
diff --git a/lib/docs/scrapers/backbone.rb b/lib/docs/scrapers/backbone.rb
index b72b1084..2fb7662f 100644
--- a/lib/docs/scrapers/backbone.rb
+++ b/lib/docs/scrapers/backbone.rb
@@ -20,5 +20,12 @@ module Docs
       &copy; 2010&ndash;2016 Jeremy Ashkenas, DocumentCloud<br>
       Licensed under the MIT License.
     HTML
+
+    def get_latest_version(&block)
+      fetch_doc('https://backbonejs.org/') do |doc|
+        version = doc.at_css('.version').content
+        block.call version[1...-1]
+      end
+    end
   end
 end
diff --git a/lib/docs/scrapers/bash.rb b/lib/docs/scrapers/bash.rb
index feb0ddce..b62868a6 100644
--- a/lib/docs/scrapers/bash.rb
+++ b/lib/docs/scrapers/bash.rb
@@ -17,5 +17,12 @@ module Docs
       Copyright &copy; 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.<br>
       Licensed under the GNU Free Documentation License.
     HTML
+
+    def get_latest_version(&block)
+      fetch('https://www.gnu.org/software/bash/manual/html_node/index.html') do |body|
+        version = body.scan(/, Version ([0-9.]+)/)[0][0]
+        block.call version[0...-1]
+      end
+    end
   end
 end
diff --git a/lib/docs/scrapers/bluebird.rb b/lib/docs/scrapers/bluebird.rb
index e5cd6b59..73888004 100644
--- a/lib/docs/scrapers/bluebird.rb
+++ b/lib/docs/scrapers/bluebird.rb
@@ -18,5 +18,9 @@ module Docs
       &copy; 2013&ndash;2017 Petka Antonov<br>
       Licensed under the MIT License.
     HTML
+
+    def get_latest_version(&block)
+      get_npm_version('bluebird', &block)
+    end
   end
 end
diff --git a/lib/docs/scrapers/pygame.rb b/lib/docs/scrapers/pygame.rb
index 9da3148d..892619e4 100644
--- a/lib/docs/scrapers/pygame.rb
+++ b/lib/docs/scrapers/pygame.rb
@@ -2,6 +2,7 @@ module Docs
   class Pygame < UrlScraper
     self.type = 'simple'
     self.release = '1.9.4'
+    self.base_url = 'https://www.pygame.org/docs/'
     self.root_path = 'py-modindex.html'
     self.links = {
       home: 'https://www.pygame.org/',
diff --git a/lib/tasks/updates.thor b/lib/tasks/updates.thor
index 35d52e28..eb3467f2 100644
--- a/lib/tasks/updates.thor
+++ b/lib/tasks/updates.thor
@@ -9,7 +9,8 @@ class UpdatesCLI < Thor
     super
   end
 
-  desc 'check [doc]...', 'Check for outdated documentations'
+  desc 'check [--verbose] [doc]...', 'Check for outdated documentations'
+  option :verbose, :type => :boolean
   def check(*names)
     # Convert names to a list of Scraper instances
     # Versions are omitted, if v10 is outdated than v8 is aswell
@@ -27,13 +28,14 @@ class UpdatesCLI < Thor
       result
     end
 
-    outdated = results.select {|result| result.is_a?(Hash) && result[:is_outdated]}
-    return if outdated.empty?
+    valid_results = results.select {|result| result.is_a?(Hash)}
 
-    logger.info("Outdated documentations (#{outdated.length}):")
-    outdated.each do |result|
-      logger.info("#{result[:name]}: #{result[:current_version]} -> #{result[:latest_version]}")
-    end
+    up_to_date_results = valid_results.select {|result| !result[:is_outdated]}
+    outdated_results = valid_results.select {|result| result[:is_outdated]}
+
+    log_results('Up-to-date', up_to_date_results) if options[:verbose] and !up_to_date_results.empty?
+    logger.info("") if options[:verbose] and !up_to_date_results.empty? and !outdated_results.empty?
+    log_results('Outdated', outdated_results) unless outdated_results.empty?
   rescue Docs::DocNotFound => error
     logger.error(error)
     logger.info('Run "thor docs:list" to see the list of docs.')
@@ -42,33 +44,48 @@ class UpdatesCLI < Thor
   private
 
   def check_doc(doc)
-    # Scraper versions are always sorted from new to old
+    # Newer scraper versions always come before older scraper versions
     # Therefore, the first item's release value is the latest current scraper version
     #
     # For example, a scraper could scrape 3 versions: 10, 11 and 12
-    # doc.versions.first would be the scraper for version 12 if the scraper is written like all the other scrapers are
+    # doc.versions.first would be the scraper for version 12
     instance = doc.versions.first.new
 
+    return nil unless instance.class.method_defined?(:options)
+
     current_version = instance.options[:release]
     return nil if current_version.nil?
 
-    latest_version = instance.get_latest_version
-    return nil if latest_version.nil?
+    logger.debug("Checking #{doc.name}")
+
+    instance.get_latest_version do |latest_version|
+      return {
+        name: doc.name,
+        current_version: current_version,
+        latest_version: latest_version,
+        is_outdated: instance.is_outdated(current_version, latest_version)
+      }
+    end
 
-    {
-      name: doc.name,
-      current_version: current_version,
-      latest_version: latest_version,
-      is_outdated: instance.is_outdated(current_version, latest_version)
-    }
+    return nil
   rescue NotImplementedError
-    logger.warn("Can't check #{doc.name}, get_latest_version is not implemented")
-  rescue => error
-    logger.error("Error while checking #{doc.name}: #{error}")
+    logger.warn("Couldn't check #{doc.name}, get_latest_version is not implemented")
+  rescue
+    logger.error("Error while checking #{doc.name}")
+    raise
+  end
+
+  def log_results(label, results)
+    logger.info("#{label} documentations (#{results.length}):")
+
+    results.each do |result|
+      logger.info("#{result[:name]}: #{result[:current_version]} -> #{result[:latest_version]}")
+    end
   end
 
   def logger
     @logger ||= Logger.new($stdout).tap do |logger|
+      logger.level = options[:verbose] ? Logger::DEBUG : Logger::INFO
       logger.formatter = proc do |severity, datetime, progname, msg|
         prefix = severity != "INFO" ? "[#{severity}] " : ""
         "#{prefix}#{msg}\n"


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]