[devdocsgjs/main: 46/69] Image scraping: evaluate Content-Length header




commit 965be77c0385bdce8e5ad155b817bd050d445c80
Author: Simon Legner <Simon Legner gmail com>
Date:   Tue Sep 6 14:48:51 2022 +0200

    Image scraping: evaluate Content-Length header

 lib/docs/core/response.rb           |  5 +++++
 lib/docs/filters/core/images.rb     |  7 +++++++
 test/lib/docs/core/response_test.rb | 11 +++++++++++
 3 files changed, 23 insertions(+)
---
diff --git a/lib/docs/core/response.rb b/lib/docs/core/response.rb
index 24a1807b..e0c2798b 100644
--- a/lib/docs/core/response.rb
+++ b/lib/docs/core/response.rb
@@ -12,6 +12,11 @@ module Docs
       body.blank?
     end
 
+    def content_length
+      value = headers['Content-Length'] || '0'
+      value.to_i
+    end
+
     def mime_type
       headers['Content-Type'] || 'text/plain'
     end
diff --git a/lib/docs/filters/core/images.rb b/lib/docs/filters/core/images.rb
index ad9fe26b..407583dc 100644
--- a/lib/docs/filters/core/images.rb
+++ b/lib/docs/filters/core/images.rb
@@ -46,6 +46,13 @@ module Docs
               next
             end
 
+            size = response.content_length
+
+            if size > (context[:max_image_size] || DEFAULT_MAX_SIZE)
+              instrument 'too_big.image', url: url, size: size
+              next
+            end
+
             image = response.body
 
             unless context[:optimize_images] == false
diff --git a/test/lib/docs/core/response_test.rb b/test/lib/docs/core/response_test.rb
index 4e13b67a..4142752d 100644
--- a/test/lib/docs/core/response_test.rb
+++ b/test/lib/docs/core/response_test.rb
@@ -63,6 +63,17 @@ class DocsResponseTest < MiniTest::Spec
     end
   end
 
+  describe "#content_length" do
+    it "returns the content length" do
+      options.headers['Content-Length'] = '188420'
+      assert_equal 188420, response.content_length
+    end
+
+    it "defaults to 0" do
+      assert_equal 0, response.content_length
+    end
+  end
+
   describe "#mime_type" do
     it "returns the content type" do
       options.headers['Content-Type'] = 'type'


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]