[Groonga-commit] ranguba/chupa-text-http-server at 1715bfa [master] Add support timeout and limits

Back to archive index
Kouhei Sutou null+****@clear*****
Thu Mar 28 16:38:16 JST 2019


Kouhei Sutou	2019-03-28 16:38:16 +0900 (Thu, 28 Mar 2019)

  Revision: 1715bfa601aeef536e1dbbfe270ba3aee051227d
  https://github.com/ranguba/chupa-text-http-server/commit/1715bfa601aeef536e1dbbfe270ba3aee051227d

  Message:
    Add support timeout and limits

  Modified files:
    Gemfile
    Gemfile.local.example
    app/controllers/extractions_controller.rb
    app/models/extraction.rb

  Modified: Gemfile (+1 -1)
===================================================================
--- Gemfile    2019-03-26 16:52:53 +0900 (628c2d9)
+++ Gemfile    2019-03-28 16:38:16 +0900 (4e39f14)
@@ -62,7 +62,7 @@ gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]
 gem 'bootstrap'
 gem 'jquery-rails'
 
-gem 'chupa-text'#, path: '../chupa-text'
+gem 'chupa-text', '>= 1.2.2'#, path: '../chupa-text'
 local_gemfile = "#{__dir__}/Gemfile.local"
 if File.exist?(local_gemfile)
   eval(File.read(local_gemfile), binding, local_gemfile, 1)

  Modified: Gemfile.local.example (+0 -1)
===================================================================
--- Gemfile.local.example    2019-03-26 16:52:53 +0900 (e0f78fa)
+++ Gemfile.local.example    2019-03-28 16:38:16 +0900 (c7aeea2)
@@ -2,7 +2,6 @@
 
 gem "chupa-text-decomposer-html"
 gem "chupa-text-decomposer-pdf"
-gem "chupa-text-decomposer-spreadsheet"
 gem "chupa-text-decomposer-libreoffice-excel"
 gem "chupa-text-decomposer-libreoffice-powerpoint"
 gem "chupa-text-decomposer-libreoffice-word"

  Modified: app/controllers/extractions_controller.rb (+8 -4)
===================================================================
--- app/controllers/extractions_controller.rb    2019-03-26 16:52:53 +0900 (00d3c79)
+++ app/controllers/extractions_controller.rb    2019-03-28 16:38:16 +0900 (ed1bacd)
@@ -28,12 +28,16 @@ class ExtractionsController < ApplicationController
     def extraction_params
       if params[:extraction]
         # For form
-        params.require(:extraction).permit(:data, :uri)
+        base_params = params.require(:extraction)
       else
         # For API
-        params
-          .except(:format, :utf8, :authenticity_token, :commit)
-          .permit(:data, :uri)
+        base_params = params.except(:format, :utf8, :authenticity_token, :commit)
       end
+      base_params.permit(:data,
+                         :uri,
+                         :timeout,
+                         :limit_cpu,
+                         :limit_as,
+                         :max_body_size)
     end
 end

  Modified: app/models/extraction.rb (+44 -0)
===================================================================
--- app/models/extraction.rb    2019-03-26 16:52:53 +0900 (e8a6e7d)
+++ app/models/extraction.rb    2019-03-28 16:38:16 +0900 (6234291)
@@ -3,8 +3,25 @@ class Extraction
 
   attr_accessor :data
   attr_accessor :uri
+  attr_accessor :timeout
+  attr_accessor :limit_cpu
+  attr_accessor :limit_as
+  attr_writer :max_body_size
 
   validates :data, presence: true, if: ->(record) {record.uri.blank?}
+  validates :max_body_size,
+            numericality: {only_integer: true},
+            allow_nil: true
+
+  def initialize(attributes={})
+    @data = nil
+    @uri = nil
+    @timeout = nil
+    @limit_cpu = nil
+    @limit_as = nil
+    @max_body_size = nil
+    super
+  end
 
   def persisted?
     false
@@ -14,6 +31,16 @@ class Extraction
     nil
   end
 
+  def max_body_size
+    if @max_body_size.is_a?(Numeric)
+      @max_body_size
+    elsif @max_body_size.blank?
+      nil
+    else
+      Integer(@max_body_size, 10)
+    end
+  end
+
   def extract
     return nil unless valid?
 
@@ -31,9 +58,11 @@ class Extraction
       end
       data = ChupaText::VirtualFileData.new(data_uri, @data.to_io)
       data.mime_type = @data.content_type if @data.content_type
+      setup_data(data)
     else
       begin
         data = ChupaText::InputData.new(@uri)
+        setup_data(data)
       rescue ChupaText::DownloadError => error
         errors.add(:uri, :invalid, message: error.message)
         return nil
@@ -44,9 +73,16 @@ class Extraction
     end
     formatter = ChupaText::Formatters::Hash.new
     formatter.format_start(data)
+    max = data.max_body_size
+    size = 0
     begin
       extractor.extract(data) do |extracted|
         formatter.format_extracted(extracted)
+        body = extracted.body
+        if max and body
+          size += body.bytesize
+          break if size >= max
+        end
       end
     rescue ChupaText::Error => error
       errors.add(:data, :invalid, message: error.message)
@@ -54,4 +90,12 @@ class Extraction
     end
     formatter.format_finish(data)
   end
+
+  private
+  def setup_data(data)
+    data.max_body_size = max_body_size
+    data.timeout = @timeout
+    data.limit_cpu = @limit_cpu
+    data.limit_as = @limit_as
+  end
 end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190328/f173e3cf/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index