Kouhei Sutou 2019-03-28 16:38:16 +0900 (Thu, 28 Mar 2019) Revision: 1715bfa601aeef536e1dbbfe270ba3aee051227d https://github.com/ranguba/chupa-text-http-server/commit/1715bfa601aeef536e1dbbfe270ba3aee051227d Message: Add support timeout and limits Modified files: Gemfile Gemfile.local.example app/controllers/extractions_controller.rb app/models/extraction.rb Modified: Gemfile (+1 -1) =================================================================== --- Gemfile 2019-03-26 16:52:53 +0900 (628c2d9) +++ Gemfile 2019-03-28 16:38:16 +0900 (4e39f14) @@ -62,7 +62,7 @@ gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] gem 'bootstrap' gem 'jquery-rails' -gem 'chupa-text'#, path: '../chupa-text' +gem 'chupa-text', '>= 1.2.2'#, path: '../chupa-text' local_gemfile = "#{__dir__}/Gemfile.local" if File.exist?(local_gemfile) eval(File.read(local_gemfile), binding, local_gemfile, 1) Modified: Gemfile.local.example (+0 -1) =================================================================== --- Gemfile.local.example 2019-03-26 16:52:53 +0900 (e0f78fa) +++ Gemfile.local.example 2019-03-28 16:38:16 +0900 (c7aeea2) @@ -2,7 +2,6 @@ gem "chupa-text-decomposer-html" gem "chupa-text-decomposer-pdf" -gem "chupa-text-decomposer-spreadsheet" gem "chupa-text-decomposer-libreoffice-excel" gem "chupa-text-decomposer-libreoffice-powerpoint" gem "chupa-text-decomposer-libreoffice-word" Modified: app/controllers/extractions_controller.rb (+8 -4) =================================================================== --- app/controllers/extractions_controller.rb 2019-03-26 16:52:53 +0900 (00d3c79) +++ app/controllers/extractions_controller.rb 2019-03-28 16:38:16 +0900 (ed1bacd) @@ -28,12 +28,16 @@ class ExtractionsController < ApplicationController def extraction_params if params[:extraction] # For form - params.require(:extraction).permit(:data, :uri) + base_params = params.require(:extraction) else # For API - params - .except(:format, :utf8, :authenticity_token, :commit) - 
.permit(:data, :uri) + base_params = params.except(:format, :utf8, :authenticity_token, :commit) end + base_params.permit(:data, + :uri, + :timeout, + :limit_cpu, + :limit_as, + :max_body_size) end end Modified: app/models/extraction.rb (+44 -0) =================================================================== --- app/models/extraction.rb 2019-03-26 16:52:53 +0900 (e8a6e7d) +++ app/models/extraction.rb 2019-03-28 16:38:16 +0900 (6234291) @@ -3,8 +3,25 @@ class Extraction attr_accessor :data attr_accessor :uri + attr_accessor :timeout + attr_accessor :limit_cpu + attr_accessor :limit_as + attr_writer :max_body_size validates :data, presence: true, if: ->(record) {record.uri.blank?} + validates :max_body_size, + numericality: {only_integer: true}, + allow_nil: true + + def initialize(attributes={}) + @data = nil + @uri = nil + @timeout = nil + @limit_cpu = nil + @limit_as = nil + @max_body_size = nil + super + end def persisted? false @@ -14,6 +31,16 @@ class Extraction nil end + def max_body_size + if @max_body_size.is_a?(Numeric) + @max_body_size + elsif @max_body_size.blank? + nil + else + Integer(@max_body_size, 10) + end + end + def extract return nil unless valid? 
@@ -31,9 +58,11 @@ class Extraction end data = ChupaText::VirtualFileData.new(data_uri, @data.to_io) data.mime_type = @data.content_type if @data.content_type + setup_data(data) else begin data = ChupaText::InputData.new(@uri) + setup_data(data) rescue ChupaText::DownloadError => error errors.add(:uri, :invalid, message: error.message) return nil @@ -44,9 +73,16 @@ class Extraction end formatter = ChupaText::Formatters::Hash.new formatter.format_start(data) + max = data.max_body_size + size = 0 begin extractor.extract(data) do |extracted| formatter.format_extracted(extracted) + body = extracted.body + if max and body + size += body.bytesize + break if size >= max + end end rescue ChupaText::Error => error errors.add(:data, :invalid, message: error.message) @@ -54,4 +90,12 @@ class Extraction end formatter.format_finish(data) end + + private + def setup_data(data) + data.max_body_size = max_body_size + data.timeout = @timeout + data.limit_cpu = @limit_cpu + data.limit_as = @limit_as + end end -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190328/f173e3cf/attachment-0001.html>