[Groonga-commit] ranguba/chupa-text at 6a8f6b3 [master] Fix infinite loop by CSV decomposer

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jul 5 18:09:39 JST 2017


Kouhei Sutou	2017-07-05 18:09:39 +0900 (Wed, 05 Jul 2017)

  New Revision: 6a8f6b3f887693fc2f9dc10b47d4fe954ae36364
  https://github.com/ranguba/chupa-text/commit/6a8f6b3f887693fc2f9dc10b47d4fe954ae36364

  Message:
    Fix infinite loop by CSV decomposer

  Added files:
    test/fixture/command/chupa-text/numbers.csv
  Modified files:
    lib/chupa-text/command/chupa-text.rb
    lib/chupa-text/configuration.rb
    lib/chupa-text/data.rb
    lib/chupa-text/decomposers/csv.rb
    lib/chupa-text/decomposers/gzip.rb
    lib/chupa-text/decomposers/tar.rb
    lib/chupa-text/decomposers/xml.rb
    test/command/test-chupa-text.rb

  Modified: lib/chupa-text/command/chupa-text.rb (+1 -1)
===================================================================
--- lib/chupa-text/command/chupa-text.rb    2017-07-05 16:12:57 +0900 (f0df9fc)
+++ lib/chupa-text/command/chupa-text.rb    2017-07-05 18:09:39 +0900 (96a600f)
@@ -30,7 +30,7 @@ module ChupaText
 
       def initialize
         @input = nil
-        @configuration = Configuration.default
+        @configuration = Configuration.load_default
         @enable_gems = true
         @format = :json
       end

  Modified: lib/chupa-text/configuration.rb (+2 -3)
===================================================================
--- lib/chupa-text/configuration.rb    2017-07-05 16:12:57 +0900 (16e9771)
+++ lib/chupa-text/configuration.rb    2017-07-05 18:09:39 +0900 (cf92932)
@@ -18,11 +18,10 @@ module ChupaText
   class Configuration
     class << self
       def default
-        @default ||= create_default
+        @default ||= load_default
       end
 
-      private
-      def create_default
+      def load_default
         configuration = new
         loader = ConfigurationLoader.new(configuration)
         loader.load("chupa-text.conf")

  Modified: lib/chupa-text/data.rb (+4 -1)
===================================================================
--- lib/chupa-text/data.rb    2017-07-05 16:12:57 +0900 (fe68795)
+++ lib/chupa-text/data.rb    2017-07-05 18:09:39 +0900 (e98bb9a)
@@ -62,7 +62,10 @@ module ChupaText
       @source = nil
       @options = options || {}
       source_data = @options[:source_data]
-      merge!(source_data) if source_data
+      if source_data
+        merge!(source_data)
+        @source = source_data
+      end
     end
 
     def initialize_copy(object)

  Modified: lib/chupa-text/decomposers/csv.rb (+9 -4)
===================================================================
--- lib/chupa-text/decomposers/csv.rb    2017-07-05 16:12:57 +0900 (67a81c8)
+++ lib/chupa-text/decomposers/csv.rb    2017-07-05 18:09:39 +0900 (390f5bd)
@@ -22,8 +22,14 @@ module ChupaText
       registry.register("csv", self)
 
       def target?(data)
-        data.extension == "csv" or
-          data.mime_type == "text/csv"
+        return true if data.mime_type == "text/csv"
+
+        if data.text_plain? and
+            (data["source-mime-types"] || []).include?("text/csv")
+          return false
+        end
+
+        data.extension == "csv"
       end
 
       def decompose(data)
@@ -35,8 +41,7 @@ module ChupaText
             text << "\n"
           end
         end
-        text_data = TextData.new(text)
-        text_data.uri = data.uri
+        text_data = TextData.new(text, :source_data => data)
         yield(text_data)
       end
     end

  Modified: lib/chupa-text/decomposers/gzip.rb (+1 -2)
===================================================================
--- lib/chupa-text/decomposers/gzip.rb    2017-07-05 16:12:57 +0900 (a404e97)
+++ lib/chupa-text/decomposers/gzip.rb    2017-07-05 18:09:39 +0900 (c06490b)
@@ -42,8 +42,7 @@ module ChupaText
         when "tgz"
           uri = data.uri.to_s.gsub(/\.tgz\z/i, ".tar")
         end
-        extracted = VirtualFileData.new(uri, reader)
-        extracted.source = data
+        extracted = VirtualFileData.new(uri, reader, :source_data => data)
         yield(extracted)
       end
     end

  Modified: lib/chupa-text/decomposers/tar.rb (+2 -2)
===================================================================
--- lib/chupa-text/decomposers/tar.rb    2017-07-05 16:12:57 +0900 (2794d5b)
+++ lib/chupa-text/decomposers/tar.rb    2017-07-05 18:09:39 +0900 (89a343a)
@@ -32,8 +32,8 @@ module ChupaText
           reader.each do |entry|
             next unless entry.file?
             entry.extend(CopyStreamable)
-            extracted = VirtualFileData.new(entry.full_name, entry)
-            extracted.source = data
+            extracted = VirtualFileData.new(entry.full_name, entry,
+                                            :source_data => data)
             yield(extracted)
           end
         end

  Modified: lib/chupa-text/decomposers/xml.rb (+1 -2)
===================================================================
--- lib/chupa-text/decomposers/xml.rb    2017-07-05 16:12:57 +0900 (537806c)
+++ lib/chupa-text/decomposers/xml.rb    2017-07-05 18:09:39 +0900 (8c4689e)
@@ -34,8 +34,7 @@ module ChupaText
           parser = REXML::Parsers::StreamParser.new(input, listener)
           parser.parse
         end
-        text_data = TextData.new(text)
-        text_data.uri = data.uri
+        text_data = TextData.new(text, :source_data => data)
         yield(text_data)
       end
 

  Modified: test/command/test-chupa-text.rb (+28 -0)
===================================================================
--- test/command/test-chupa-text.rb    2017-07-05 16:12:57 +0900 (a9ec2b0)
+++ test/command/test-chupa-text.rb    2017-07-05 18:09:39 +0900 (501f821)
@@ -182,4 +182,32 @@ class TestCommandChupaText < Test::Unit::TestCase
                                path.to_s))
     end
   end
+
+  sub_test_case("extract") do
+    def test_csv
+      fixture_name = "numbers.csv"
+      uri = fixture_uri(fixture_name)
+      path = fixture_path(fixture_name)
+      assert_equal([
+                     true,
+                     {
+                       "uri"       => uri.to_s,
+                       "path"      => path.to_s,
+                       "mime-type" => "text/csv",
+                       "size"      => path.stat.size,
+                       "texts"     => [
+                         {
+                           "uri"       => uri.to_s,
+                           "path"      => path.to_s,
+                           "mime-type" => "text/plain",
+                           "source-mime-types" => ["text/csv"],
+                           "body"      => "1 2 3\n4 5 6\n7 8 9\n",
+                           "size"      => 18,
+                         },
+                       ],
+                     },
+                   ],
+                   run_command(path.to_s))
+    end
+  end
 end

  Added: test/fixture/command/chupa-text/numbers.csv (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/fixture/command/chupa-text/numbers.csv    2017-07-05 18:09:39 +0900 (d3494f6)
@@ -0,0 +1,3 @@
+1,2,3
+4,5,6
+7,8,9
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index