[Groonga-commit] ranguba/chupa-text at 696180d [master] Use depth-first processing to reduce memory usage

Back to archive index
Kouhei Sutou null+****@clear*****
Sun Mar 3 04:47:26 JST 2019


Kouhei Sutou	2019-03-03 04:47:26 +0900 (Sun, 03 Mar 2019)

  Revision: 696180dea95169b697b9c81c042ba892c669687f
  https://github.com/ranguba/chupa-text/commit/696180dea95169b697b9c81c042ba892c669687f

  Message:
    Use depth-first processing to reduce memory usage

  Modified files:
    lib/chupa-text/extractor.rb

  Modified: lib/chupa-text/extractor.rb (+31 -32)
===================================================================
--- lib/chupa-text/extractor.rb    2019-03-02 06:16:52 +0900 (920b7d9)
+++ lib/chupa-text/extractor.rb    2019-03-03 04:47:26 +0900 (4ce2c11)
@@ -57,38 +57,8 @@ module ChupaText
     #   You can get text data by `text_data.body`.
     #
     # @return [void]
-    def extract(input)
-      targets = [ensure_data(input)]
-      until targets.empty?
-        target = targets.shift
-        debug do
-          "#{log_tag}[extract][target] <#{target.uri}>:<#{target.mime_type}>"
-        end
-        decomposer = find_decomposer(target)
-        if decomposer.nil?
-          if target.text_plain?
-            debug {"#{log_tag}[extract][text-plain]"}
-            yield(target.to_utf8_body_data)
-            next
-          else
-            debug {"#{log_tag}[extract][decomposer] not found"}
-            if target.text?
-              yield(target.to_utf8_body_data)
-            end
-            next
-          end
-        end
-        debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
-        decomposer.decompose(target) do |decomposed|
-          debug do
-            "#{log_tag}[extract][decomposed] " +
-              "#{decomposer.class}: " +
-              "<#{target.uri}>: " +
-              "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
-          end
-          targets.push(decomposed)
-        end
-      end
+    def extract(input, &block)
+      extract_recursive(ensure_data(input), &block)
     end
 
     private
@@ -112,6 +82,35 @@ module ChupaText
       candidate[1]
     end
 
+    def extract_recursive(target, &block)
+      debug do
+        "#{log_tag}[extract][target] <#{target.uri}>:<#{target.mime_type}>"
+      end
+      decomposer = find_decomposer(target)
+      if decomposer.nil?
+        if target.text_plain?
+          debug {"#{log_tag}[extract][text-plain]"}
+          yield(target.to_utf8_body_data)
+        else
+          debug {"#{log_tag}[extract][decomposer] not found"}
+          if target.text?
+            yield(target.to_utf8_body_data)
+          end
+        end
+      else
+        debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
+        decomposer.decompose(target) do |decomposed|
+          debug do
+            "#{log_tag}[extract][decomposed] " +
+              "#{decomposer.class}: " +
+              "<#{target.uri}>: " +
+              "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
+          end
+          extract_recursive(decomposed, &block)
+        end
+      end
+    end
+
     def log_tag
       "[extractor]"
     end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190303/def75575/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index