Kouhei Sutou 2019-03-03 04:47:26 +0900 (Sun, 03 Mar 2019) Revision: 696180dea95169b697b9c81c042ba892c669687f https://github.com/ranguba/chupa-text/commit/696180dea95169b697b9c81c042ba892c669687f Message: Use depth priority processing to reduce memory usage Modified files: lib/chupa-text/extractor.rb Modified: lib/chupa-text/extractor.rb (+31 -32) =================================================================== --- lib/chupa-text/extractor.rb 2019-03-02 06:16:52 +0900 (920b7d9) +++ lib/chupa-text/extractor.rb 2019-03-03 04:47:26 +0900 (4ce2c11) @@ -57,38 +57,8 @@ module ChupaText # You can get text data by `text_data.body`. # # @return [void] - def extract(input) - targets = [ensure_data(input)] - until targets.empty? - target = targets.shift - debug do - "#{log_tag}[extract][target] <#{target.uri}>:<#{target.mime_type}>" - end - decomposer = find_decomposer(target) - if decomposer.nil? - if target.text_plain? - debug {"#{log_tag}[extract][text-plain]"} - yield(target.to_utf8_body_data) - next - else - debug {"#{log_tag}[extract][decomposer] not found"} - if target.text? - yield(target.to_utf8_body_data) - end - next - end - end - debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"} - decomposer.decompose(target) do |decomposed| - debug do - "#{log_tag}[extract][decomposed] " + - "#{decomposer.class}: " + - "<#{target.uri}>: " + - "<#{target.mime_type}> -> <#{decomposed.mime_type}>" - end - targets.push(decomposed) - end - end + def extract(input, &block) + extract_recursive(ensure_data(input), &block) end private @@ -112,6 +82,35 @@ module ChupaText candidate[1] end + def extract_recursive(target, &block) + debug do + "#{log_tag}[extract][target] <#{target.uri}>:<#{target.mime_type}>" + end + decomposer = find_decomposer(target) + if decomposer.nil? + if target.text_plain? + debug {"#{log_tag}[extract][text-plain]"} + yield(target.to_utf8_body_data) + else + debug {"#{log_tag}[extract][decomposer] not found"} + if target.text? + yield(target.to_utf8_body_data) + end + end + else + debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"} + decomposer.decompose(target) do |decomposed| + debug do + "#{log_tag}[extract][decomposed] " + + "#{decomposer.class}: " + + "<#{target.uri}>: " + + "<#{target.mime_type}> -> <#{decomposed.mime_type}>" + end + extract_recursive(decomposed, &block) + end + end + end + def log_tag "[extractor]" end -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190303/def75575/attachment-0001.html>