[Groonga-commit] ranguba/chupa-text at 8729061 [master] Support screenshot

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jul 5 18:27:14 JST 2017


Kouhei Sutou	2017-07-05 18:27:14 +0900 (Wed, 05 Jul 2017)

  New Revision: 87290619a9518934541e9ee813bce9ed6c60088e
  https://github.com/ranguba/chupa-text/commit/87290619a9518934541e9ee813bce9ed6c60088e

  Message:
    Support screenshot

  Copied files:
    lib/chupa-text/screenshot.rb
      (from lib/chupa-text/decomposers/csv.rb)
  Modified files:
    lib/chupa-text.rb
    lib/chupa-text/command/chupa-text.rb
    lib/chupa-text/data.rb
    lib/chupa-text/decomposers/csv.rb
    lib/chupa-text/formatters/hash.rb

  Modified: lib/chupa-text.rb (+2 -0)
===================================================================
--- lib/chupa-text.rb    2017-07-05 18:09:39 +0900 (100efcb)
+++ lib/chupa-text.rb    2017-07-05 18:27:14 +0900 (7845f93)
@@ -41,6 +41,8 @@ require "chupa-text/formatters"
 require "chupa-text/file-content"
 require "chupa-text/virtual-content"
 
+require "chupa-text/screenshot"
+
 require "chupa-text/attributes"
 require "chupa-text/data"
 require "chupa-text/input-data"

  Modified: lib/chupa-text/command/chupa-text.rb (+14 -3)
===================================================================
--- lib/chupa-text/command/chupa-text.rb    2017-07-05 18:09:39 +0900 (96a600f)
+++ lib/chupa-text/command/chupa-text.rb    2017-07-05 18:27:14 +0900 (fc766f7)
@@ -33,6 +33,7 @@ module ChupaText
         @configuration = Configuration.load_default
         @enable_gems = true
         @format = :json
+        @need_screenshot = true
       end
 
       def run(*arguments)
@@ -92,12 +93,20 @@ module ChupaText
                   "Appends PATH to decomposer load path.") do |path|
           $LOAD_PATH << path
         end
+
+        parser.separator("")
+        parser.separator("Output related options")
         parser.on("--format=FORMAT", AVAILABLE_FORMATS,
                   "Output FORMAT.",
                   "[#{AVAILABLE_FORMATS.join(', ')}]",
-                  "(default: json)") do |format|
+                  "(default: #{@format})") do |format|
           @format = format
         end
+        parser.on("--[no-]need-screenshot",
+                  "Generate screenshot if available.",
+                  "(default: #{@need_screenshot})") do |boolean|
+          @need_screenshot = boolean
+        end
 
         parser.separator("")
         parser.separator("Log related options:")
@@ -152,7 +161,7 @@ module ChupaText
 
       def create_data
         if @input.nil?
-          VirtualFileData.new(nil, $stdin)
+          data = VirtualFileData.new(nil, $stdin)
         else
           case @input
           when /\A[a-z]+:\/\//i
@@ -160,8 +169,10 @@ module ChupaText
           else
             input = Pathname(@input)
           end
-          InputData.new(input)
+          data = InputData.new(input)
         end
+        data.need_screenshot = @need_screenshot
+        data
       end
 
       def create_formatter

  Modified: lib/chupa-text/data.rb (+15 -0)
===================================================================
--- lib/chupa-text/data.rb    2017-07-05 18:09:39 +0900 (e98bb9a)
+++ lib/chupa-text/data.rb    2017-07-05 18:27:14 +0900 (c2afcbc)
@@ -52,6 +52,14 @@ module ChupaText
     #   archive data in {#source}.
     attr_accessor :source
 
+    # @return [Screenshot, nil] The screenshot of the data. For example,
+    #   the first page image for PDF file.text.
+    attr_accessor :screenshot
+
+    # @param [Bool] value `true` when screenshot is needed.
+    # @return [Bool] the specified value
+    attr_writer :need_screenshot
+
     def initialize(options={})
       @uri = nil
       @body = nil
@@ -60,6 +68,8 @@ module ChupaText
       @mime_type = nil
       @attributes = Attributes.new
       @source = nil
+      @screenshot = nil
+      @need_screenshot = true
       @options = options || {}
       source_data = @options[:source_data]
       if source_data
@@ -165,6 +175,11 @@ module ChupaText
       mime_type == "text/plain"
     end
 
+    # @return [Bool] `true` when screenshot is needed if available.
+    def need_screenshot?
+      @need_screenshot
+    end
+
     private
     def guess_mime_type
       guess_mime_type_from_uri or

  Modified: lib/chupa-text/decomposers/csv.rb (+32 -1)
===================================================================
--- lib/chupa-text/decomposers/csv.rb    2017-07-05 18:09:39 +0900 (390f5bd)
+++ lib/chupa-text/decomposers/csv.rb    2017-07-05 18:27:14 +0900 (72923fb)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013  Kouhei Sutou <kou at clear-code.com>
+# Copyright (C) 2013-2017  Kouhei Sutou <kou at clear-code.com>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -14,6 +14,7 @@
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
+require "cgi/util"
 require "csv"
 
 module ChupaText
@@ -41,9 +42,39 @@ module ChupaText
             text << "\n"
           end
         end
+
         text_data = TextData.new(text, :source_data => data)
+        if data.need_screenshot?
+          text_data.screenshot = create_screenshot(text)
+        end
+
         yield(text_data)
       end
+
+      private
+      def create_screenshot(text)
+        target_text = ""
+        text.each_line.with_index do |line, i|
+          target_text << line
+          break if i == 4
+        end
+        mime_type = "image/svg+xml"
+        data = <<-SVG
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+  xmlns="http://www.w3.org/2000/svg"
+  width="100"
+  height="100"
+  viewBox="0 0 100 100">
+  <text
+    x="0"
+    y="10"
+    xml:space="preserve"
+    style="font-size: 10px;">#{CGI.escapeHTML(target_text)}</text>
+</svg>
+        SVG
+        Screenshot.new(mime_type, data)
+      end
     end
   end
 end

  Modified: lib/chupa-text/formatters/hash.rb (+6 -0)
===================================================================
--- lib/chupa-text/formatters/hash.rb    2017-07-05 18:09:39 +0900 (31e8f9e)
+++ lib/chupa-text/formatters/hash.rb    2017-07-05 18:27:14 +0900 (f28bee6)
@@ -28,6 +28,12 @@ module ChupaText
         text = {}
         format_headers(data, text)
         text["body"] = data.body
+        if data.screenshot
+          text["screenshot"] = {
+            "mime-type" => data.screenshot.mime_type,
+            "data" => data.screenshot.data,
+          }
+        end
         @texts << text
       end
 

  Copied: lib/chupa-text/screenshot.rb (+9 -28) 51%
===================================================================
--- lib/chupa-text/decomposers/csv.rb    2017-07-05 18:09:39 +0900 (390f5bd)
+++ lib/chupa-text/screenshot.rb    2017-07-05 18:27:14 +0900 (08ed97a)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013  Kouhei Sutou <kou at clear-code.com>
+# Copyright (C) 2017  Kouhei Sutou <kou at clear-code.com>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -14,36 +14,17 @@
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
-require "csv"
-
 module ChupaText
-  module Decomposers
-    class CSV < Decomposer
-      registry.register("csv", self)
-
-      def target?(data)
-        return true if data.mime_type == "text/csv"
-
-        if data.text_plain? and
-            (data["source-mime-types"] || []).include?("text/csv")
-          return false
-        end
+  class Screenshot
+    # @return [String] The MIME type of the screenshot.
+    attr_reader :mime_type
 
-        data.extension == "csv"
-      end
+    # @return [String] The data of the screenshot.
+    attr_accessor :data
 
-      def decompose(data)
-        text = ""
-        data.open do |input|
-          csv = ::CSV.new(input)
-          csv.each do |row|
-            text << row.join(" ")
-            text << "\n"
-          end
-        end
-        text_data = TextData.new(text, :source_data => data)
-        yield(text_data)
-      end
+    def initialize(mime_type, data)
+      @mime_type = mime_type
+      @data = data
     end
   end
 end
-------------- next part --------------
An HTML attachment was scrubbed...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index