[logaling-commit] logaling/logaling-command [master] [gene95] extract tar.

Back to archive index

null+****@clear***** null+****@clear*****
Mon Jan 23 12:42:33 JST 2012


Kouhei Sutou	2012-01-23 12:42:33 +0900 (Mon, 23 Jan 2012)

  New Revision: 1d83703867182a73202793189c5cdf86cb37aa76

  Log:
    [gene95] extract tar.

  Modified files:
    lib/logaling/external_glossaries/gene95.rb

  Modified: lib/logaling/external_glossaries/gene95.rb (+20 -6)
===================================================================
--- lib/logaling/external_glossaries/gene95.rb    2012-01-23 11:09:24 +0900 (2bcd16d)
+++ lib/logaling/external_glossaries/gene95.rb    2012-01-23 12:42:33 +0900 (c6617e8)
@@ -15,6 +15,8 @@
 
 require 'open-uri'
 require 'zlib'
+require 'stringio'
+require 'rubygems/package'
 
 module Logaling
   class Gene95 < ExternalGlossary
@@ -29,14 +31,26 @@ module Logaling
         Zlib::GzipReader.open(open(url)) do |gz|
           puts "importing gene95 dictionary..."
 
-          2.times { gz.gets } # skip header
+          Gem::Package::TarReader.new(gz) do |tar|
+            tar.each do |entry|
+              case entry.full_name
+              when "gene.txt"
+                lines = StringIO.new(entry.read).each_line
 
-          preprocessed_lines = gz.readlines.map do |line|
-            line.encode("UTF-8", "CP932", undef: :replace, replace: '').chomp
-          end
+                2.times { lines.next } # skip header
+
+                preprocessed_lines = lines.map.map do |line|
+                  line.encode("UTF-8", "CP932",
+                              undef: :replace, replace: '').chomp
+                end
 
-          preprocessed_lines.each_slice(2) do |source, target|
-            csv << [source.sub(/(    .*)/, ''), target]
+                preprocessed_lines.each_slice(2) do |source, target|
+                  csv << [source.sub(/(    .*)/, ''), target]
+                end
+              else
+                # ignore
+              end
+            end
           end
         end
       end




More information about the logaling-commit mailing list
Back to archive index