grep.rb の UTF-8 対応と ruby 1.9 対応 (tDiary-users-talk: 0512) - tDiary-users

三輪です。

青木さんの grep.rb http://i.loveruby.net/svn/public/tdiarytools/trunk/grep.rb
の UTF-8 対応と ruby 1.9 対応をしてみました。( すごくバータリーです )

( というか grep.rb ではなくて search.rb を使ってるのかしら
 https://github.com/tdiary/tdiary-contrib/blob/master/util/tdiarysearch/search.rb
)

--- C:/Users/rin/Desktop/grep.rb.orig	Sun Jan 22 15:20:40 2012
+++ C:/Users/rin/Desktop/grep.rb	Sun Jan 22 15:23:07 2012
@@ -1,4 +1,5 @@
 #!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
 #
 # $Id$
 #
@@ -26,7 +27,7 @@
   <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
   <html lang="ja-JP">
   <head>
-    <meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
     <meta http-equiv="Content-Language" content="ja-JP">
     <meta name="robots" content="none">
     <title>tDiary Grep</title>
@@ -115,12 +116,12 @@
     begin
       Uconv.u8toeuc(str)
     rescue Uconv::Error
-      NKF::nkf('-e -m0', str)
+      NKF::nkf('-w -m0', str)
     end
   end
 rescue LoadError
   def to_euc(str)
-    NKF::nkf('-e -m0', str)
+    NKF::nkf('-w -m0', str)
   end
 end

@@ -133,12 +134,12 @@
 class InvalidTDiaryFormat < TDiaryGrepError; end
 class ConfigError < TDiaryGrepError; end

-Z_SPACE = "\241\241"   # zen-kaku space
+Z_SPACE = "　"   # zen-kaku space

-BEGIN { $defout.binmode }
+BEGIN { $stdout.binmode }

 def main
-  $KCODE = 'EUC'
+#  $KCODE = 'UTF8'
   cgi = CGI.new
   html = '<html><head><title></title></head><body><p>error</p></body></html>'
   begin
@@ -158,7 +159,7 @@
       elsif not cgi.valid?('q')
         return search_form_page()
       else
-        query = to_euc([cgi.params['q']].compact.flatten.join(' '))
+        query = [cgi.params['q']].compact.flatten.join(' ')
         html = search_result_page(setup_patterns(query))
         save_query(query, query_log()) if LOGGING
         return html
@@ -183,7 +184,7 @@
 def send_html(cgi, html)
   print cgi.header('status' => '200 OK',
                    'type' => 'text/html',
-                   'charset' => 'euc-jp',
+                   'charset' => 'UTF-8',
                    'Content-Length' => html.length.to_s,
                    'Cache-Control' => 'no-cache',
                    'Pragma' => 'no-cache')
@@ -193,7 +194,7 @@
 def setup_patterns(query)
   patterns = split_string(query).map {|pat|
     check_pattern pat
-    /#{Regexp.quote(pat)}/ie
+    /#{Regexp.quote(pat)}/iu
   }
   raise WrongQuery, 'no pattern' if patterns.empty?
   raise WrongQuery, 'too many sub patterns' if patterns.length > 8
@@ -208,7 +209,7 @@
 end

 def split_string(str)
-  str.split(/[\s#{Z_SPACE}]+/oe).reject {|w| w.empty? }
+  str.split(/[\s#{Z_SPACE}]+/ou).reject {|w| w.empty? }
 end

 def save_query(query, file)
@@ -312,7 +313,7 @@

 def read_diaries(path)
   diaries = []
-  File.open(path) {|f|
+  File.open(path, :encoding => 'UTF-8') {|f|
     f.each('') do |header|
       diaries.push Diary.parse(header, f.gets("\n.\n").chomp(".\n"))
     end
@@ -398,7 +399,7 @@
     title, body = @source.split(/\n/, 2)
     sprintf('%-30s | %s',
             title.to_s.strip,
-            remove_tags(body.to_s).gsub(/[\s#{Z_SPACE}]+/oe, ' ').slice(/\A.{0,60}/me))
+            remove_tags(body.to_s).gsub(/[\s#{Z_SPACE}]+/ou, ' ').slice(/\A.{0,60}/mu))
   end

   private
@@ -420,7 +421,7 @@

 @tdiary_conf = nil
 def tdiary_conf
-  @tdiary_conf ||= File.read("#{File.dirname(__FILE__)}/tdiary.conf")
+  @tdiary_conf ||= File.read("#{File.dirname(__FILE__)}/tdiary.conf", :encoding => 'UTF-8')
 end

 #


-- 
みわ



tDiary-users

[tDiary-users-talk: 0512] grep.rb の UTF-8 対応と ruby 1.9 対応