[Groonga-commit] groonga/groonga at f28cb0c [master] nfkc: create a class

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Dec 5 22:03:26 JST 2015


Kouhei Sutou	2015-12-05 22:03:26 +0900 (Sat, 05 Dec 2015)

  New Revision: f28cb0cdcee13b051ca007248e816d57b8b1909c
  https://github.com/groonga/groonga/commit/f28cb0cdcee13b051ca007248e816d57b8b1909c

  Message:
    nfkc: create a class
    
    Generated code isn't changed.

  Modified files:
    lib/nfkc.rb

  Modified: lib/nfkc.rb (+284 -261)
===================================================================
--- lib/nfkc.rb    2015-12-05 20:26:34 +0900 (ec8d074)
+++ lib/nfkc.rb    2015-12-05 22:03:26 +0900 (7081ce3)
@@ -18,78 +18,298 @@
 
 CUSTOM_RULE_PATH = 'nfkc-custom-rules.txt'
 
-def gen_bc(file, hash, level)
-  bl = ' ' * (level * 2)
-  h2 = {}
-  hash.each{|key,val|
-    key = key.dup
-    key.force_encoding("ASCII-8BIT")
-    head = key.bytes[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
+class SwitchGenerator
+  def initialize(unicode_version, output)
+    @unicode_version = unicode_version
+    @output = output
+  end
+
+  def generate(map1, map2)
+    generate_header
+    STDERR.puts('generating char type code..')
+    generate_blockcode_char_type("gc")
+    STDERR.puts('generating map1 code..')
+    generate_map1(map1)
+    STDERR.puts('generating map2 code..')
+    generate_map2(map2)
+    generate_footer
+  end
+
+  private
+  def generate_header
+    @output.puts(<<-HEADER)
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2010-2015 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+don't edit this file by hand. it generated automatically by nfkc.rb
+*/
+
+#include "grn.h"
+#include <groonga/nfkc.h>
+
+#ifdef GRN_WITH_NFKC
+
+    HEADER
+  end
+
+  def generate_footer
+    @output.puts(<<-FOOTER)
+#endif /* GRN_WITH_NFKC */
+
+    FOOTER
+  end
+
+  def generate_blockcode_char_type(option)
+    @output.puts(<<-HEADER)
+grn_char_type
+grn_nfkc#{@unicode_version}_char_type(const unsigned char *str)
+{
+    HEADER
+    bc = {}
+    open("|./icudump --#{option}").each{|l|
+      src,_,code = l.chomp.split("\t")
+      str = src.split(':').collect(&:hex).pack("c*")
+      bc[str] = code
+    }
+    @lv = 0
+    gen_bc(bc, 0)
+    @output.puts(<<-FOOTER)
+  return -1;
+}
+
+    FOOTER
+  end
+
+  def gen_bc(hash, level)
+    bl = ' ' * (level * 2)
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
+      else
+        h2[head] = {rest => val}
+      end
+    }
+    if h2.size < 3
+      h2.keys.sort.each{|k|
+        if (0x80 < k)
+          @output.printf("#{bl}if (str[#{level}] < 0x%02X) { return #{@lv}; }\n", k)
+        end
+        h = h2[k]
+        if h.keys.join =~ /^\x80*$/n
+          @lv, = h.values
+        else
+          @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", k)
+          gen_bc(h, level + 1)
+          @output.puts bl + '}'
+        end
+      }
+      @output.puts bl + "return #{@lv};"
     else
-      h2[head] = {rest => val}
+      @output.puts bl + "switch (str[#{level}]) {"
+      lk = 0x80
+      br = true
+      h2.keys.sort.each{|k|
+        if (lk < k)
+          for j in lk..k-1
+            @output.printf("#{bl}case 0x%02X :\n", j)
+          end
+          br = false
+        end
+        unless br
+          @output.puts bl + "  return #{@lv};"
+          @output.puts bl + '  break;'
+        end
+        h = h2[k]
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        if h.keys.join =~ /^\x80*$/n
+          @lv, = h.values
+          br = false
+        else
+          gen_bc(h, level + 1)
+          @output.puts bl + '  break;'
+          br = true
+        end
+        lk = k + 1
+      }
+      @output.puts bl + 'default :'
+      @output.puts bl + "  return #{@lv};"
+      @output.puts bl + '  break;'
+      @output.puts bl + '}'
     end
-  }
-  if h2.size < 3
-    h2.keys.sort.each{|k|
-      if (0x80 < k)
-        file.printf("#{bl}if (str[#{level}] < 0x%02X) { return #{$lv}; }\n", k)
-      end
-      h = h2[k]
-      if h.keys.join =~ /^\x80*$/n
-        $lv, = h.values
+  end
+
+  def generate_map1(hash)
+    @output.puts(<<-HEADER)
+const char *
+grn_nfkc#{@unicode_version}_map1(const unsigned char *str)
+{
+    HEADER
+    gen_map1(hash, 0)
+    @output.puts(<<-FOOTER)
+  return 0;
+}
+
+    FOOTER
+  end
+
+  def gen_map1(hash, level)
+    bl = ' ' * ((level + 0) * 2)
+    if hash['']
+      dst = ''
+      hash[''].each_byte{|b| dst << format('\x%02X', b)}
+      @output.puts "#{bl}return \"#{dst}\";"
+      hash.delete('')
+    end
+    return if hash.empty?
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
       else
-        file.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", k)
-        gen_bc(file, h, level + 1)
-        file.puts bl + '}'
+        h2[head] = {rest => val}
       end
     }
-    file.puts bl + "return #{$lv};"
-  else
-    file.puts bl + "switch (str[#{level}]) {"
-    lk = 0x80
-    br = true
-    h2.keys.sort.each{|k|
-      if (lk < k)
-        for j in lk..k-1
-          file.printf("#{bl}case 0x%02X :\n", j)
-        end
-        br = false
+    if h2.size == 1
+      h2.each{|key,val|
+        @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", key)
+        gen_map1(val, level + 1)
+        @output.puts bl + '}'
+      }
+    else
+      @output.puts "#{bl}switch (str[#{level}]) {"
+      h2.keys.sort.each{|k|
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        gen_map1(h2[k], level + 1)
+        @output.puts("#{bl}  break;")
+      }
+      @output.puts bl + '}'
+    end
+  end
+
+  def generate_map2(map2)
+    @output.puts(<<-HEADER)
+const char *
+grn_nfkc#{@unicode_version}_map2(const unsigned char *prefix, const unsigned char *suffix)
+{
+    HEADER
+    suffix = {}
+    map2.each{|src,dst|
+      chars = src.chars
+      if chars.size != 2
+        STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
       end
-      unless br
-        file.puts bl + "  return #{$lv};"
-        file.puts bl + '  break;'
+      s = chars.pop
+      if suffix[s]
+        suffix[s][chars.join] = dst
+      else
+        suffix[s] = {chars.join=>dst}
       end
-      h = h2[k]
-      file.printf("#{bl}case 0x%02X :\n", k)
-      if h.keys.join =~ /^\x80*$/n
-        $lv, = h.values
-        br = false
+    }
+    gen_map2_sub(suffix, 0)
+    @output.puts(<<-FOOTER)
+  return 0;
+}
+
+    FOOTER
+  end
+
+  def gen_map2_sub2(hash, level, indent)
+    bl = ' ' * ((level + indent + 0) * 2)
+    if hash['']
+      @output.print "#{bl}return \""
+      hash[''].each_byte{|b| @output.printf('\x%02X', b)}
+      @output.puts "\";"
+      hash.delete('')
+    end
+    return if hash.empty?
+
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
       else
-        gen_bc(file, h, level + 1)
-        file.puts bl + '  break;'
-        br = true
+        h2[head] = {rest => val}
       end
-      lk = k + 1
     }
-    file.puts bl + 'default :'
-    file.puts bl + "  return #{$lv};"
-    file.puts bl + '  break;'
-    file.puts bl + '}'
+
+    if h2.size == 1
+      h2.each{|key,val|
+        @output.printf("#{bl}if (prefix[#{level}] == 0x%02X) {\n", key)
+        gen_map2_sub2(val, level + 1, indent)
+        @output.puts bl + '}'
+      }
+    else
+      @output.puts "#{bl}switch (prefix[#{level}]) {"
+      h2.keys.sort.each{|k|
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        gen_map2_sub2(h2[k], level + 1, indent)
+        @output.puts("#{bl}  break;")
+      }
+      @output.puts bl + '}'
+    end
   end
-end
 
-def generate_blockcode_char_type(file, option)
-  bc = {}
-  open("|./icudump --#{option}").each{|l|
-    src,_,code = l.chomp.split("\t")
-    str = src.split(':').collect(&:hex).pack("c*")
-    bc[str] = code
-  }
-  $lv = 0
-  gen_bc(file, bc, 0)
+  def gen_map2_sub(hash, level)
+    bl = ' ' * ((level + 0) * 2)
+    if hash['']
+      gen_map2_sub2(hash[''], 0, level)
+      hash.delete('')
+    end
+    return if hash.empty?
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
+      else
+        h2[head] = {rest => val}
+      end
+    }
+    if h2.size == 1
+      h2.each{|key,val|
+        @output.printf("#{bl}if (suffix[#{level}] == 0x%02X) {\n", key)
+        gen_map2_sub(val, level + 1)
+        @output.puts bl + '}'
+      }
+    else
+      @output.puts "#{bl}switch (suffix[#{level}]) {"
+      h2.keys.sort.each{|k|
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        gen_map2_sub(h2[k], level + 1)
+        @output.puts("#{bl}  break;")
+      }
+      @output.puts bl + '}'
+    end
+  end
 end
 
 def ccpush(hash, src, dst)
@@ -202,137 +422,6 @@ def create_map2(map1)
   return cc
 end
 
-def generate_map1(file, hash, level)
-  bl = ' ' * ((level + 0) * 2)
-  if hash['']
-    dst = ''
-    hash[''].each_byte{|b| dst << format('\x%02X', b)}
-    file.puts "#{bl}return \"#{dst}\";"
-    hash.delete('')
-  end
-  return if hash.empty?
-  h2 = {}
-  hash.each{|key,val|
-    key = key.dup
-    key.force_encoding("ASCII-8BIT")
-    head = key.bytes[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
-    else
-      h2[head] = {rest => val}
-    end
-  }
-  if h2.size == 1
-    h2.each{|key,val|
-      file.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", key)
-      generate_map1(file, val, level + 1)
-      file.puts bl + '}'
-    }
-  else
-    file.puts "#{bl}switch (str[#{level}]) {"
-    h2.keys.sort.each{|k|
-      file.printf("#{bl}case 0x%02X :\n", k)
-      generate_map1(file, h2[k], level + 1)
-      file.puts("#{bl}  break;")
-    }
-    file.puts bl + '}'
-  end
-end
-
-def gen_map2_sub2(file, hash, level, indent)
-  bl = ' ' * ((level + indent + 0) * 2)
-  if hash['']
-    file.print "#{bl}return \""
-    hash[''].each_byte{|b| file.printf('\x%02X', b)}
-    file.puts "\";"
-    hash.delete('')
-  end
-  return if hash.empty?
-
-  h2 = {}
-  hash.each{|key,val|
-    key = key.dup
-    key.force_encoding("ASCII-8BIT")
-    head = key.bytes[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
-    else
-      h2[head] = {rest => val}
-    end
-  }
-
-  if h2.size == 1
-    h2.each{|key,val|
-      file.printf("#{bl}if (prefix[#{level}] == 0x%02X) {\n", key)
-      gen_map2_sub2(file, val, level + 1, indent)
-      file.puts bl + '}'
-    }
-  else
-    file.puts "#{bl}switch (prefix[#{level}]) {"
-    h2.keys.sort.each{|k|
-      file.printf("#{bl}case 0x%02X :\n", k)
-      gen_map2_sub2(file, h2[k], level + 1, indent)
-      file.puts("#{bl}  break;")
-    }
-    file.puts bl + '}'
-  end
-end
-
-def gen_map2_sub(file, hash, level)
-  bl = ' ' * ((level + 0) * 2)
-  if hash['']
-    gen_map2_sub2(file, hash[''], 0, level)
-    hash.delete('')
-  end
-  return if hash.empty?
-  h2 = {}
-  hash.each{|key,val|
-    key = key.dup
-    key.force_encoding("ASCII-8BIT")
-    head = key.bytes[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
-    else
-      h2[head] = {rest => val}
-    end
-  }
-  if h2.size == 1
-    h2.each{|key,val|
-      file.printf("#{bl}if (suffix[#{level}] == 0x%02X) {\n", key)
-      gen_map2_sub(file, val, level + 1)
-      file.puts bl + '}'
-    }
-  else
-    file.puts "#{bl}switch (suffix[#{level}]) {"
-    h2.keys.sort.each{|k|
-      file.printf("#{bl}case 0x%02X :\n", k)
-      gen_map2_sub(file, h2[k], level + 1)
-      file.puts("#{bl}  break;")
-    }
-    file.puts bl + '}'
-  end
-end
-
-def generate_map2(file, map2)
-  suffix = {}
-  map2.each{|src,dst|
-    chars = src.chars
-    if chars.size != 2
-      STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
-    end
-    s = chars.pop
-    if suffix[s]
-      suffix[s][chars.join] = dst
-    else
-      suffix[s] = {chars.join=>dst}
-    end
-  }
-  gen_map2_sub(file, suffix, 0)
-end
-
 ######## main #######
 
 ARGV.each{|arg|
@@ -350,79 +439,13 @@ system('cc -Wall -O3 -o icudump -I/tmp/local/include -L/tmp/local/lib icudump.c
 STDERR.puts('getting Unicode version')
 unicode_version = `./icudump --version`.strip.gsub(".", "")
 
-template = <<END
-/* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2010-2015 Brazil
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Lesser General Public
-  License version 2.1 as published by the Free Software Foundation.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public
-  License along with this library; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-
-don't edit this file by hand. it generated automatically by nfkc.rb
-*/
-
-#include "grn.h"
-#include <groonga/nfkc.h>
-
-#ifdef GRN_WITH_NFKC
-
-grn_char_type
-grn_nfkc#{unicode_version}_char_type(const unsigned char *str)
-{
-%  return -1;
-}
-
-const char *
-grn_nfkc#{unicode_version}_map1(const unsigned char *str)
-{
-%  return 0;
-}
-
-const char *
-grn_nfkc#{unicode_version}_map2(const unsigned char *prefix, const unsigned char *suffix)
-{
-%  return 0;
-}
-
-#endif /* GRN_WITH_NFKC */
-
-END
-
 STDERR.puts('creating map1..')
 map1 = create_map1()
 
 STDERR.puts('creating map2..')
 map2 = create_map2(map1)
 
-outf = open("nfkc#{unicode_version}.c", 'w')
-
-tmps = template.split(/%/)
-
-#STDERR.puts('generating block code..')
-#outf.print(tmps.shift)
-#generate_blockcode_char_type(outf, 'bc')
-
-STDERR.puts('generating char type code..')
-outf.print(tmps.shift)
-generate_blockcode_char_type(outf, 'gc')
-
-STDERR.puts('generating map1 code..')
-outf.print(tmps.shift)
-generate_map1(outf, map1, 0)
-
-STDERR.puts('generating map2 code..')
-outf.print(tmps.shift)
-generate_map2(outf, map2)
-
-outf.print(tmps.shift)
-outf.close
-STDERR.puts('done.')
+File.open("nfkc#{unicode_version}.c", "w") do |output|
+  generator = SwitchGenerator.new(unicode_version, output)
+  generator.generate(map1, map2)
+end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index