[Groonga-commit] groonga/groonga [master] Revert "Remove deprecated grn_str_open()"

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Jan 21 17:55:05 JST 2013


Kouhei Sutou	2013-01-21 17:55:05 +0900 (Mon, 21 Jan 2013)

  New Revision: 83b5096d9856051f67d251b46d78099f48db252c
  https://github.com/groonga/groonga/commit/83b5096d9856051f67d251b46d78099f48db252c

  Log:
    Revert "Remove deprecated grn_str_open()"
    
    This reverts commit 46c15627aa48627f5f24446006ca30dda4c2dae1.

  Modified files:
    lib/str.c
    plugins/suggest/suggest.c
    test/unit/util/test-string.c

  Modified: lib/str.c (+1103 -0)
===================================================================
--- lib/str.c    2013-01-21 17:53:14 +0900 (ec87c10)
+++ lib/str.c    2013-01-21 17:55:05 +0900 (a26e1b9)
@@ -170,6 +170,1109 @@ grn_charlen(grn_ctx *ctx, const char *str, const char *end)
   return grn_charlen_(ctx, str, end, ctx->encoding);
 }
 
+static unsigned char symbol[] = {
+  ',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
+  '"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  '+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  '$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+inline static grn_rc
+normalize_euc(grn_ctx *ctx, grn_str *nstr)
+{
+  static uint16_t hankana[] = {
+    0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
+    0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
+    0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
+    0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
+    0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
+    0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
+    0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
+    0xa1eb
+  };
+  static unsigned char dakuten[] = {
+    0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
+    0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
+    0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
+    0, 0xdc
+  };
+  static unsigned char handaku[] = {
+    0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
+  };
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_, b;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = nstr->orig_blen, length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    if ((*s & 0x80)) {
+      if (((s + 1) < e) && (*(s + 1) & 0x80)) {
+        unsigned char c1 = *s++, c2 = *s, c3 = 0;
+        switch (c1 >> 4) {
+        case 0x08 :
+          if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
+            uint16_t c = hankana[c2 - 0xa0];
+            switch (c) {
+            case 0xa1ab :
+              if (d > d0 + 1 && d[-2] == 0xa5
+                  && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
+                *(d - 1) = b;
+                if (ch) { ch[-1] += 2; s_ += 2; }
+                continue;
+              } else {
+                *d++ = c >> 8; *d = c & 0xff;
+              }
+              break;
+            case 0xa1eb :
+              if (d > d0 + 1 && d[-2] == 0xa5
+                  && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
+                *(d - 1) = b;
+                if (ch) { ch[-1] += 2; s_ += 2; }
+                continue;
+              } else {
+                *d++ = c >> 8; *d = c & 0xff;
+              }
+              break;
+            default :
+              *d++ = c >> 8; *d = c & 0xff;
+              break;
+            }
+            ctype = grn_str_katakana;
+          } else {
+            *d++ = c1; *d = c2;
+            ctype = grn_str_others;
+          }
+          break;
+        case 0x09 :
+          *d++ = c1; *d = c2;
+          ctype = grn_str_others;
+          break;
+        case 0x0a :
+          switch (c1 & 0x0f) {
+          case 1 :
+            switch (c2) {
+            case 0xbc :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_katakana;
+              break;
+            case 0xb9 :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_kanji;
+              break;
+            case 0xa1 :
+              if (removeblankp) {
+                if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+                continue;
+              } else {
+                *d = ' ';
+                ctype = GRN_STR_BLANK|grn_str_symbol;
+              }
+              break;
+            default :
+              if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
+                *d = c3;
+                ctype = grn_str_symbol;
+              } else {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_others;
+              }
+              break;
+            }
+            break;
+          case 2 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_symbol;
+            break;
+          case 3 :
+            c3 = c2 - 0x80;
+            if ('a' <= c3 && c3 <= 'z') {
+              ctype = grn_str_alpha;
+              *d = c3;
+            } else if ('A' <= c3 && c3 <= 'Z') {
+              ctype = grn_str_alpha;
+              *d = c3 + 0x20;
+            } else if ('0' <= c3 && c3 <= '9') {
+              ctype = grn_str_digit;
+              *d = c3;
+            } else {
+              ctype = grn_str_others;
+              *d++ = c1; *d = c2;
+            }
+            break;
+          case 4 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_hiragana;
+            break;
+          case 5 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_katakana;
+            break;
+          case 6 :
+          case 7 :
+          case 8 :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_symbol;
+            break;
+          default :
+            *d++ = c1; *d = c2;
+            ctype = grn_str_others;
+            break;
+          }
+          break;
+        default :
+          *d++ = c1; *d = c2;
+          ctype = grn_str_kanji;
+          break;
+        }
+      } else {
+        /* skip invalid character */
+        continue;
+      }
+    } else {
+      unsigned char c = *s;
+      switch (c >> 4) {
+      case 0 :
+      case 1 :
+        /* skip unprintable ascii */
+        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+        continue;
+      case 2 :
+        if (c == 0x20) {
+          if (removeblankp) {
+            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+            continue;
+          } else {
+            *d = ' ';
+            ctype = GRN_STR_BLANK|grn_str_symbol;
+          }
+        } else {
+          *d = c;
+          ctype = grn_str_symbol;
+        }
+        break;
+      case 3 :
+        *d = c;
+        ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+        break;
+      case 4 :
+        *d = ('A' <= c) ? c + 0x20 : c;
+        ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 5 :
+        *d = (c <= 'Z') ? c + 0x20 : c;
+        ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+        break;
+      case 6 :
+        *d = c;
+        ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 7 :
+        *d = c;
+        ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+        break;
+      default :
+        *d = c;
+        ctype = grn_str_others;
+        break;
+      }
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+#ifdef WITH_NFKC
+uint_least8_t grn_nfkc_ctype(const unsigned char *str);
+const char *grn_nfkc_map1(const unsigned char *str);
+const char *grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix);
+
+inline static grn_rc
+normalize_utf8(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
+  unsigned char *d, *d_, *de;
+  uint_least8_t *cp;
+  size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm); nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
+      if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+      GRN_FREE(nstr->norm); nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = nstr->ctypes;
+  d = (unsigned char *)nstr->norm;
+  de = d + ds;
+  d_ = NULL;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
+    if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
+      break;
+    }
+    if ((p = (unsigned char *)grn_nfkc_map1(s))) {
+      pe = p + strlen((char *)p);
+    } else {
+      p = s;
+      pe = p + ls;
+    }
+    if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
+      p = p2;
+      pe = p + strlen((char *)p);
+      if (cp) { cp--; }
+      if (ch) {
+        ch -= (d - d_);
+        s_ = s__;
+      }
+      d = d_;
+      length--;
+    }
+    for (; ; p += lp) {
+      if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
+        break;
+      }
+      if ((*p == ' ' && removeblankp) || *p < 0x20  /* skip unprintable ascii */ ) {
+        if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      } else {
+        if (de <= d + lp) {
+          unsigned char *norm;
+          ds += (ds >> 1) + lp;
+          if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
+            if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
+            if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+            GRN_FREE(nstr->norm); nstr->norm = NULL;
+            return GRN_NO_MEMORY_AVAILABLE;
+          }
+          de = norm + ds;
+          d = norm + (d - (unsigned char *)nstr->norm);
+          nstr->norm = norm;
+          if (ch) {
+            int16_t *checks;
+            if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
+              if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
+              GRN_FREE(nstr->checks); nstr->checks = NULL;
+              GRN_FREE(nstr->norm); nstr->norm = NULL;
+              return GRN_NO_MEMORY_AVAILABLE;
+            }
+            ch = checks + (ch - nstr->checks);
+            nstr->checks = checks;
+          }
+          if (cp) {
+            uint_least8_t *ctypes;
+            if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
+              GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
+              if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
+              GRN_FREE(nstr->norm); nstr->norm = NULL;
+              return GRN_NO_MEMORY_AVAILABLE;
+            }
+            cp = ctypes + (cp - nstr->ctypes);
+            nstr->ctypes = ctypes;
+          }
+        }
+        memcpy(d, p, lp);
+        d_ = d;
+        d += lp;
+        length++;
+        if (cp) { *cp++ = grn_nfkc_ctype(p); }
+        if (ch) {
+          size_t i;
+          if (s_ == s + ls) {
+            *ch++ = -1;
+          } else {
+            *ch++ = (int16_t)(s + ls - s_);
+            s__ = s_;
+            s_ = s + ls;
+          }
+          for (i = lp; i > 1; i--) { *ch++ = 0; }
+        }
+      }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+#endif /* WITH_NFKC */
+
+inline static grn_rc
+normalize_sjis(grn_ctx *ctx, grn_str *nstr)
+{
+  static uint16_t hankana[] = {
+    0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
+    0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
+    0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
+    0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
+    0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
+    0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
+    0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
+    0x814b
+  };
+  static unsigned char dakuten[] = {
+    0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
+    0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
+    0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
+    0, 0x7b
+  };
+  static unsigned char handaku[] = {
+    0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
+  };
+  int16_t *ch;
+  const unsigned char *s, *s_;
+  unsigned char *d, *d0, *d_, b, *e;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = nstr->orig_blen, length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    if ((*s & 0x80)) {
+      if (0xa0 <= *s && *s <= 0xdf) {
+        uint16_t c = hankana[*s - 0xa0];
+        switch (c) {
+        case 0x814a :
+          if (d > d0 + 1 && d[-2] == 0x83
+              && 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
+            *(d - 1) = b;
+            if (ch) { ch[-1]++; s_++; }
+            continue;
+          } else {
+            *d++ = c >> 8; *d = c & 0xff;
+          }
+          break;
+        case 0x814b :
+          if (d > d0 + 1 && d[-2] == 0x83
+              && 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
+            *(d - 1) = b;
+            if (ch) { ch[-1]++; s_++; }
+            continue;
+          } else {
+            *d++ = c >> 8; *d = c & 0xff;
+          }
+          break;
+        default :
+          *d++ = c >> 8; *d = c & 0xff;
+          break;
+        }
+        ctype = grn_str_katakana;
+      } else {
+        if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
+          unsigned char c1 = *s++, c2 = *s, c3 = 0;
+          if (0x81 <= c1 && c1 <= 0x87) {
+            switch (c1 & 0x0f) {
+            case 1 :
+              switch (c2) {
+              case 0x5b :
+                *d++ = c1; *d = c2;
+                ctype = grn_str_katakana;
+                break;
+              case 0x58 :
+                *d++ = c1; *d = c2;
+                ctype = grn_str_kanji;
+                break;
+              case 0x40 :
+                if (removeblankp) {
+                  if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+                  continue;
+                } else {
+                  *d = ' ';
+                  ctype = GRN_STR_BLANK|grn_str_symbol;
+                }
+                break;
+              default :
+                if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
+                  *d = c3;
+                  ctype = grn_str_symbol;
+                } else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
+                  *d = c3;
+                  ctype = grn_str_symbol;
+                } else {
+                  *d++ = c1; *d = c2;
+                  ctype = grn_str_others;
+                }
+                break;
+              }
+              break;
+            case 2 :
+              c3 = c2 - 0x1f;
+              if (0x4f <= c2 && c2 <= 0x58) {
+                ctype = grn_str_digit;
+                *d = c2 - 0x1f;
+              } else if (0x60 <= c2 && c2 <= 0x79) {
+                ctype = grn_str_alpha;
+                *d = c2 + 0x01;
+              } else if (0x81 <= c2 && c2 <= 0x9a) {
+                ctype = grn_str_alpha;
+                *d = c2 - 0x20;
+              } else if (0x9f <= c2 && c2 <= 0xf1) {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_hiragana;
+              } else {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_others;
+              }
+              break;
+            case 3 :
+              if (0x40 <= c2 && c2 <= 0x96) {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_katakana;
+              } else {
+                *d++ = c1; *d = c2;
+                ctype = grn_str_symbol;
+              }
+              break;
+            case 4 :
+            case 7 :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_symbol;
+              break;
+            default :
+              *d++ = c1; *d = c2;
+              ctype = grn_str_others;
+              break;
+            }
+          } else {
+            *d++ = c1; *d = c2;
+            ctype = grn_str_kanji;
+          }
+        } else {
+          /* skip invalid character */
+          continue;
+        }
+      }
+    } else {
+      unsigned char c = *s;
+      switch (c >> 4) {
+      case 0 :
+      case 1 :
+        /* skip unprintable ascii */
+        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+        continue;
+      case 2 :
+        if (c == 0x20) {
+          if (removeblankp) {
+            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+            continue;
+          } else {
+            *d = ' ';
+            ctype = GRN_STR_BLANK|grn_str_symbol;
+          }
+        } else {
+          *d = c;
+          ctype = grn_str_symbol;
+        }
+        break;
+      case 3 :
+        *d = c;
+        ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+        break;
+      case 4 :
+        *d = ('A' <= c) ? c + 0x20 : c;
+        ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 5 :
+        *d = (c <= 'Z') ? c + 0x20 : c;
+        ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+        break;
+      case 6 :
+        *d = c;
+        ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+        break;
+      case 7 :
+        *d = c;
+        ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+        break;
+      default :
+        *d = c;
+        ctype = grn_str_others;
+        break;
+      }
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+inline static grn_rc
+normalize_none(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = nstr->orig_blen, length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    unsigned char c = *s;
+    switch (c >> 4) {
+    case 0 :
+    case 1 :
+      /* skip unprintable ascii */
+      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      continue;
+    case 2 :
+      if (c == 0x20) {
+        if (removeblankp) {
+          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+          continue;
+        } else {
+          *d = ' ';
+          ctype = GRN_STR_BLANK|grn_str_symbol;
+        }
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 3 :
+      *d = c;
+      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+      break;
+    case 4 :
+      *d = ('A' <= c) ? c + 0x20 : c;
+      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 5 :
+      *d = (c <= 'Z') ? c + 0x20 : c;
+      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+      break;
+    case 6 :
+      *d = c;
+      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 7 :
+      *d = c;
+      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+      break;
+    default :
+      *d = c;
+      ctype = grn_str_others;
+      break;
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+/* use cp1252 as latin1 */
+inline static grn_rc
+normalize_latin1(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = nstr->orig_blen, length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    unsigned char c = *s;
+    switch (c >> 4) {
+    case 0 :
+    case 1 :
+      /* skip unprintable ascii */
+      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      continue;
+    case 2 :
+      if (c == 0x20) {
+        if (removeblankp) {
+          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+          continue;
+        } else {
+          *d = ' ';
+          ctype = GRN_STR_BLANK|grn_str_symbol;
+        }
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 3 :
+      *d = c;
+      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+      break;
+    case 4 :
+      *d = ('A' <= c) ? c + 0x20 : c;
+      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 5 :
+      *d = (c <= 'Z') ? c + 0x20 : c;
+      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+      break;
+    case 6 :
+      *d = c;
+      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 7 :
+      *d = c;
+      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+      break;
+    case 8 :
+      if (c == 0x8a || c == 0x8c || c == 0x8e) {
+        *d = c + 0x10;
+        ctype = grn_str_alpha;
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 9 :
+      if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
+        *d = (c == 0x9f) ? c + 0x60 : c;
+        ctype = grn_str_alpha;
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 0x0c :
+      *d = c + 0x20;
+      ctype = grn_str_alpha;
+      break;
+    case 0x0d :
+      *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
+      ctype = (c == 0xd7) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 0x0e :
+      *d = c;
+      ctype = grn_str_alpha;
+      break;
+    case 0x0f :
+      *d = c;
+      ctype = (c == 0xf7) ? grn_str_symbol : grn_str_alpha;
+      break;
+    default :
+      *d = c;
+      ctype = grn_str_others;
+      break;
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+inline static grn_rc
+normalize_koi8r(grn_ctx *ctx, grn_str *nstr)
+{
+  int16_t *ch;
+  const unsigned char *s, *s_, *e;
+  unsigned char *d, *d0, *d_;
+  uint_least8_t *cp, *ctypes, ctype;
+  size_t size = strlen(nstr->orig), length = 0;
+  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
+  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
+    return GRN_NO_MEMORY_AVAILABLE;
+  }
+  d0 = (unsigned char *) nstr->norm;
+  if (nstr->flags & GRN_STR_WITH_CHECKS) {
+    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
+      GRN_FREE(nstr->norm);
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  ch = nstr->checks;
+  if (nstr->flags & GRN_STR_WITH_CTYPES) {
+    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
+      GRN_FREE(nstr->checks);
+      GRN_FREE(nstr->norm);
+      nstr->checks = NULL;
+      nstr->norm = NULL;
+      return GRN_NO_MEMORY_AVAILABLE;
+    }
+  }
+  cp = ctypes = nstr->ctypes;
+  e = (unsigned char *)nstr->orig + size;
+  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
+    unsigned char c = *s;
+    switch (c >> 4) {
+    case 0 :
+    case 1 :
+      /* skip unprintable ascii */
+      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+      continue;
+    case 2 :
+      if (c == 0x20) {
+        if (removeblankp) {
+          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
+          continue;
+        } else {
+          *d = ' ';
+          ctype = GRN_STR_BLANK|grn_str_symbol;
+        }
+      } else {
+        *d = c;
+        ctype = grn_str_symbol;
+      }
+      break;
+    case 3 :
+      *d = c;
+      ctype = (c <= 0x39) ? grn_str_digit : grn_str_symbol;
+      break;
+    case 4 :
+      *d = ('A' <= c) ? c + 0x20 : c;
+      ctype = (c == 0x40) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 5 :
+      *d = (c <= 'Z') ? c + 0x20 : c;
+      ctype = (c <= 0x5a) ? grn_str_alpha : grn_str_symbol;
+      break;
+    case 6 :
+      *d = c;
+      ctype = (c == 0x60) ? grn_str_symbol : grn_str_alpha;
+      break;
+    case 7 :
+      *d = c;
+      ctype = (c <= 0x7a) ? grn_str_alpha : (c == 0x7f ? grn_str_others : grn_str_symbol);
+      break;
+    case 0x0a :
+      *d = c;
+      ctype = (c == 0xa3) ? grn_str_alpha : grn_str_others;
+      break;
+    case 0x0b :
+      if (c == 0xb3) {
+        *d = c - 0x10;
+        ctype = grn_str_alpha;
+      } else {
+        *d = c;
+        ctype = grn_str_others;
+      }
+      break;
+    case 0x0c :
+    case 0x0d :
+      *d = c;
+      ctype = grn_str_alpha;
+      break;
+    case 0x0e :
+    case 0x0f :
+      *d = c - 0x20;
+      ctype = grn_str_alpha;
+      break;
+    default :
+      *d = c;
+      ctype = grn_str_others;
+      break;
+    }
+    d++;
+    length++;
+    if (cp) { *cp++ = ctype; }
+    if (ch) {
+      *ch++ = (int16_t)(s + 1 - s_);
+      s_ = s + 1;
+      while (++d_ < d) { *ch++ = 0; }
+    }
+  }
+  if (cp) { *cp = grn_str_null; }
+  *d = '\0';
+  nstr->length = length;
+  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
+  return GRN_SUCCESS;
+}
+
+static grn_str *
+grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding encoding, int flags)
+{
+  /* TODO: support GRN_STR_REMOVEBLANK flag and ctypes */
+  grn_str *nstr;
+  if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
+    GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_fakenstr_open failed !");
+    return NULL;
+  }
+  if (!(nstr->norm = GRN_MALLOC(str_len + 1))) {
+    GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation for keyword on grn_snip_add_cond failed !");
+    GRN_FREE(nstr);
+    return NULL;
+  }
+  nstr->orig = str;
+  nstr->orig_blen = str_len;
+  memcpy(nstr->norm, str, str_len);
+  nstr->norm[str_len] = '\0';
+  nstr->norm_blen = str_len;
+  nstr->ctypes = NULL;
+  nstr->flags = flags;
+
+  if (flags & GRN_STR_WITH_CHECKS) {
+    int16_t f = 0;
+    unsigned char c;
+    size_t i;
+    if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) {
+      GRN_FREE(nstr->norm);
+      GRN_FREE(nstr);
+      return NULL;
+    }
+    switch (encoding) {
+    case GRN_ENC_EUC_JP:
+      for (i = 0; i < str_len; i++) {
+        if (!f) {
+          c = (unsigned char) str[i];
+          f = ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU ? 2 : (c == 0x8fU ? 3 : 1)
+            );
+          nstr->checks[i] = f;
+        } else {
+          nstr->checks[i] = 0;
+        }
+        f--;
+      }
+      break;
+    case GRN_ENC_SJIS:
+      for (i = 0; i < str_len; i++) {
+        if (!f) {
+          c = (unsigned char) str[i];
+          f = (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU)) ? 2 : 1);
+          nstr->checks[i] = f;
+        } else {
+          nstr->checks[i] = 0;
+        }
+        f--;
+      }
+      break;
+    case GRN_ENC_UTF8:
+      for (i = 0; i < str_len; i++) {
+        if (!f) {
+          c = (unsigned char) str[i];
+          f = (c & 0x80U ? (c & 0x20U ? (c & 0x10U ? 4 : 3)
+                           : 2)
+               : 1);
+          nstr->checks[i] = f;
+        } else {
+          nstr->checks[i] = 0;
+        }
+        f--;
+      }
+      break;
+    default:
+      for (i = 0; i < str_len; i++) {
+        nstr->checks[i] = 1;
+      }
+      break;
+    }
+  } else {
+    nstr->checks = NULL;
+  }
+  return nstr;
+}
+
+grn_str *
+grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding)
+{
+  grn_rc rc;
+  grn_str *nstr;
+  if (!str || !str_len) { return NULL; }
+
+  if (!(flags & GRN_STR_NORMALIZE)) {
+    return grn_fakenstr_open(ctx, str, str_len, encoding, flags);
+  }
+
+  if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
+    GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !");
+    return NULL;
+  }
+  nstr->orig = str;
+  nstr->orig_blen = str_len;
+  nstr->norm = NULL;
+  nstr->norm_blen = 0;
+  nstr->checks = NULL;
+  nstr->ctypes = NULL;
+  nstr->encoding = encoding;
+  nstr->flags = flags;
+  switch (encoding) {
+  case GRN_ENC_EUC_JP :
+    rc = normalize_euc(ctx, nstr);
+    break;
+  case GRN_ENC_UTF8 :
+#ifdef WITH_NFKC
+    rc = normalize_utf8(ctx, nstr);
+#else /* WITH_NFKC */
+    rc = normalize_none(ctx, nstr);
+#endif /* WITH_NFKC */
+    break;
+  case GRN_ENC_SJIS :
+    rc = normalize_sjis(ctx, nstr);
+    break;
+  case GRN_ENC_LATIN1 :
+    rc = normalize_latin1(ctx, nstr);
+    break;
+  case GRN_ENC_KOI8R :
+    rc = normalize_koi8r(ctx, nstr);
+    break;
+  default :
+    rc = normalize_none(ctx, nstr);
+    break;
+  }
+  if (rc) {
+    grn_str_close(ctx, nstr);
+    return NULL;
+  }
+  return nstr;
+}
+
+grn_str *
+grn_str_open(grn_ctx *ctx, const char *str, unsigned int str_len, int flags)
+{
+  return grn_str_open_(ctx, str, str_len, flags, ctx->encoding);
+}
+
+grn_rc
+grn_str_close(grn_ctx *ctx, grn_str *nstr)
+{
+  if (nstr) {
+    if (nstr->norm) { GRN_FREE(nstr->norm); }
+    if (nstr->ctypes) { GRN_FREE(nstr->ctypes); }
+    if (nstr->checks) { GRN_FREE(nstr->checks); }
+    GRN_FREE(nstr);
+    return GRN_SUCCESS;
+  } else {
+    return GRN_INVALID_ARGUMENT;
+  }
+}
+
 static const char *grn_enc_string[] = {
   "default",
   "none",

  Modified: plugins/suggest/suggest.c (+6 -17)
===================================================================
--- plugins/suggest/suggest.c    2013-01-21 17:53:14 +0900 (b47174c)
+++ plugins/suggest/suggest.c    2013-01-21 17:55:05 +0900 (0d84cfc)
@@ -304,25 +304,16 @@ complete(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj *col,
   if ((res = grn_table_create(ctx, NULL, 0, NULL,
                               GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
     grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
-    grn_obj *string;
+    grn_str *norm;
     if (GRN_TEXT_LEN(query) &&
-        (string = grn_string_open(ctx, TEXT_VALUE_LEN(query),
-                                  GRN_NORMALIZER_AUTO, 0))) {
+        (norm = grn_str_open(ctx, TEXT_VALUE_LEN(query), GRN_STR_NORMALIZE))) {
       grn_table_cursor *cur;
       /* RK search + prefix search */
       grn_obj *index;
-      const char *normalized;
-      unsigned int normalized_length_in_bytes;
-      grn_string_get_normalized(ctx, string,
-                                &normalized,
-                                &normalized_length_in_bytes,
-                                NULL);
-            /* FIXME: support index selection */
+      /* FIXME: support index selection */
       if (grn_column_index(ctx, col, GRN_OP_PREFIX, &index, 1, NULL)) {
         if ((cur = grn_table_cursor_open(ctx, grn_ctx_at(ctx, index->header.domain),
-                                         normalized,
-                                         normalized_length_in_bytes,
-                                         NULL, 0, 0, -1,
+                                         norm->norm, norm->norm_blen, NULL, 0, 0, -1,
                                          GRN_CURSOR_PREFIX|GRN_CURSOR_RK))) {
           grn_id id;
           while ((id = grn_table_cursor_next(ctx, cur))) {
@@ -351,9 +342,7 @@ complete(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj *col,
       if (((prefix_search_mode == GRN_SUGGEST_SEARCH_YES) ||
            (prefix_search_mode == GRN_SUGGEST_SEARCH_AUTO &&
             !grn_table_size(ctx, res))) &&
-          (cur = grn_table_cursor_open(ctx, items,
-                                       normalized,
-                                       normalized_length_in_bytes,
+          (cur = grn_table_cursor_open(ctx, items, norm->norm, norm->norm_blen,
                                        NULL, 0, 0, -1, GRN_CURSOR_PREFIX))) {
         grn_id id;
         while ((id = grn_table_cursor_next(ctx, cur))) {
@@ -362,7 +351,7 @@ complete(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj *col,
         }
         grn_table_cursor_close(ctx, cur);
       }
-      grn_obj_close(ctx, string);
+      grn_str_close(ctx, norm);
     }
     output(ctx, items, res, tid, sortby, output_columns, offset, limit);
     grn_obj_close(ctx, res);

  Modified: test/unit/util/test-string.c (+14 -34)
===================================================================
--- test/unit/util/test-string.c    2013-01-21 17:53:14 +0900 (05fcb67)
+++ test/unit/util/test-string.c    2013-01-21 17:55:05 +0900 (b1b617b)
@@ -190,36 +190,26 @@ test_normalize(gconstpointer data)
 {
   const gchar *utf8_expected, *encoded_expected;
   const gchar *utf8_input, *encoded_input;
-  grn_obj *string;
+  grn_str *string;
   const gchar *normalized_text;
-  guint normalized_text_length;
-  guint normalized_text_n_characters;
+  guint normalized_text_len;
   int flags;
   grn_encoding encoding;
 
   encoding = gcut_data_get_int(data, "encoding");
   GRN_CTX_SET_ENCODING(&context, encoding);
-  flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
+  flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
   utf8_input = gcut_data_get_string(data, "input");
   encoded_input = convert_encoding(utf8_input, encoding);
-  string = grn_string_open(&context,
-                           encoded_input,
-                           strlen(encoded_input),
-                           GRN_NORMALIZER_AUTO,
-                           flags);
-  grn_string_get_normalized(&context, string,
-                            &normalized_text,
-                            &normalized_text_length,
-                            &normalized_text_n_characters);
-  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
-  grn_obj_unlink(&context, string);
+  string = grn_str_open(&context, encoded_input, strlen(encoded_input), flags);
+  normalized_text = cut_take_strndup(string->norm, string->norm_blen);
+  normalized_text_len = string->norm_blen;
+  grn_test_assert(grn_str_close(&context, string));
 
   utf8_expected = gcut_data_get_string(data, "expected");
   encoded_expected = convert_encoding(utf8_expected, encoding);
   cut_assert_equal_string(encoded_expected, normalized_text);
-  cut_assert_equal_uint(strlen(encoded_expected), normalized_text_length);
-  cut_assert_equal_uint(g_utf8_strlen(utf8_expected, -1),
-                        normalized_text_n_characters);
+  cut_assert_equal_int(strlen(encoded_expected), normalized_text_len);
 }
 
 void
@@ -253,13 +243,11 @@ data_normalize_broken(void)
 void
 test_normalize_broken(gconstpointer data)
 {
-  grn_obj *string;
+  grn_str *string;
   const gchar *input, *encoded_input;
-  const gchar *normalized_text;
   grn_encoding input_encoding, context_encoding;
   gint input_length;
-  guint normalized_text_length, normalized_text_n_characters;
-  int flags = GRN_STRING_WITH_CHECKS | GRN_STRING_WITH_TYPES;
+  int flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
 
   context_encoding = gcut_data_get_int(data, "context-encoding");
   GRN_CTX_SET_ENCODING(&context, context_encoding);
@@ -271,18 +259,10 @@ test_normalize_broken(gconstpointer data)
   if (input_length < 0) {
     input_length = strlen(encoded_input);
   }
-  string = grn_string_open(&context, encoded_input, input_length,
-                           GRN_NORMALIZER_AUTO, flags);
-  grn_string_get_normalized(&context, string,
-                            &normalized_text,
-                            &normalized_text_length,
-                            &normalized_text_n_characters);
-  normalized_text = cut_take_strndup(normalized_text, normalized_text_length);
-  grn_obj_unlink(&context, string);
-
-  cut_assert_equal_string("", normalized_text);
-  cut_assert_equal_int(0, normalized_text_length);
-  cut_assert_equal_int(0, normalized_text_n_characters);
+  string = grn_str_open(&context, encoded_input, input_length, flags);
+  cut_assert_equal_string("", string->norm);
+  cut_assert_equal_int(0, string->norm_blen);
+  grn_test_assert(grn_str_close(&context, string));
 }
 
 void
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index