groonga/groonga at f872b73 [master] Extract common code to support auto expand (Groonga-commit) - Groonga - fulltext search engine.

Kouhei Sutou	2018-11-05 16:39:24 +0900 (Mon, 05 Nov 2018)

  Revision: f872b737db4a44d54804e4b58af7353342efc690
  https://github.com/groonga/groonga/commit/f872b737db4a44d54804e4b58af7353342efc690

  Message:
    Extract common code to support auto expand

  Modified files:
    lib/normalizer.c

  Modified: lib/normalizer.c (+290 -264)
===================================================================

--- lib/normalizer.c    2018-11-05 14:36:31 +0900 (3b3ae08c1)
+++ lib/normalizer.c    2018-11-05 16:39:24 +0900 (44972cb94)
@@ -560,25 +560,28 @@ sjis_normalize(grn_ctx *ctx, grn_string *nstr)
 
 #ifdef GRN_WITH_NFKC
 typedef struct {
-  grn_string *string;
-  grn_nfkc_normalize_options *options;
-  int16_t *ch;
-  const unsigned char *s;
-  const unsigned char *s_;
-  const unsigned char *s__;
-  const unsigned char *p;
-  const unsigned char *p2;
-  const unsigned char *pe;
-  const unsigned char *e;
+  size_t size; /* capacity of dest in bytes; buffers get size + 1 elements */
+
+  unsigned char *dest; /* start of the output buffer */
+  unsigned char *dest_end; /* dest + size; the d field below is the write cursor */
   unsigned char *d;
-  unsigned char *d_;
-  unsigned char *de;
-  uint8_t *cp;
+  unsigned char *d_; /* -1: start of the previously written character */
+  size_t n_characters; /* number of characters written to dest */
+
+  int16_t *checks; /* optional buffer, one entry per output byte */
+  int16_t *c; /* write cursor into checks */
+
+  uint8_t *types; /* optional buffer, one entry per output character */
+  uint8_t *t; /* write cursor into types */
+
   uint64_t *offsets;
-  size_t length;
-  size_t ls;
-  size_t lp;
-  size_t ds;
+  uint64_t *o; /* write cursor into the offsets field above */
+} grn_nfkc_normalize_context;
+
+typedef struct {
+  grn_string *string;
+  grn_nfkc_normalize_options *options;
+  grn_nfkc_normalize_context context;
   grn_bool remove_blank_p;
   grn_bool remove_tokenized_delimiter_p;
 
@@ -591,55 +594,92 @@ typedef struct {
 } grn_nfkc_normalize_data;
 
 grn_inline static void
-grn_nfkc_normalize_data_init(grn_ctx *ctx,
-                             grn_nfkc_normalize_data *data,
-                             grn_obj *string,
-                             grn_nfkc_normalize_options *options)
+grn_nfkc_normalize_context_init(grn_ctx *ctx,
+                                grn_nfkc_normalize_context *context,
+                                grn_bool need_checks,
+                                grn_bool need_types,
+                                grn_bool need_offsets,
+                                const char *context_tag)
 {
-  size_t size;
-
-  memset(data, 0, sizeof(grn_nfkc_normalize_data));
-  data->string = (grn_string *)string;
-  data->options = options;
-  size = data->string->original_length_in_bytes;
-  data->ds = size * 3;
-  data->remove_blank_p = (data->string->flags & GRN_STRING_REMOVE_BLANK);
-  data->remove_tokenized_delimiter_p =
-    (data->string->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER);
-  if (!(data->string->normalized = GRN_MALLOC(data->ds + 1))) {
+  if (!(context->dest = GRN_MALLOC(context->size + 1))) { /* size is set by the caller */
     ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalize][nfkc] failed to allocate normalized text space");
+        "[normalize][nfkc]%s failed to allocate normalized text space", context_tag);
     return;
   }
-  if (data->string->flags & GRN_STRING_WITH_CHECKS) {
-    if (!(data->string->checks = GRN_MALLOC(sizeof(int16_t) * (data->ds + 1)))) {
+  context->dest_end = context->dest + context->size;
+  context->d = context->dest;
+  context->d_ = NULL; /* no character has been written yet */
+
+  if (need_checks) {
+    if (!(context->checks = GRN_MALLOC(sizeof(int16_t) * (context->size + 1)))) {
       ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalize][nfkc] failed to allocate checks space");
+          "[normalize][nfkc]%s failed to allocate checks space", context_tag);
       return;
     }
   }
-  data->ch = data->string->checks;
-  if (data->string->flags & GRN_STRING_WITH_TYPES) {
-    if (!(data->string->ctypes = GRN_MALLOC(data->ds + 1))) {
+  context->c = context->checks; /* stays NULL if not requested; callers zero the context first */
+
+  if (need_types) {
+    if (!(context->types = GRN_MALLOC(sizeof(uint8_t) * (context->size + 1)))) {
       ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalize][nfkc] failed to allocate character types space");
+          "[normalize][nfkc]%s failed to allocate character types space", context_tag);
       return;
     }
   }
-  data->cp = data->string->ctypes;
-  if (data->options->report_source_offset) {
-    data->string->offsets = GRN_MALLOC(sizeof(uint64_t) * (data->ds + 1));
-    if (!data->string->offsets) {
+  context->t = context->types;
+
+  if (need_offsets) {
+    if (!(context->offsets = GRN_MALLOC(sizeof(uint64_t) * (context->size + 1)))) {
       ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalize][nfkc] failed to allocate offsets space");
+          "[normalize][nfkc]%s failed to allocate offsets space", context_tag);
       return;
     }
   }
-  data->offsets = data->string->offsets;
-  data->d = (unsigned char *)(data->string->normalized);
-  data->de = data->d + data->ds;
-  data->d_ = NULL;
-  data->e = (unsigned char *)(data->string->original) + size;
+  context->o = context->offsets;
+}
+
+grn_inline static void
+grn_nfkc_normalize_context_fin(grn_ctx *ctx,
+                               grn_nfkc_normalize_context *context)
+{ /* Frees every buffer the context still holds. */
+  if (context->dest) { /* NULL: never allocated, or the caller took the buffer over */
+    GRN_FREE(context->dest);
+  }
+  if (context->checks) {
+    GRN_FREE(context->checks);
+  }
+  if (context->types) {
+    GRN_FREE(context->types);
+  }
+  if (context->offsets) {
+    GRN_FREE(context->offsets);
+  }
+} /* The freed pointers are not reset to NULL here. */
+
+grn_inline static void
+grn_nfkc_normalize_data_init(grn_ctx *ctx,
+                             grn_nfkc_normalize_data *data,
+                             grn_obj *string,
+                             grn_nfkc_normalize_options *options)
+{
+  size_t size;
+
+  memset(data, 0, sizeof(grn_nfkc_normalize_data));
+  data->string = (grn_string *)string;
+  data->options = options;
+  data->remove_blank_p = (data->string->flags & GRN_STRING_REMOVE_BLANK);
+  data->remove_tokenized_delimiter_p =
+    (data->string->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER);
+
+  size = data->string->original_length_in_bytes;
+  data->context.size = size * 3;
+
+  grn_nfkc_normalize_context_init(ctx,
+                                  &(data->context),
+                                  data->string->flags & GRN_STRING_WITH_CHECKS,
+                                  data->string->flags & GRN_STRING_WITH_TYPES,
+                                  data->options->report_source_offset,
+                                  "");
 
   data->unified_hyphen[0] = '-';
   /* U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK */
@@ -652,55 +692,72 @@ grn_nfkc_normalize_data_init(grn_ctx *ctx,
 }
 
 grn_inline static void
-grn_nfkc_normalize_expand(grn_ctx *ctx,
-                          grn_nfkc_normalize_data *data)
+grn_nfkc_normalize_context_expand(grn_ctx *ctx,
+                                  grn_nfkc_normalize_context *context,
+                                  size_t least_required_size,
+                                  const char *context_tag)
 {
-  unsigned char *normalized;
-  data->ds += (data->ds >> 1) + data->lp;
-  normalized = GRN_REALLOC(data->string->normalized, data->ds + 1);
-  if (!normalized) {
+  unsigned char *dest;
+  context->size += (context->size >> 1) + least_required_size; /* grow by 50% plus the request */
+  dest = GRN_REALLOC(context->dest, context->size + 1);
+  if (!dest) {
     ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[normalize][nfkc] failed to expand normalized text space");
+        "[normalize][nfkc]%s failed to expand destination text space",
+        context_tag);
     return;
   }
-  data->de = normalized + data->ds;
-  data->d =
-    normalized + (data->d - (unsigned char *)(data->string->normalized));
-  data->string->normalized = (char *)normalized;
-  if (data->ch) {
+  context->dest_end = dest + context->size;
+  context->d = dest + (context->d - context->dest); /* rebase the cursor; d_ is not rebased */
+  context->dest = dest;
+  if (context->c) {
     int16_t *checks;
-    if (!(checks = GRN_REALLOC(data->string->checks,
-                               sizeof(int16_t) * (data->ds + 1)))) {
+    if (!(checks = GRN_REALLOC(context->checks, /* base pointer, not the cursor */
+                               sizeof(int16_t) * (context->size + 1)))) {
       ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalize][nfkc] failed to expand checks space");
+          "[normalize][nfkc]%s failed to expand checks space",
+          context_tag);
       return;
     }
-    data->ch = checks + (data->ch - data->string->checks);
-    data->string->checks = checks;
+    context->c = checks + (context->c - context->checks);
+    context->checks = checks;
   }
-  if (data->cp) {
-    uint8_t *ctypes;
-    if (!(ctypes = GRN_REALLOC(data->string->ctypes, data->ds + 1))) {
+  if (context->t) {
+    uint8_t *types;
+    if (!(types = GRN_REALLOC(context->types,
+                              sizeof(uint8_t) * (context->size + 1)))) {
       ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalize][nfkc] failed to expand character types space");
+          "[normalize][nfkc]%s failed to expand character types space",
+          context_tag);
       return;
     }
-    data->cp = ctypes + (data->cp - data->string->ctypes);
-    data->string->ctypes = ctypes;
+    context->t = types + (context->t - context->types);
+    context->types = types;
   }
-  if (data->offsets) {
-    uint64_t *new_offsets;
-    if (!(new_offsets = GRN_REALLOC(data->string->offsets,
-                                    sizeof(uint64_t) * (data->ds + 1)))) {
+  if (context->o) {
+    uint64_t *offsets;
+    if (!(offsets = GRN_REALLOC(context->offsets,
+                                sizeof(uint64_t) * (context->size + 1)))) {
       ERR(GRN_NO_MEMORY_AVAILABLE,
-          "[normalize][nfkc] failed to expand offsets space");
+          "[normalize][nfkc]%s failed to expand offsets space",
+          context_tag);
       return;
     }
-    data->offsets = new_offsets + (data->offsets - data->string->offsets);
-    data->string->offsets = new_offsets;
+    context->o = offsets + (context->o - context->offsets);
+    context->offsets = offsets;
   }
 }
 
+grn_inline static void
+grn_nfkc_normalize_expand(grn_ctx *ctx,
+                          grn_nfkc_normalize_data *data,
+                          size_t least_required_size)
+{ /* Grows the buffers of data->context; the caller checks ctx->rc afterwards. */
+  grn_nfkc_normalize_context_expand(ctx,
+                                    &(data->context),
+                                    least_required_size,
+                                    ""); /* empty tag: plain "[normalize][nfkc]" message prefix */
+}
+
 grn_inline static const unsigned char *
 grn_nfkc_normalize_unify_kana(const unsigned char *utf8_char,
                               unsigned char *unified)
@@ -1131,22 +1188,11 @@ static void
 grn_nfkc_normalize_unify(grn_ctx *ctx,
                          grn_nfkc_normalize_data *data)
 {
-  const unsigned char *current = data->string->normalized;
-  const unsigned char *end = data->d;
+  const unsigned char *current = data->context.dest;
+  const unsigned char *end = data->context.d;
   size_t i_byte;
   size_t i_character;
-  unsigned char *unified = NULL;
-  unsigned char *unified_end = NULL;
-  unsigned char *unified_previous = NULL;
-  unsigned char *unified_current;
-  uint8_t *unified_char_types = NULL;
-  uint8_t *unified_char_types_current = NULL;
-  int16_t *unified_checks = NULL;
-  int16_t *unified_checks_current = NULL;
-  uint64_t *unified_offsets = NULL;
-  uint64_t *unified_offsets_current = NULL;
-  unsigned int unified_n_characters = 0;
-  size_t unified_data_size = data->ds;
+  grn_nfkc_normalize_context unify;
 
   if (!(data->options->unify_kana ||
         data->options->unify_kana_case ||
@@ -1161,32 +1207,16 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     return;
   }
 
-  unified = GRN_MALLOC(unified_data_size + 1);
-  if (!unified) {goto exit;}
-  unified_end = unified + unified_data_size;
-  unified_current = unified;
-
-  if (data->ch) {
-    unified_checks = GRN_MALLOC(sizeof(int16_t) * (unified_data_size + 1));
-    if (!unified_checks) {
-      goto exit;
-    }
-    unified_checks_current = unified_checks;
-  }
-  if (data->cp) {
-    unified_char_types = GRN_MALLOC(sizeof(uint8_t) * (unified_data_size + 1));
-    if (!unified_char_types) {
-      goto exit;
-    }
-    unified_char_types_current = unified_char_types;
-  }
-
-  if (data->offsets) {
-    unified_offsets = GRN_MALLOC(sizeof(uint64_t) * (unified_data_size + 1));
-    if (!unified_char_types) {
-      goto exit;
-    }
-    unified_offsets_current = unified_offsets;
+  memset(&unify, 0, sizeof(grn_nfkc_normalize_context));
+  unify.size = data->context.size;
+  grn_nfkc_normalize_context_init(ctx,
+                                  &unify,
+                                  data->context.checks != NULL,
+                                  data->context.types != NULL,
+                                  data->context.offsets != NULL,
+                                  "[unify]");
+  if (ctx->rc != GRN_SUCCESS) {
+    goto exit;
   }
 
   i_byte = 0;
@@ -1201,8 +1231,8 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     char_length = grn_charlen_(ctx, current, end, GRN_ENC_UTF8);
     unified_char_length = char_length;
 
-    if (data->cp) {
-      char_type = data->string->ctypes[i_character];
+    if (data->context.t) {
+      char_type = data->context.types[i_character];
     } else {
       char_type = data->options->char_type_func(current);
     }
@@ -1293,8 +1323,8 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     if (data->options->unify_katakana_v_sounds) {
       if (grn_nfkc_normalize_unify_katakana_v_sounds(unifying,
                                                      unified_char_length,
-                                                     unified_previous,
-                                                     unified_current)) {
+                                                     unify.d_,
+                                                     unify.d)) {
         skip = GRN_TRUE;
       }
     }
@@ -1302,35 +1332,38 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     if (data->options->unify_katakana_bu_sound) {
       if (grn_nfkc_normalize_unify_katakana_bu_sound(unifying,
                                                      unified_char_length,
-                                                     unified_previous,
-                                                     unified_current)) {
+                                                     unify.d_,
+                                                     unify.d)) {
         skip = GRN_TRUE;
       }
     }
 
     if (!skip) {
-      if (unified_current + unified_char_length >= unified_end) {
-        /* TODO: Expand automatically. */
-        ERR(GRN_NO_MEMORY_AVAILABLE,
-            "[normalize][nfkc] too large unified data");
-        goto exit;
+      if (unify.d + unified_char_length >= unify.dest_end) {
+        grn_nfkc_normalize_context_expand(ctx,
+                                          &unify,
+                                          unified_char_length,
+                                          "[unify]");
+        if (ctx->rc != GRN_SUCCESS) {
+          goto exit;
+        }
       }
-      grn_memcpy(unified_current, unifying, unified_char_length);
-      unified_previous = unified_current;
-      unified_current += unified_char_length;
-      unified_n_characters++;
-      if (unified_char_types_current) {
-        *(unified_char_types_current++) = char_type;
+      grn_memcpy(unify.d, unifying, unified_char_length);
+      unify.d_ = unify.d;
+      unify.d += unified_char_length;
+      unify.n_characters++;
+      if (unify.t) {
+        *(unify.t++) = char_type;
       }
-      if (unified_checks_current) {
+      if (unify.c) {
         size_t i;
-        *(unified_checks_current++) = data->string->checks[i_byte];
+        *(unify.c++) = data->context.checks[i_byte];
         for (i = 1; i < unified_char_length; i++) {
-          *(unified_checks_current++) = 0;
+          *(unify.c++) = 0;
         }
       }
-      if (unified_offsets_current) {
-        *(unified_offsets_current++) = data->string->offsets[i_character];
+      if (unify.o) {
+        *(unify.o++) = data->context.offsets[i_character];
       }
     }
 
@@ -1339,56 +1372,36 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     i_character++;
   }
   if (data->options->unify_katakana_v_sounds) {
-    grn_nfkc_normalize_unify_katakana_v_sounds(NULL,
-                                               0,
-                                               unified_previous,
-                                               unified_current);
+    grn_nfkc_normalize_unify_katakana_v_sounds(NULL, 0, unify.d_, unify.d);
   }
   if (data->options->unify_katakana_bu_sound) {
-    grn_nfkc_normalize_unify_katakana_bu_sound(NULL,
-                                               0,
-                                               unified_previous,
-                                               unified_current);
+    grn_nfkc_normalize_unify_katakana_bu_sound(NULL, 0, unify.d_, unify.d);
   }
 
-  GRN_FREE(data->string->normalized);
-  if (data->string->checks) {
-    GRN_FREE(data->string->checks);
-  }
-  if (data->string->ctypes) {
-    GRN_FREE(data->string->ctypes);
-  }
-  if (data->string->offsets) {
-    GRN_FREE(data->string->offsets);
-  }
-  data->string->normalized = unified;
-  data->d = unified_current;
-  data->d_ = unified_previous;
-  data->string->checks = unified_checks;
-  data->ch = unified_checks_current;
-  data->string->ctypes = unified_char_types;
-  data->cp = unified_char_types_current;
-  data->string->offsets = unified_offsets;
-  data->offsets = unified_offsets_current;
-  data->length = unified_n_characters;
-  unified = NULL;
-  unified_checks = NULL;
-  unified_char_types = NULL;
-  unified_offsets = NULL;
+  grn_nfkc_normalize_context_fin(ctx, &(data->context));
+
+  data->context.size = unify.size;
+
+  data->context.dest = unify.dest;
+  data->context.d = unify.d;
+  data->context.d_ = unify.d_;
+  data->context.n_characters = unify.n_characters;
+  unify.dest = NULL;
+
+  data->context.checks = unify.checks;
+  data->context.c = unify.c;
+  unify.checks = NULL;
+
+  data->context.types = unify.types;
+  data->context.t = unify.t;
+  unify.types = NULL;
+
+  data->context.offsets = unify.offsets;
+  data->context.o = unify.o;
+  unify.offsets = NULL;
 
 exit:
-  if (unified) {
-    GRN_FREE(unified);
-  }
-  if (unified_checks) {
-    GRN_FREE(unified_checks);
-  }
-  if (unified_char_types) {
-    GRN_FREE(unified_char_types);
-  }
-  if (unified_offsets) {
-    GRN_FREE(unified_offsets);
-  }
+  grn_nfkc_normalize_context_fin(ctx, &unify);
 }
 
 grn_rc
@@ -1397,120 +1410,133 @@ grn_nfkc_normalize(grn_ctx *ctx,
                    grn_nfkc_normalize_options *options)
 {
   grn_nfkc_normalize_data data;
+  const unsigned char *source;
+  const unsigned char *source_ = NULL; /* -1 */
+  const unsigned char *source__ = NULL; /* -2 */
+  const unsigned char *source_end;
+  size_t source_char_length;
+  grn_nfkc_normalize_context *context;
 
   grn_nfkc_normalize_data_init(ctx, &data, string, options);
+  context = &(data.context);
   if (ctx->rc != GRN_SUCCESS) {
     goto exit;
   }
 
-  for (data.s = data.s_ = (unsigned char *)(data.string->original);
-       ;
-       data.s += data.ls) {
-    if (!(data.ls = grn_charlen_(ctx, data.s, data.e, GRN_ENC_UTF8))) {
+  source = source_ = (unsigned char *)(data.string->original);
+  source_end = source + data.string->original_length_in_bytes;
+  for (; source < source_end; source += source_char_length) {
+    source_char_length = grn_charlen_(ctx, source, source_end, GRN_ENC_UTF8);
+    if (source_char_length == 0) {
       break;
     }
     if (data.remove_tokenized_delimiter_p &&
         grn_tokenizer_is_tokenized_delimiter(ctx,
-                                             (const char *)(data.s),
-                                             data.ls,
+                                             (const char *)source,
+                                             source_char_length,
                                              GRN_ENC_UTF8)) {
       continue;
     }
-    if ((data.p = (unsigned char *)data.options->decompose_func(data.s))) {
-      data.pe = data.p + strlen((char *)data.p);
-    } else {
-      data.p = data.s;
-      data.pe = data.p + data.ls;
-    }
-    if (data.d_ &&
-        (data.p2 = (unsigned char *)options->compose_func(data.d_, data.p))) {
-      data.p = data.p2;
-      data.pe = data.p + strlen((char *)(data.p));
-      if (data.cp) { data.cp--; }
-      if (data.ch) {
-        data.ch -= (data.d - data.d_);
-        if (data.ch[0] >= 0) {
-          data.s_ = data.s__;
-        }
-      }
-      if (data.offsets) {
-        data.offsets--;
+    {
+      const char *decomposed;
+      const unsigned char *current;
+      const unsigned char *current_end;
+      size_t current_length;
+
+      decomposed = data.options->decompose_func(source);
+      if (decomposed) {
+        current = decomposed;
+        current_end = current + strlen(decomposed);
+      } else {
+        current = source;
+        current_end = current + source_char_length;
       }
-      data.d = data.d_;
-      data.length--;
-    }
-    for (; ; data.p += data.lp) {
-      if (!(data.lp = grn_charlen_(ctx, data.p, data.pe, GRN_ENC_UTF8))) {
-        break;
+      if (context->d_) {
+        const char *composed = NULL;
+        composed = options->compose_func(context->d_, current);
+        if (composed) {
+          current = composed;
+          current_end = current + strlen(composed);
+          if (context->t) { context->t--; }
+          if (context->c) {
+            context->c -= (context->d - context->d_);
+            if (context->c[0] >= 0) {
+              source_ = source__;
+            }
+          }
+          if (context->o) {
+            context->o--;
+          }
+          context->d = context->d_;
+          context->n_characters--;
+        }
       }
-      if ((*(data.p) == ' ' && data.remove_blank_p) ||
-          *(data.p) < 0x20 /* skip unprintable ascii */) {
-        if (data.cp > data.string->ctypes) { *(data.cp - 1) |= GRN_CHAR_BLANK; }
-        if (!data.options->include_removed_source_location) {
-          data.s_ += data.lp;
+      for (; current < current_end; current += current_length) {
+        current_length = grn_charlen_(ctx, current, current_end, GRN_ENC_UTF8);
+        if (current_length == 0) {
+          break;
         }
-      } else {
-        if (data.de <= data.d + data.lp) {
-          grn_nfkc_normalize_expand(ctx, &data);
-          if (ctx->rc != GRN_SUCCESS) {
-            goto exit;
+        if ((current[0] == ' ' && data.remove_blank_p) ||
+            current[0] < 0x20 /* skip unprintable ascii */) {
+          if (context->t > context->types) {
+            context->t[-1] |= GRN_CHAR_BLANK;
+          }
+          if (!data.options->include_removed_source_location) {
+            source_ += current_length;
+          }
+        } else {
+          if (context->dest_end <= context->d + current_length) {
+            grn_nfkc_normalize_expand(ctx, &data, current_length);
+            if (ctx->rc != GRN_SUCCESS) {
+              goto exit;
+            }
           }
-        }
 
-        grn_memcpy(data.d, data.p, data.lp);
-        data.d_ = data.d;
-        if (data.lp > 0) {
-          data.d += data.lp;
-          data.length++;
-          if (data.cp) {
+          grn_memcpy(context->d, current, current_length);
+          context->d_ = context->d;
+          context->d += current_length;
+          context->n_characters++;
+          if (context->t) {
             grn_char_type char_type;
-            char_type = data.options->char_type_func(data.p);
-            *(data.cp++) = char_type;
+            char_type = data.options->char_type_func(current);
+            *(context->t++) = char_type;
           }
-          if (data.ch) {
+          if (context->c) {
             size_t i;
-            if (data.s_ == data.s + data.ls) {
-              *(data.ch++) = -1;
+            if (source_ == source + source_char_length) {
+              *(context->c++) = -1;
             } else {
-              *(data.ch++) = (int16_t)(data.s + data.ls - data.s_);
-              data.s__ = data.s_;
-              data.s_ = data.s + data.ls;
+              *(context->c++) = (int16_t)(source + source_char_length - source_);
+              source__ = source_;
+              source_ = source + source_char_length;
             }
-            for (i = data.lp; i > 1; i--) { *(data.ch++) = 0; }
+            for (i = current_length; i > 1; i--) { *(context->c++) = 0; }
           }
-          if (data.offsets) {
-            *(data.offsets++) =
-              (uint64_t)(data.s - (const unsigned char *)(data.string->original));
+          if (context->o) {
+            *(context->o++) =
+              (uint64_t)(source - (const unsigned char *)(data.string->original));
           }
         }
       }
     }
   }
   grn_nfkc_normalize_unify(ctx, &data);
-  if (data.cp) { *(data.cp) = GRN_CHAR_NULL; }
-  if (data.offsets) { *(data.offsets) = data.string->original_length_in_bytes; }
-  *(data.d) = '\0';
-  data.string->n_characters = data.length;
-  data.string->normalized_length_in_bytes =
-    (size_t)(data.d - (unsigned char *)(data.string->normalized));
+  if (context->t) { *(context->t) = GRN_CHAR_NULL; }
+  if (context->o) { *(context->o) = data.string->original_length_in_bytes; }
+  *(context->d) = '\0';
+  data.string->n_characters = context->n_characters;
+  data.string->normalized = context->dest;
+  data.string->normalized_length_in_bytes = (size_t)(context->d - context->dest);
+  data.string->checks = context->checks;
+  data.string->ctypes = context->types;
+  data.string->offsets = context->offsets;
+  context->dest = NULL;
+  context->checks = NULL;
+  context->types = NULL;
+  context->offsets = NULL;
 exit:
   if (ctx->rc != GRN_SUCCESS) {
-    if (data.string->normalized) {
-      GRN_FREE(data.string->normalized);
-      data.string->normalized = NULL;
-    }
-    if (data.string->checks) {
-      GRN_FREE(data.string->checks);
-      data.string->checks = NULL;
-    }
-    if (data.string->ctypes) {
-      GRN_FREE(data.string->ctypes);
-      data.string->ctypes = NULL;
-    }
-    if (data.string->offsets) {
-      GRN_FREE(data.string->offsets);
-      data.string->offsets = NULL;
-    }
+    grn_nfkc_normalize_context_fin(ctx, context);
   }
   return ctx->rc;
 }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181105/b2086ed1/attachment-0001.html>


Groonga - fulltext search engine.

[Groonga-commit] groonga/groonga at f872b73 [master] Extract common code to support auto expand