[Groonga-commit] groonga/groonga at c1604ae [master] Add reindex API

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Nov 7 21:11:14 JST 2015


Kouhei Sutou	2015-11-07 21:11:14 +0900 (Sat, 07 Nov 2015)

  New Revision: c1604ae6b2a8976b094685826015aacc74578c82
  https://github.com/groonga/groonga/commit/c1604ae6b2a8976b094685826015aacc74578c82

  Message:
    Add reindex API
    
    New API:
    
      * grn_obj_reindex()
    
    New command:
    
      * reindex

  Added files:
    lib/grn_index_column.h
    lib/index_column.c
    test/command/suite/reindex/db.expected
    test/command/suite/reindex/db.test
    test/command/suite/reindex/index_column.expected
    test/command/suite/reindex/index_column.test
    test/command/suite/reindex/table.expected
    test/command/suite/reindex/table.test
  Modified files:
    include/groonga/obj.h
    lib/ctx.c
    lib/db.c
    lib/grn_db.h
    lib/grn_ii.h
    lib/obj.c
    lib/proc.c
    lib/sources.am

  Modified: include/groonga/obj.h (+2 -0)
===================================================================
--- include/groonga/obj.h    2015-11-04 16:39:28 +0900 (9ed8f44)
+++ include/groonga/obj.h    2015-11-07 21:11:14 +0900 (d829a92)
@@ -86,6 +86,8 @@ GRN_API grn_rc grn_obj_cast(grn_ctx *ctx,
                             grn_obj *dest,
                             grn_bool add_record_if_not_exist);
 
+GRN_API grn_rc grn_obj_reindex(grn_ctx *ctx, grn_obj *obj);
+
 #ifdef __cplusplus
 }
 #endif

  Modified: lib/ctx.c (+2 -0)
===================================================================
--- lib/ctx.c    2015-11-04 16:39:28 +0900 (6fe1345)
+++ lib/ctx.c    2015-11-07 21:11:14 +0900 (90a4d93)
@@ -23,6 +23,7 @@
 #include "grn_ctx_impl.h"
 #include "grn_ii.h"
 #include "grn_pat.h"
+#include "grn_index_column.h"
 #include "grn_proc.h"
 #include "grn_plugin.h"
 #include "grn_snip.h"
@@ -96,6 +97,7 @@ grn_init_from_env(void)
   grn_io_init_from_env();
   grn_ii_init_from_env();
   grn_db_init_from_env();
+  grn_index_column_init_from_env();
   grn_proc_init_from_env();
   grn_plugin_init_from_env();
 }

  Modified: lib/db.c (+8 -146)
===================================================================
--- lib/db.c    2015-11-04 16:39:28 +0900 (2ac64ba)
+++ lib/db.c    2015-11-07 21:11:14 +0900 (7ef15a0)
@@ -20,6 +20,7 @@
 #include "grn_pat.h"
 #include "grn_dat.h"
 #include "grn_ii.h"
+#include "grn_index_column.h"
 #include "grn_ctx_impl.h"
 #include "grn_token_cursor.h"
 #include "grn_tokenizers.h"
@@ -88,7 +89,6 @@ grn_obj_get_range_info(grn_ctx *ctx, grn_obj *obj,
 
 
 static char grn_db_key[GRN_ENV_BUFFER_SIZE];
-static uint64_t grn_index_sparsity = 10;
 
 void
 grn_db_init_from_env(void)
@@ -96,21 +96,6 @@ grn_db_init_from_env(void)
   grn_getenv("GRN_DB_KEY",
              grn_db_key,
              GRN_ENV_BUFFER_SIZE);
-
-  {
-    char grn_index_sparsity_env[GRN_ENV_BUFFER_SIZE];
-    grn_getenv("GRN_INDEX_SPARSITY",
-               grn_index_sparsity_env,
-               GRN_ENV_BUFFER_SIZE);
-    if (grn_index_sparsity_env[0]) {
-      uint64_t sparsity;
-      errno = 0;
-      sparsity = strtoull(grn_index_sparsity_env, NULL, 0);
-      if (errno == 0) {
-        grn_index_sparsity = sparsity;
-      }
-    }
-  }
 }
 
 inline static void
@@ -1255,14 +1240,8 @@ typedef struct {
   unsigned int section;
 } default_set_value_hook_data;
 
-struct _grn_hook {
-  grn_hook *next;
-  grn_proc *proc;
-  uint32_t hld_size;
-};
-
-static grn_obj *
-default_set_value_hook(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+grn_obj *
+grn_obj_default_set_value_hook(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
   grn_proc_ctx *pctx = (grn_proc_ctx *)user_data;
   if (!pctx) {
@@ -1392,7 +1371,7 @@ grn_table_add(grn_ctx *ctx, grn_obj *table, const void *key, unsigned int key_si
           if (hooks->proc) {
             hooks->proc->funcs[PROC_INIT](ctx, 1, &table, &pctx.user_data);
           } else {
-            default_set_value_hook(ctx, 1, &table, &pctx.user_data);
+            grn_obj_default_set_value_hook(ctx, 1, &table, &pctx.user_data);
           }
           if (ctx->rc) { break; }
           hooks = hooks->next;
@@ -1685,7 +1664,7 @@ call_delete_hook(grn_ctx *ctx, grn_obj *table, grn_id rid, const void *key, unsi
         if (hooks->proc) {
           hooks->proc->funcs[PROC_INIT](ctx, 1, &table, &pctx.user_data);
         } else {
-          default_set_value_hook(ctx, 1, &table, &pctx.user_data);
+          grn_obj_default_set_value_hook(ctx, 1, &table, &pctx.user_data);
         }
         if (ctx->rc) { break; }
         hooks = hooks->next;
@@ -2015,7 +1994,6 @@ grn_table_delete_by_id(grn_ctx *ctx, grn_obj *table, grn_id id)
   GRN_API_RETURN(rc);
 }
 
-grn_rc grn_ii_truncate(grn_ctx *ctx, grn_ii *ii);
 grn_rc grn_ja_truncate(grn_ctx *ctx, grn_ja *ja);
 grn_rc grn_ra_truncate(grn_ctx *ctx, grn_ra *ra);
 
@@ -6651,7 +6629,7 @@ call_hook(grn_ctx *ctx, grn_obj *obj, grn_id id, grn_obj *value, int flags)
         if (hooks->proc) {
           hooks->proc->funcs[PROC_INIT](ctx, 1, &obj, &pctx.user_data);
         } else {
-          default_set_value_hook(ctx, 1, &obj, &pctx.user_data);
+          grn_obj_default_set_value_hook(ctx, 1, &obj, &pctx.user_data);
         }
         if (ctx->rc) {
           grn_obj_close(ctx, oldvalue);
@@ -6666,48 +6644,6 @@ call_hook(grn_ctx *ctx, grn_obj *obj, grn_id id, grn_obj *value, int flags)
   return 0;
 }
 
-inline static int
-call_hook_for_build(grn_ctx *ctx, grn_obj *obj, grn_id id, grn_obj *value, int flags)
-{
-  grn_hook *hooks = DB_OBJ(obj)->hooks[GRN_HOOK_SET];
-
-  if (hooks || obj->header.type == GRN_COLUMN_VAR_SIZE) {
-    grn_obj oldvalue;
-    GRN_TEXT_INIT(&oldvalue, 0);
-
-    if (hooks) {
-      // todo : grn_proc_ctx_open()
-      grn_obj id_, flags_;
-      grn_proc_ctx pctx = {{0}, hooks->proc, NULL, hooks, hooks, PROC_INIT, 4, 4};
-      GRN_UINT32_INIT(&id_, 0);
-      GRN_UINT32_INIT(&flags_, 0);
-      GRN_UINT32_SET(ctx, &id_, id);
-      GRN_UINT32_SET(ctx, &flags_, flags);
-      while (hooks) {
-        grn_ctx_push(ctx, &id_);
-        grn_ctx_push(ctx, &oldvalue);
-        grn_ctx_push(ctx, value);
-        grn_ctx_push(ctx, &flags_);
-        pctx.caller = NULL;
-        pctx.currh = hooks;
-        if (hooks->proc) {
-          hooks->proc->funcs[PROC_INIT](ctx, 1, &obj, &pctx.user_data);
-        } else {
-          default_set_value_hook(ctx, 1, &obj, &pctx.user_data);
-        }
-        if (ctx->rc) {
-          grn_obj_close(ctx, &oldvalue);
-          return 1;
-        }
-        hooks = hooks->next;
-        pctx.offset++;
-      }
-    }
-    grn_obj_close(ctx, &oldvalue);
-  }
-  return 0;
-}
-
 static grn_rc
 grn_obj_set_value_table_pat_key(grn_ctx *ctx, grn_obj *obj, grn_id id,
                                 grn_obj *value, int flags)
@@ -7640,77 +7576,6 @@ exit :
 }
 
 static void
-build_index(grn_ctx *ctx, grn_obj *obj)
-{
-  grn_obj *src, **cp, **col, *target;
-  grn_id *s = DB_OBJ(obj)->source;
-  if (!(DB_OBJ(obj)->source_size) || !s) { return; }
-  if ((src = grn_ctx_at(ctx, *s))) {
-    target = GRN_OBJ_TABLEP(src) ? src : grn_ctx_at(ctx, src->header.domain);
-    if (target) {
-      int i, ncol = DB_OBJ(obj)->source_size / sizeof(grn_id);
-      grn_obj_flags flags;
-      grn_ii *ii = (grn_ii *)obj;
-      grn_bool use_grn_ii_build;
-      grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL, NULL);
-      switch (flags & GRN_OBJ_TABLE_TYPE_MASK) {
-      case GRN_OBJ_TABLE_PAT_KEY :
-      case GRN_OBJ_TABLE_DAT_KEY :
-        use_grn_ii_build = GRN_TRUE;
-        break;
-      default :
-        use_grn_ii_build = GRN_FALSE;
-        break;
-      }
-      if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
-        use_grn_ii_build = GRN_FALSE;
-      }
-      if ((col = GRN_MALLOC(ncol * sizeof(grn_obj *)))) {
-        for (cp = col, i = ncol; i; s++, cp++, i--) {
-          if (!(*cp = grn_ctx_at(ctx, *s))) {
-            ERR(GRN_INVALID_ARGUMENT, "source invalid, n=%d",i);
-            GRN_FREE(col);
-            return;
-          }
-          if (GRN_OBJ_TABLEP(grn_ctx_at(ctx, DB_OBJ(*cp)->range))) {
-            use_grn_ii_build = GRN_FALSE;
-          }
-        }
-        if (use_grn_ii_build) {
-          grn_ii_build(ctx, ii, grn_index_sparsity);
-        } else {
-          grn_table_cursor  *tc;
-          if ((tc = grn_table_cursor_open(ctx, target, NULL, 0, NULL, 0,
-                                          0, -1, GRN_CURSOR_BY_ID))) {
-            grn_id id;
-            grn_obj rv;
-            GRN_TEXT_INIT(&rv, 0);
-            while ((id = grn_table_cursor_next_inline(ctx, tc)) != GRN_ID_NIL) {
-              for (cp = col, i = ncol; i; i--, cp++) {
-                GRN_BULK_REWIND(&rv);
-                if (GRN_OBJ_TABLEP(*cp)) {
-                  grn_table_get_key2(ctx, *cp, id, &rv);
-                } else {
-                  grn_obj_get_value(ctx, *cp, id, &rv);
-                }
-                call_hook_for_build(ctx, *cp, id, &rv, 0);
-              }
-            }
-            GRN_OBJ_FIN(ctx, &rv);
-            grn_table_cursor_close(ctx, tc);
-          }
-        }
-        GRN_FREE(col);
-      }
-    } else {
-      ERR(GRN_INVALID_ARGUMENT, "invalid target");
-    }
-  } else {
-    ERR(GRN_INVALID_ARGUMENT, "invalid source");
-  }
-}
-
-static void
 update_source_hook(grn_ctx *ctx, grn_obj *obj)
 {
   grn_id *s = DB_OBJ(obj)->source;
@@ -8088,7 +7953,7 @@ grn_obj_set_info_source_update(grn_ctx *ctx, grn_obj *obj, grn_obj *value)
 
     if (obj->header.type == GRN_COLUMN_INDEX) {
       update_source_hook(ctx, obj);
-      build_index(ctx, obj);
+      grn_index_column_build(ctx, obj);
     }
   } else {
     DB_OBJ(obj)->source = NULL;
@@ -13302,14 +13167,11 @@ grn_db_recover_data_column(grn_ctx *ctx, grn_obj *data_column)
 static void
 grn_db_recover_index_column(grn_ctx *ctx, grn_obj *index_column)
 {
-  grn_ii *ii = (grn_ii *)index_column;
-
   if (!grn_obj_is_locked(ctx, index_column)) {
     return;
   }
 
-  grn_ii_truncate(ctx, ii);
-  build_index(ctx, index_column);
+  grn_index_column_rebuild(ctx, index_column);
 }
 
 grn_rc

  Modified: lib/grn_db.h (+11 -0)
===================================================================
--- lib/grn_db.h    2015-11-04 16:39:28 +0900 (89e65d9)
+++ lib/grn_db.h    2015-11-07 21:11:14 +0900 (38a2292)
@@ -138,6 +138,12 @@ struct _grn_type {
   (GRN_OBJ_VECTOR_COLUMNP(obj) &&\
    (DB_OBJ(obj)->header.flags & GRN_OBJ_WITH_WEIGHT))
 
+struct _grn_hook {
+  grn_hook *next;
+  grn_proc *proc;
+  uint32_t hld_size;
+};
+
 typedef struct _grn_proc_ctx grn_proc_ctx;
 
 struct _grn_proc_ctx {
@@ -420,6 +426,11 @@ grn_rc grn_accessor_resolve(grn_ctx *ctx, grn_obj *accessor, int deep,
                             grn_obj *base_res, grn_obj **res,
                             grn_search_optarg *optarg);
 
+grn_obj *grn_obj_default_set_value_hook(grn_ctx *ctx,
+                                        int nargs,
+                                        grn_obj **args,
+                                        grn_user_data *user_data);
+
 #ifdef __cplusplus
 }
 #endif

  Modified: lib/grn_ii.h (+1 -0)
===================================================================
--- lib/grn_ii.h    2015-11-04 16:39:28 +0900 (4dc3c4a)
+++ lib/grn_ii.h    2015-11-07 21:11:14 +0900 (06edf2b)
@@ -164,6 +164,7 @@ grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator
 void grn_ii_inspect_values(grn_ctx *ctx, grn_ii *ii, grn_obj *buf);
 void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf);
 
+grn_rc grn_ii_truncate(grn_ctx *ctx, grn_ii *ii);
 grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity);
 
 #ifdef __cplusplus

  Added: lib/grn_index_column.h (+37 -0) 100644
===================================================================
--- /dev/null
+++ lib/grn_index_column.h    2015-11-07 21:11:14 +0900 (9b094e1)
@@ -0,0 +1,37 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2015 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#ifndef GRN_INDEX_COLUMN_H
+#define GRN_INDEX_COLUMN_H
+
+#include "grn_db.h"
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+void grn_index_column_init_from_env(void);
+grn_rc grn_index_column_build(grn_ctx *ctx, grn_obj *index_column);
+grn_rc grn_index_column_rebuild(grn_ctx *ctx, grn_obj *index_column);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GRN_INDEX_COLUMN_H */

  Added: lib/index_column.c (+166 -0) 100644
===================================================================
--- /dev/null
+++ lib/index_column.c    2015-11-07 21:11:14 +0900 (95dac40)
@@ -0,0 +1,166 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2009-2015 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include "grn_index_column.h"
+#include "grn_ii.h"
+#include "grn_hash.h"
+
+#include <string.h>
+
+static uint64_t grn_index_sparsity = 10;
+
+void
+grn_index_column_init_from_env(void)
+{
+  char grn_index_sparsity_env[GRN_ENV_BUFFER_SIZE];
+  grn_getenv("GRN_INDEX_SPARSITY",
+             grn_index_sparsity_env,
+             GRN_ENV_BUFFER_SIZE);
+  if (grn_index_sparsity_env[0]) {
+    uint64_t sparsity;
+    errno = 0;
+    sparsity = strtoull(grn_index_sparsity_env, NULL, 0);
+    if (errno == 0) {
+      grn_index_sparsity = sparsity;
+    }
+  }
+}
+
+inline static void
+grn_index_column_build_call_hook(grn_ctx *ctx, grn_obj *obj,
+                                 grn_id id, grn_obj *value, int flags)
+{
+  grn_hook *hooks = DB_OBJ(obj)->hooks[GRN_HOOK_SET];
+
+  if (hooks) {
+    grn_obj oldvalue;
+    /* todo : grn_proc_ctx_open() */
+    grn_obj id_, flags_;
+    grn_proc_ctx pctx = {{0}, hooks->proc, NULL, hooks, hooks, PROC_INIT, 4, 4};
+    GRN_TEXT_INIT(&oldvalue, 0);
+    GRN_UINT32_INIT(&id_, 0);
+    GRN_UINT32_INIT(&flags_, 0);
+    GRN_UINT32_SET(ctx, &id_, id);
+    GRN_UINT32_SET(ctx, &flags_, flags);
+    while (hooks) {
+      grn_ctx_push(ctx, &id_);
+      grn_ctx_push(ctx, &oldvalue);
+      grn_ctx_push(ctx, value);
+      grn_ctx_push(ctx, &flags_);
+      pctx.caller = NULL;
+      pctx.currh = hooks;
+      if (hooks->proc) {
+        hooks->proc->funcs[PROC_INIT](ctx, 1, &obj, &pctx.user_data);
+      } else {
+        grn_obj_default_set_value_hook(ctx, 1, &obj, &pctx.user_data);
+      }
+      if (ctx->rc) {
+        grn_obj_close(ctx, &oldvalue);
+        return;
+      }
+      hooks = hooks->next;
+      pctx.offset++;
+    }
+    grn_obj_close(ctx, &oldvalue);
+  }
+}
+
+grn_rc
+grn_index_column_build(grn_ctx *ctx, grn_obj *index_column)
+{
+  grn_obj *src, **cp, **col, *target;
+  grn_id *s = DB_OBJ(index_column)->source;
+  if (!(DB_OBJ(index_column)->source_size) || !s) { return ctx->rc; }
+  if ((src = grn_ctx_at(ctx, *s))) {
+    target = GRN_OBJ_TABLEP(src) ? src : grn_ctx_at(ctx, src->header.domain);
+    if (target) {
+      int i, ncol = DB_OBJ(index_column)->source_size / sizeof(grn_id);
+      grn_obj_flags flags;
+      grn_ii *ii = (grn_ii *)index_column;
+      grn_bool use_grn_ii_build;
+      grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL, NULL);
+      switch (flags & GRN_OBJ_TABLE_TYPE_MASK) {
+      case GRN_OBJ_TABLE_PAT_KEY :
+      case GRN_OBJ_TABLE_DAT_KEY :
+        use_grn_ii_build = GRN_TRUE;
+        break;
+      default :
+        use_grn_ii_build = GRN_FALSE;
+        break;
+      }
+      if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
+        use_grn_ii_build = GRN_FALSE;
+      }
+      if ((col = GRN_MALLOC(ncol * sizeof(grn_obj *)))) {
+        for (cp = col, i = ncol; i; s++, cp++, i--) {
+          if (!(*cp = grn_ctx_at(ctx, *s))) {
+            ERR(GRN_INVALID_ARGUMENT, "source invalid, n=%d",i);
+            GRN_FREE(col);
+            return ctx->rc;
+          }
+          if (GRN_OBJ_TABLEP(grn_ctx_at(ctx, DB_OBJ(*cp)->range))) {
+            use_grn_ii_build = GRN_FALSE;
+          }
+        }
+        if (use_grn_ii_build) {
+          grn_ii_build(ctx, ii, grn_index_sparsity);
+        } else {
+          grn_table_cursor  *tc;
+          if ((tc = grn_table_cursor_open(ctx, target, NULL, 0, NULL, 0,
+                                          0, -1, GRN_CURSOR_BY_ID))) {
+            grn_id id;
+            grn_obj rv;
+            GRN_TEXT_INIT(&rv, 0);
+            while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+              for (cp = col, i = ncol; i; i--, cp++) {
+                GRN_BULK_REWIND(&rv);
+                if (GRN_OBJ_TABLEP(*cp)) {
+                  grn_table_get_key2(ctx, *cp, id, &rv);
+                } else {
+                  grn_obj_get_value(ctx, *cp, id, &rv);
+                }
+                grn_index_column_build_call_hook(ctx, *cp, id, &rv, 0);
+              }
+            }
+            GRN_OBJ_FIN(ctx, &rv);
+            grn_table_cursor_close(ctx, tc);
+          }
+        }
+        GRN_FREE(col);
+      }
+    } else {
+      ERR(GRN_INVALID_ARGUMENT, "invalid target");
+    }
+  } else {
+    ERR(GRN_INVALID_ARGUMENT, "invalid source");
+  }
+  return ctx->rc;
+}
+
+grn_rc
+grn_index_column_rebuild(grn_ctx *ctx, grn_obj *index_column)
+{
+  grn_ii *ii = (grn_ii *)index_column;
+
+  GRN_API_ENTER;
+
+  grn_ii_truncate(ctx, ii);
+  grn_index_column_build(ctx, index_column);
+
+  GRN_API_RETURN(ctx->rc);
+}

  Modified: lib/obj.c (+120 -0)
===================================================================
--- lib/obj.c    2015-11-04 16:39:28 +0900 (feb1d1b)
+++ lib/obj.c    2015-11-07 21:11:14 +0900 (a460dc0)
@@ -17,6 +17,7 @@
 */
 #include "grn.h"
 #include "grn_db.h"
+#include "grn_index_column.h"
 #include <groonga/obj.h>
 
 grn_bool
@@ -166,3 +167,122 @@ grn_obj_is_scorer_proc(grn_ctx *ctx, grn_obj *obj)
   proc = (grn_proc *)obj;
   return proc->type == GRN_PROC_SCORER;
 }
+
+static void
+grn_db_reindex(grn_ctx *ctx, grn_obj *db)
+{
+  grn_table_cursor *cursor;
+  grn_id id;
+
+  cursor = grn_table_cursor_open(ctx, db,
+                                 NULL, 0, NULL, 0,
+                                 0, -1,
+                                 GRN_CURSOR_BY_ID);
+  if (!cursor) {
+    return;
+  }
+
+  while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL) {
+    grn_obj *object;
+
+    object = grn_ctx_at(ctx, id);
+    if (!object) {
+      ERRCLR(ctx);
+      continue;
+    }
+
+    switch (object->header.type) {
+    case GRN_TABLE_HASH_KEY :
+    case GRN_TABLE_PAT_KEY :
+    case GRN_TABLE_DAT_KEY :
+      grn_obj_reindex(ctx, object);
+      break;
+    default:
+      break;
+    }
+
+    grn_obj_unlink(ctx, object);
+
+    if (ctx->rc != GRN_SUCCESS) {
+      break;
+    }
+  }
+  grn_table_cursor_close(ctx, cursor);
+}
+
+static void
+grn_table_reindex(grn_ctx *ctx, grn_obj *table)
+{
+  grn_hash *columns;
+
+  columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
+                            GRN_OBJ_TABLE_HASH_KEY | GRN_HASH_TINY);
+  if (!columns) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "[table][reindex] failed to create a table to store columns");
+    return;
+  }
+
+  if (grn_table_columns(ctx, table, "", 0, (grn_obj *)columns) > 0) {
+    grn_bool have_data_column = GRN_FALSE;
+    grn_id *key;
+    GRN_HASH_EACH(ctx, columns, id, &key, NULL, NULL, {
+      grn_obj *column = grn_ctx_at(ctx, *key);
+      if (column && column->header.type != GRN_COLUMN_INDEX) {
+        have_data_column = GRN_TRUE;
+        break;
+      }
+    });
+    if (!have_data_column) {
+      grn_table_truncate(ctx, table);
+    }
+    GRN_HASH_EACH(ctx, columns, id, &key, NULL, NULL, {
+      grn_obj *column = grn_ctx_at(ctx, *key);
+      if (column && column->header.type == GRN_COLUMN_INDEX) {
+        grn_obj_reindex(ctx, column);
+      }
+    });
+  }
+  grn_hash_close(ctx, columns);
+}
+
+grn_rc
+grn_obj_reindex(grn_ctx *ctx, grn_obj *obj)
+{
+  GRN_API_ENTER;
+
+  if (!obj) {
+    ERR(GRN_INVALID_ARGUMENT, "[object][reindex] object must not be NULL");
+    GRN_API_RETURN(ctx->rc);
+  }
+
+  switch (obj->header.type) {
+  case GRN_DB :
+    grn_db_reindex(ctx, obj);
+    break;
+  case GRN_TABLE_HASH_KEY :
+  case GRN_TABLE_PAT_KEY :
+  case GRN_TABLE_DAT_KEY :
+    grn_table_reindex(ctx, obj);
+    break;
+  case GRN_COLUMN_INDEX :
+    grn_index_column_rebuild(ctx, obj);
+    break;
+  default :
+    {
+      grn_obj type_name;
+      GRN_TEXT_INIT(&type_name, 0);
+      grn_inspect_type(ctx, &type_name, obj->header.type);
+      ERR(GRN_INVALID_ARGUMENT,
+          "[object][reindex] object must be TABLE_HASH_KEY, "
+          "TABLE_PAT_KEY, TABLE_DAT_KEY or COLUMN_INDEX: <%.*s>",
+          (int)GRN_TEXT_LEN(&type_name),
+          GRN_TEXT_VALUE(&type_name));
+      GRN_OBJ_FIN(ctx, &type_name);
+      GRN_API_RETURN(ctx->rc);
+    }
+    break;
+  }
+
+  GRN_API_RETURN(ctx->rc);
+}

  Modified: lib/proc.c (+34 -0)
===================================================================
--- lib/proc.c    2015-11-04 16:39:28 +0900 (da0542a)
+++ lib/proc.c    2015-11-07 21:11:14 +0900 (ae9baec)
@@ -8190,6 +8190,37 @@ proc_schema(grn_ctx *ctx, int nargs, grn_obj **args,
   return NULL;
 }
 
+static grn_obj *
+proc_reindex(grn_ctx *ctx, int nargs, grn_obj **args,
+             grn_user_data *user_data)
+{
+  grn_obj *object_name;
+  grn_obj *object;
+
+  object_name = VAR(0);
+  if (GRN_TEXT_LEN(object_name) == 0) {
+    object = grn_ctx_db(ctx);
+  } else {
+    object = grn_ctx_get(ctx,
+                         GRN_TEXT_VALUE(object_name),
+                         GRN_TEXT_LEN(object_name));
+    if (!object) {
+      ERR(GRN_INVALID_ARGUMENT,
+          "[reindex] nonexistent object: <%.*s>",
+          (int)GRN_TEXT_LEN(object_name),
+          GRN_TEXT_VALUE(object_name));
+      GRN_OUTPUT_BOOL(GRN_FALSE);
+      return NULL;
+    }
+  }
+
+  grn_obj_reindex(ctx, object);
+
+  GRN_OUTPUT_BOOL(ctx->rc == GRN_SUCCESS);
+
+  return NULL;
+}
+
 #define DEF_VAR(v,name_str) do {\
   (v).name = (name_str);\
   (v).name_size = GRN_STRLEN(name_str);\
@@ -8489,4 +8520,7 @@ grn_db_init_builtin_query(grn_ctx *ctx)
   DEF_COMMAND("column_copy", proc_column_copy, 4, vars);
 
   DEF_COMMAND("schema", proc_schema, 0, vars);
+
+  DEF_VAR(vars[0], "object");
+  DEF_COMMAND("reindex", proc_reindex, 1, vars);
 }

  Modified: lib/sources.am (+2 -0)
===================================================================
--- lib/sources.am    2015-11-04 16:39:28 +0900 (f56aef3)
+++ lib/sources.am    2015-11-07 21:11:14 +0900 (a1c670c)
@@ -27,6 +27,8 @@ libgroonga_la_SOURCES =				\
 	grn_hash.h				\
 	ii.c					\
 	grn_ii.h				\
+	index_column.c				\
+	grn_index_column.h			\
 	io.c					\
 	grn_io.h				\
 	logger.c				\

  Added: test/command/suite/reindex/db.expected (+21 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/reindex/db.expected    2015-11-07 21:11:14 +0900 (2811d4d)
@@ -0,0 +1,21 @@
+table_create Memos TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Memos content COLUMN_SCALAR Text
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Memos content
+[[0,0.0,0.0],true]
+load --table Memos
+[
+{"content": "This is a memo"}
+]
+[[0,0.0,0.0],1]
+delete Terms --key this
+[[0,0.0,0.0],true]
+select Terms --output_columns _key, --sortby _key
+[[0,0.0,0.0],[[[3],[["_key","ShortText"]],["a"],["is"],["memo"]]]]
+reindex
+[[0,0.0,0.0],true]
+select Terms --output_columns _key, --sortby _key
+[[0,0.0,0.0],[[[4],[["_key","ShortText"]],["a"],["is"],["memo"],["this"]]]]

  Added: test/command/suite/reindex/db.test (+19 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/reindex/db.test    2015-11-07 21:11:14 +0900 (92a9650)
@@ -0,0 +1,19 @@
+table_create Memos TABLE_NO_KEY
+column_create Memos content COLUMN_SCALAR Text
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto
+column_create Terms index COLUMN_INDEX|WITH_POSITION Memos content
+
+load --table Memos
+[
+{"content": "This is a memo"}
+]
+
+delete Terms --key this
+select Terms --output_columns _key, --sortby _key
+
+reindex
+
+select Terms --output_columns _key, --sortby _key

  Added: test/command/suite/reindex/index_column.expected (+21 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/reindex/index_column.expected    2015-11-07 21:11:14 +0900 (f06df98)
@@ -0,0 +1,21 @@
+table_create Memos TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Memos content COLUMN_SCALAR Text
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Memos content
+[[0,0.0,0.0],true]
+load --table Memos
+[
+{"content": "This is a memo"}
+]
+[[0,0.0,0.0],1]
+delete Terms --key this
+[[0,0.0,0.0],true]
+select Terms --output_columns _key, --sortby _key
+[[0,0.0,0.0],[[[3],[["_key","ShortText"]],["a"],["is"],["memo"]]]]
+reindex Terms.index
+[[0,0.0,0.0],true]
+select Terms --output_columns _key, --sortby _key
+[[0,0.0,0.0],[[[4],[["_key","ShortText"]],["a"],["is"],["memo"],["this"]]]]

  Added: test/command/suite/reindex/index_column.test (+19 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/reindex/index_column.test    2015-11-07 21:11:14 +0900 (d7e6e40)
@@ -0,0 +1,19 @@
+table_create Memos TABLE_NO_KEY
+column_create Memos content COLUMN_SCALAR Text
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto
+column_create Terms index COLUMN_INDEX|WITH_POSITION Memos content
+
+load --table Memos
+[
+{"content": "This is a memo"}
+]
+
+delete Terms --key this
+select Terms --output_columns _key, --sortby _key
+
+reindex Terms.index
+
+select Terms --output_columns _key, --sortby _key

  Added: test/command/suite/reindex/table.expected (+21 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/reindex/table.expected    2015-11-07 21:11:14 +0900 (c6cc32c)
@@ -0,0 +1,21 @@
+table_create Memos TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Memos content COLUMN_SCALAR Text
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+column_create Terms index COLUMN_INDEX|WITH_POSITION Memos content
+[[0,0.0,0.0],true]
+load --table Memos
+[
+{"content": "This is a memo"}
+]
+[[0,0.0,0.0],1]
+delete Terms --key this
+[[0,0.0,0.0],true]
+select Terms --output_columns _key, --sortby _key
+[[0,0.0,0.0],[[[3],[["_key","ShortText"]],["a"],["is"],["memo"]]]]
+reindex Terms
+[[0,0.0,0.0],true]
+select Terms --output_columns _key, --sortby _key
+[[0,0.0,0.0],[[[4],[["_key","ShortText"]],["a"],["is"],["memo"],["this"]]]]

  Added: test/command/suite/reindex/table.test (+19 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/reindex/table.test    2015-11-07 21:11:14 +0900 (4b514f2)
@@ -0,0 +1,19 @@
+table_create Memos TABLE_NO_KEY
+column_create Memos content COLUMN_SCALAR Text
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto
+column_create Terms index COLUMN_INDEX|WITH_POSITION Memos content
+
+load --table Memos
+[
+{"content": "This is a memo"}
+]
+
+delete Terms --key this
+select Terms --output_columns _key, --sortby _key
+
+reindex Terms
+
+select Terms --output_columns _key, --sortby _key
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index