[Groonga-commit] groonga/groonga at 7b1f7a2 [master] token-filter: use token filter specific callbacks instead of generic callbacks

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Oct 10 19:31:43 JST 2014


Kouhei Sutou	2014-10-10 19:31:43 +0900 (Fri, 10 Oct 2014)

  New Revision: 7b1f7a2f620e3ab698c6e0f09e78a8e6048d00ed
  https://github.com/groonga/groonga/commit/7b1f7a2f620e3ab698c6e0f09e78a8e6048d00ed

  Merged 86db854: Merge pull request #214 from groonga/improve-token-filter-api

  Message:
    token-filter: use token filter specific callbacks instead of generic callbacks
    
    Generic callbacks make it difficult to understand how to use the API
    because their signatures don't show what arguments are passed.
    
    Specific callbacks make it easy to understand how to use the API
    because their signatures show what arguments are passed.

  Modified files:
    include/groonga/token_filter.h
    include/groonga/tokenizer.h
    lib/db.h
    lib/token.c
    lib/token.h
    lib/token_filter.c
    lib/tokenizer.c
    plugins/token_filters/stop_word.c

  Modified: include/groonga/token_filter.h (+29 -10)
===================================================================
--- include/groonga/token_filter.h    2014-10-05 12:36:13 +0900 (347715a)
+++ include/groonga/token_filter.h    2014-10-10 19:31:43 +0900 (2ae553b)
@@ -26,24 +26,43 @@
 extern "C" {
 #endif  /* __cplusplus */
 
+typedef void *grn_token_filter_init_func(grn_ctx *ctx,
+                                         grn_obj *table,
+                                         grn_token_mode mode);
+
+typedef void grn_token_filter_filter_func(grn_ctx *ctx,
+                                          grn_token *current_token,
+                                          grn_token *next_token,
+                                          void *user_data);
+
+typedef void grn_token_filter_fin_func(grn_ctx *ctx,
+                                       void *user_data);
+
+
 /*
   grn_token_filter_register() registers a plugin to the database which is
   associated with `ctx'. `plugin_name_ptr' and `plugin_name_length' specify the
   plugin name. Alphabetic letters ('A'-'Z' and 'a'-'z'), digits ('0'-'9') and
-  an underscore ('_') are capable characters. `init', `next' and `fin' specify
-  the plugin functions. `init' is called for initializing a token_filter for a
-  document or query. `next' is called for extracting tokens one by one. `fin'
-  is called for finalizing a token_filter. grn_token_filter_register() returns
-  GRN_SUCCESS on success, an error code on failure. See "groonga.h" for more
-  details of grn_proc_func and grn_user_data, that is used as an argument of
-  grn_proc_func.
+  an underscore ('_') are capable characters.
+
+  `init', `filter' and `fin' specify the plugin functions.
+
+  `init' is called for initializing a token_filter for a document or
+  query.
+
+  `filter' is called for filtering tokens one by one.
+
+  `fin' is called for finalizing a token_filter.
+
+  grn_token_filter_register() returns GRN_SUCCESS on success, an error
+  code on failure.
  */
 GRN_PLUGIN_EXPORT grn_rc grn_token_filter_register(grn_ctx *ctx,
                                                    const char *plugin_name_ptr,
                                                    int plugin_name_length,
-                                                   grn_proc_func *init,
-                                                   grn_proc_func *next,
-                                                   grn_proc_func *fin);
+                                                   grn_token_filter_init_func *init,
+                                                   grn_token_filter_filter_func *filter,
+                                                   grn_token_filter_fin_func *fin);
 
 #ifdef __cplusplus
 }  /* extern "C" */

  Modified: include/groonga/tokenizer.h (+15 -0)
===================================================================
--- include/groonga/tokenizer.h    2014-10-05 12:36:13 +0900 (b57a513)
+++ include/groonga/tokenizer.h    2014-10-10 19:31:43 +0900 (9792e01)
@@ -193,6 +193,21 @@ typedef unsigned int grn_tokenizer_status;
 #define GRN_TOKENIZER_CONTINUE GRN_TOKENIZER_TOKEN_CONTINUE
 #define GRN_TOKENIZER_LAST     GRN_TOKENIZER_TOKEN_LAST
 
+typedef struct _grn_token grn_token;
+
+GRN_PLUGIN_EXPORT grn_obj *grn_token_get_data(grn_ctx *ctx,
+                                              grn_token *token);
+GRN_PLUGIN_EXPORT grn_rc grn_token_set_data(grn_ctx *ctx,
+                                            grn_token *token,
+                                            const char *str_ptr,
+                                            int str_length);
+GRN_PLUGIN_EXPORT grn_tokenizer_status grn_token_get_status(grn_ctx *ctx,
+                                                            grn_token *token);
+GRN_PLUGIN_EXPORT grn_rc grn_token_set_status(grn_ctx *ctx,
+                                              grn_token *token,
+                                              grn_tokenizer_status status);
+
+
 /*
   grn_tokenizer_token_push() pushes the next token into `token'. Note that
   grn_tokenizer_token_push() does not make a copy of the given string. This

  Modified: lib/db.h (+12 -0)
===================================================================
--- lib/db.h    2014-10-05 12:36:13 +0900 (e9d2278)
+++ lib/db.h    2014-10-10 19:31:43 +0900 (f2f9556)
@@ -29,6 +29,8 @@
 #include "store.h"
 #endif /* GRN_STORE_H */
 
+#include <groonga/token_filter.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -185,6 +187,16 @@ struct _grn_proc {
 
   grn_selector_func *selector;
 
+  union {
+    struct {
+      grn_token_filter_init_func   *init;
+      grn_token_filter_filter_func *filter;
+      grn_token_filter_fin_func    *fin;
+    } token_filter;
+  } callbacks;
+
+  void *user_data;
+
   grn_id module;
   //  uint32_t nargs;
   //  uint32_t nresults;

  Modified: lib/token.c (+40 -61)
===================================================================
--- lib/token.c    2014-10-05 12:36:13 +0900 (f0a0ff7)
+++ lib/token.c    2014-10-10 19:31:43 +0900 (00b3de2)
@@ -501,7 +501,6 @@ grn_token_cursor_open_initialize_token_filters(grn_ctx *ctx,
 {
   grn_obj *token_filters = token_cursor->token_filters;
   unsigned int i, n_token_filters;
-  grn_obj mode;
 
   if (token_filters) {
     n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *);
@@ -509,41 +508,15 @@ grn_token_cursor_open_initialize_token_filters(grn_ctx *ctx,
     n_token_filters = 0;
   }
 
-  if (n_token_filters == 0) {
-    token_cursor->token_filter_ctxs = NULL;
-    return;
-  }
-
-  token_cursor->token_filter_ctxs =
-    GRN_MALLOC(sizeof(grn_proc_ctx) * n_token_filters);
-  if (!token_cursor->token_filter_ctxs) {
-    ERR(GRN_NO_MEMORY_AVAILABLE,
-        "[token-cursor][open] failed to allocate token filter contexts");
-    return;
-  }
-
-  GRN_UINT32_INIT(&mode, 0);
-  GRN_UINT32_SET(ctx, &mode, token_cursor->mode);
   for (i = 0; i < n_token_filters; i++) {
-    grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i);
-    grn_proc_ctx *token_filter_ctx = &token_cursor->token_filter_ctxs[i];
-    int n_args = 0;
-    grn_obj *args[2];
-
-    token_filter_ctx->caller = NULL;
-    token_filter_ctx->user_data.ptr = NULL;
-    token_filter_ctx->proc = (grn_proc *)token_filter;
-    token_filter_ctx->hooks = NULL;
-    token_filter_ctx->currh = NULL;
-    token_filter_ctx->phase = PROC_INIT;
-
-    args[n_args++] = token_cursor->table;
-    args[n_args++] = &mode;
-    ((grn_proc *)token_filter)->funcs[PROC_INIT](ctx,
-                                                 n_args, args,
-                                                 &token_filter_ctx->user_data);
+    grn_obj *token_filter_object = GRN_PTR_VALUE_AT(token_filters, i);
+    grn_proc *token_filter = (grn_proc *)token_filter_object;
+
+    token_filter->user_data =
+      token_filter->callbacks.token_filter.init(ctx,
+                                                token_cursor->table,
+                                                token_cursor->mode);
   }
-  GRN_OBJ_FIN(ctx, &mode);
 }
 
 grn_token_cursor *
@@ -625,45 +598,58 @@ grn_token_cursor_open(grn_ctx *ctx, grn_obj *table,
 static int
 grn_token_cursor_next_apply_token_filters(grn_ctx *ctx,
                                           grn_token_cursor *token_cursor,
-                                          grn_obj *current_token,
+                                          grn_obj *current_token_data,
                                           grn_obj *status)
 {
   grn_obj *token_filters = token_cursor->token_filters;
   unsigned int i, n_token_filters;
+  grn_token current_token;
+  grn_token next_token;
 
   if (token_filters) {
     n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *);
   } else {
     n_token_filters = 0;
   }
+
+  GRN_TEXT_INIT(&(current_token.data), GRN_OBJ_DO_SHALLOW_COPY);
+  GRN_TEXT_SET(ctx, &(current_token.data),
+               GRN_TEXT_VALUE(current_token_data),
+               GRN_TEXT_LEN(current_token_data));
+  current_token.status = GRN_INT32_VALUE(status);
+  GRN_TEXT_INIT(&(next_token.data), GRN_OBJ_DO_SHALLOW_COPY);
+  GRN_TEXT_SET(ctx, &(next_token.data),
+               GRN_TEXT_VALUE(&(current_token.data)),
+               GRN_TEXT_LEN(&(current_token.data)));
+  next_token.status = current_token.status;
+
   for (i = 0; i < n_token_filters; i++) {
-    grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i);
-    grn_proc_ctx *token_filter_ctx = &token_cursor->token_filter_ctxs[i];
-    int n_args = 0;
-    grn_obj *args[2];
+    grn_obj *token_filter_object = GRN_PTR_VALUE_AT(token_filters, i);
+    grn_proc *token_filter = (grn_proc *)token_filter_object;
 
 #define SKIP_FLAGS\
     (GRN_TOKENIZER_TOKEN_SKIP |\
      GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION)
-    if (GRN_INT32_VALUE(status) & SKIP_FLAGS) {
+    if (current_token.status & SKIP_FLAGS) {
       break;
     }
 #undef SKIP_FLAGS
 
-    args[n_args++] = current_token;
-    args[n_args++] = status;
-    ((grn_proc *)token_filter)->funcs[PROC_NEXT](ctx,
-                                                 n_args,
-                                                 args,
-                                                 &token_filter_ctx->user_data);
-    status = grn_ctx_pop(ctx);
-    current_token = grn_ctx_pop(ctx);
+    token_filter->callbacks.token_filter.filter(ctx,
+                                                &current_token,
+                                                &next_token,
+                                                token_filter->user_data);
+    GRN_TEXT_SET(ctx, &(current_token.data),
+                 GRN_TEXT_VALUE(&(next_token.data)),
+                 GRN_TEXT_LEN(&(next_token.data)));
+    current_token.status = next_token.status;
   }
 
-  token_cursor->curr = (const unsigned char *)GRN_TEXT_VALUE(current_token);
-  token_cursor->curr_size = GRN_TEXT_LEN(current_token);
+  token_cursor->curr =
+    (const unsigned char *)GRN_TEXT_VALUE(&(current_token.data));
+  token_cursor->curr_size = GRN_TEXT_LEN(&(current_token.data));
 
-  return GRN_INT32_VALUE(status);
+  return current_token.status;
 }
 
 grn_id
@@ -811,17 +797,10 @@ grn_token_cursor_close_token_filters(grn_ctx *ctx,
     n_token_filters = 0;
   }
   for (i = 0; i < n_token_filters; i++) {
-    grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i);
-    grn_proc_ctx *token_filter_ctx = &token_cursor->token_filter_ctxs[i];
-
-    ((grn_proc *)token_filter)->funcs[PROC_FIN](ctx,
-                                                1,
-                                                &token_cursor->table,
-                                                &token_filter_ctx->user_data);
-  }
+    grn_obj *token_filter_object = GRN_PTR_VALUE_AT(token_filters, i);
+    grn_proc *token_filter = (grn_proc *)token_filter_object;
 
-  if (token_cursor->token_filter_ctxs) {
-    GRN_FREE(token_cursor->token_filter_ctxs);
+    token_filter->callbacks.token_filter.fin(ctx, token_filter->user_data);
   }
 }
 

  Modified: lib/token.h (+5 -1)
===================================================================
--- lib/token.h    2014-10-05 12:36:13 +0900 (89a2eb0)
+++ lib/token.h    2014-10-10 19:31:43 +0900 (8689307)
@@ -46,6 +46,11 @@ typedef enum {
   GRN_TOKEN_NOT_FOUND
 } grn_token_status;
 
+struct _grn_token {
+  grn_obj data;
+  grn_tokenizer_status status;
+};
+
 typedef struct {
   grn_obj *table;
   const unsigned char *orig;
@@ -61,7 +66,6 @@ typedef struct {
   grn_obj *tokenizer;
   grn_proc_ctx pctx;
   grn_obj *token_filters;
-  grn_proc_ctx *token_filter_ctxs;
   uint32_t variant;
   grn_obj *nstr;
 } grn_token_cursor;

  Modified: lib/token_filter.c (+22 -19)
===================================================================
--- lib/token_filter.c    2014-10-05 12:36:13 +0900 (470f9f1)
+++ lib/token_filter.c    2014-10-10 19:31:43 +0900 (910bb9a)
@@ -19,38 +19,41 @@
 #include <string.h>
 
 #include "groonga_in.h"
+#include "db.h"
 #include <groonga/token_filter.h>
 
 grn_rc
 grn_token_filter_register(grn_ctx *ctx,
                           const char *plugin_name_ptr,
                           int plugin_name_length,
-                          grn_proc_func *init,
-                          grn_proc_func *next,
-                          grn_proc_func *fin)
+                          grn_token_filter_init_func *init,
+                          grn_token_filter_filter_func *filter,
+                          grn_token_filter_fin_func *fin)
 {
-  grn_expr_var vars[] = {
-    { NULL, 0 },
-    { NULL, 0 },
-    { NULL, 0 }
-  };
-  GRN_TEXT_INIT(&vars[0].value, 0);
-  GRN_TEXT_INIT(&vars[1].value, 0);
-  GRN_UINT32_INIT(&vars[2].value, 0);
-
   if (plugin_name_length == -1) {
     plugin_name_length = strlen(plugin_name_ptr);
   }
+
   {
-    grn_obj * const obj = grn_proc_create(ctx,
-                                          plugin_name_ptr,
-                                          plugin_name_length,
-                                          GRN_PROC_TOKENIZER,
-                                          init, next, fin, 3, vars);
-    if (obj == NULL) {
-      GRN_PLUGIN_ERROR(ctx, GRN_TOKEN_FILTER_ERROR, "grn_proc_create() failed");
+    grn_obj *token_filter_object = grn_proc_create(ctx,
+                                                   plugin_name_ptr,
+                                                   plugin_name_length,
+                                                   GRN_PROC_TOKENIZER,
+                                                   NULL, NULL, NULL, 0, NULL);
+    if (token_filter_object == NULL) {
+      GRN_PLUGIN_ERROR(ctx, GRN_TOKEN_FILTER_ERROR,
+                       "[token-filter][%.*s] failed to grn_proc_create()",
+                       plugin_name_length, plugin_name_ptr);
       return ctx->rc;
     }
+
+    {
+      grn_proc *token_filter = (grn_proc *)token_filter_object;
+      token_filter->callbacks.token_filter.init = init;
+      token_filter->callbacks.token_filter.filter = filter;
+      token_filter->callbacks.token_filter.fin = fin;
+    }
   }
+
   return GRN_SUCCESS;
 }

  Modified: lib/tokenizer.c (+57 -1)
===================================================================
--- lib/tokenizer.c    2014-10-05 12:36:13 +0900 (fc3495d)
+++ lib/tokenizer.c    2014-10-10 19:31:43 +0900 (ac4628b)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2012 Brazil
+  Copyright(C) 2012-2014 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -318,3 +318,59 @@ grn_tokenizer_register(grn_ctx *ctx, const char *plugin_name_ptr,
   }
   return GRN_SUCCESS;
 }
+
+grn_obj *
+grn_token_get_data(grn_ctx *ctx, grn_token *token)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT, "token must not be NULL");
+    GRN_API_RETURN(NULL);
+  }
+  GRN_API_RETURN(&(token->data));
+}
+
+grn_rc
+grn_token_set_data(grn_ctx *ctx,
+                   grn_token *token,
+                   const char *str_ptr,
+                   int str_length)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT, "token must not be NULL");
+    goto exit;
+  }
+  if (str_length == -1) {
+    str_length = strlen(str_ptr);
+  }
+  GRN_TEXT_SET(ctx, &(token->data), str_ptr, str_length);
+exit:
+  GRN_API_RETURN(ctx->rc);
+}
+
+grn_tokenizer_status
+grn_token_get_status(grn_ctx *ctx, grn_token *token)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT, "token must not be NULL");
+    GRN_API_RETURN(GRN_TOKENIZER_TOKEN_CONTINUE);
+  }
+  GRN_API_RETURN(token->status);
+}
+
+grn_rc
+grn_token_set_status(grn_ctx *ctx,
+                     grn_token *token,
+                     grn_tokenizer_status status)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT, "token must not be NULL");
+    goto exit;
+  }
+  token->status = status;
+exit:
+  GRN_API_RETURN(ctx->rc);
+}

  Modified: plugins/token_filters/stop_word.c (+25 -28)
===================================================================
--- plugins/token_filters/stop_word.c    2014-10-05 12:36:13 +0900 (6bb6311)
+++ plugins/token_filters/stop_word.c    2014-10-10 19:31:43 +0900 (21451f0)
@@ -33,8 +33,8 @@ typedef struct {
   grn_tokenizer_token token;
 } grn_stop_word_token_filter;
 
-static grn_obj *
-stop_word_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+static void *
+stop_word_init(grn_ctx *ctx, grn_obj *table, grn_token_mode mode)
 {
   grn_stop_word_token_filter *token_filter;
 
@@ -46,8 +46,8 @@ stop_word_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
     return NULL;
   }
 
-  token_filter->table = args[0];
-  token_filter->mode = GRN_UINT32_VALUE(args[1]);
+  token_filter->table = table;
+  token_filter->mode = mode;
   token_filter->column = grn_obj_column(ctx,
                                         token_filter->table,
                                         COLUMN_NAME,
@@ -70,27 +70,29 @@ stop_word_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
     return NULL;
   }
 
-  user_data->ptr = token_filter;
-
   GRN_BOOL_INIT(&(token_filter->value), 0);
   grn_tokenizer_token_init(ctx, &(token_filter->token));
 
-  return NULL;
+  return token_filter;
 }
 
-static grn_obj *
-stop_word_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+static void
+stop_word_filter(grn_ctx *ctx,
+                 grn_token *current_token,
+                 grn_token *next_token,
+                 void *user_data)
 {
-  grn_stop_word_token_filter *token_filter = user_data->ptr;
-  grn_obj *current_token = args[0];
-  int status = GRN_INT32_VALUE(args[1]);
+  grn_stop_word_token_filter *token_filter = user_data;
 
   if (token_filter->mode == GRN_TOKEN_GET) {
     grn_id id;
+    grn_obj *data;
+
+    data = grn_token_get_data(ctx, current_token);
     id = grn_table_get(ctx,
                        token_filter->table,
-                       GRN_TEXT_VALUE(current_token),
-                       GRN_TEXT_LEN(current_token));
+                       GRN_TEXT_VALUE(data),
+                       GRN_TEXT_LEN(data));
     if (id != GRN_ID_NIL) {
       GRN_BULK_REWIND(&(token_filter->value));
       grn_obj_get_value(ctx,
@@ -98,32 +100,27 @@ stop_word_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
                         id,
                         &(token_filter->value));
       if (GRN_BOOL_VALUE(&(token_filter->value))) {
+        grn_tokenizer_status status;
+        status = grn_token_get_status(ctx, current_token);
         status |= GRN_TOKENIZER_TOKEN_SKIP;
+        grn_token_set_status(ctx, next_token, status);
       }
     }
   }
-
-  grn_tokenizer_token_push(ctx,
-                           &(token_filter->token),
-                           GRN_TEXT_VALUE(current_token),
-                           GRN_TEXT_LEN(current_token),
-                           status);
-
-  return NULL;
 }
 
-static grn_obj *
-stop_word_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+static void
+stop_word_fin(grn_ctx *ctx, void *user_data)
 {
-  grn_stop_word_token_filter *token_filter = user_data->ptr;
+  grn_stop_word_token_filter *token_filter = user_data;
   if (!token_filter) {
-    return NULL;
+    return;
   }
+
   grn_tokenizer_token_fin(ctx, &(token_filter->token));
   grn_obj_unlink(ctx, token_filter->column);
   grn_obj_unlink(ctx, &(token_filter->value));
   GRN_PLUGIN_FREE(ctx, token_filter);
-  return NULL;
 }
 
 grn_rc
@@ -140,7 +137,7 @@ GRN_PLUGIN_REGISTER(grn_ctx *ctx)
   rc = grn_token_filter_register(ctx,
                                  "TokenFilterStopWord", -1,
                                  stop_word_init,
-                                 stop_word_next,
+                                 stop_word_filter,
                                  stop_word_fin);
 
   return rc;
-------------- next part --------------
An HTML attachment was scrubbed...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index