[Groonga-commit] groonga/groonga at 531e0de [master] Copy tokenize function to util to share internally

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Feb 3 14:15:00 JST 2017


Kouhei Sutou	2017-02-03 14:15:00 +0900 (Fri, 03 Feb 2017)

  New Revision: 531e0dee22f9228aa24428d4b3ee6987142199f4
  https://github.com/groonga/groonga/commit/531e0dee22f9228aa24428d4b3ee6987142199f4

  Message:
    Copy tokenize function to util to share internally

  Modified files:
    lib/db.c
    lib/grn_util.h
    lib/util.c

  Modified: lib/db.c (+3 -3)
===================================================================
--- lib/db.c    2017-02-03 13:15:58 +0900 (2a88714)
+++ lib/db.c    2017-02-03 14:15:00 +0900 (8c92436)
@@ -13382,7 +13382,7 @@ grn_obj_columns(grn_ctx *ctx, grn_obj *table,
   grn_obj *col;
   const char *p = (char *)str, *q, *r, *pe = p + str_size, *tokbuf[256];
   while (p < pe) {
-    int i, n = tokenize(p, pe - p, tokbuf, 256, &q);
+    int i, n = grn_tokenize(p, pe - p, tokbuf, 256, &q);
     for (i = 0; i < n; i++) {
       r = tokbuf[i];
       while (p < r && (' ' == *p || ',' == *p)) { p++; }
@@ -13473,7 +13473,7 @@ grn_table_sort_key_from_str_geo(grn_ctx *ctx, const char *str, unsigned int str_
   p = str;
   if ((tokbuf = GRN_MALLOCN(const char *, str_size))) {
     grn_id domain = GRN_ID_NIL;
-    int i, n = tokenize(str, str_size, tokbuf, str_size, NULL);
+    int i, n = grn_tokenize(str, str_size, tokbuf, str_size, NULL);
     if ((keys = GRN_MALLOCN(grn_table_sort_key, n))) {
       k = keys;
       for (i = 0; i < n; i++) {
@@ -13537,7 +13537,7 @@ grn_table_sort_key_from_str(grn_ctx *ctx, const char *str, unsigned int str_size
     return keys;
   }
   if ((tokbuf = GRN_MALLOCN(const char *, str_size))) {
-    int i, n = tokenize(str, str_size, tokbuf, str_size, NULL);
+    int i, n = grn_tokenize(str, str_size, tokbuf, str_size, NULL);
     if ((keys = GRN_MALLOCN(grn_table_sort_key, n))) {
       k = keys;
       for (i = 0; i < n; i++) {

  Modified: lib/grn_util.h (+4 -0)
===================================================================
--- lib/grn_util.h    2017-02-03 13:15:58 +0900 (38dbd8d)
+++ lib/grn_util.h    2017-02-03 14:15:00 +0900 (b9ed347)
@@ -40,6 +40,10 @@ void grn_p_record(grn_ctx *ctx, grn_obj *table, grn_id id);
 int grn_mkstemp(char *path_template);
 grn_bool grn_path_exist(const char *path);
 
+int grn_tokenize(const char *str, size_t str_len,
+                 const char **tokbuf, int buf_size,
+                 const char **rest);
+
 #ifdef __cplusplus
 }
 #endif

  Modified: lib/util.c (+35 -0)
===================================================================
--- lib/util.c    2017-02-03 13:15:58 +0900 (62bb03c)
+++ lib/util.c    2017-02-03 14:15:00 +0900 (166cdfa)
@@ -1572,3 +1572,38 @@ grn_path_exist(const char *path)
   struct stat status;
   return stat(path, &status) == 0;
 }
+
+/* todo : refine */
+/*
+ * grn_tokenize splits a string into at most buf_size tokens and
+ * returns the number of tokens. The ending address of each token is
+ * written into tokbuf. Delimiters are ' ' and ','.
+ * Then, the address of the remaining input is stored in *rest (if rest is not NULL).
+ */
+int
+grn_tokenize(const char *str, size_t str_len,
+             const char **tokbuf, int buf_size,
+             const char **rest)
+{
+  const char **tok = tokbuf, **tok_end = tokbuf + buf_size;
+  if (buf_size > 0) {
+    const char *str_end = str + str_len;
+    while (str < str_end && (' ' == *str || ',' == *str)) { str++; }
+    for (;;) {
+      if (str == str_end) {
+        *tok++ = str;
+        break;
+      }
+      if (' ' == *str || ',' == *str) {
+        /* *str = '\0'; */
+        *tok++ = str;
+        if (tok == tok_end) { break; }
+        do { str++; } while (str < str_end && (' ' == *str || ',' == *str));
+      } else {
+        str++;
+      }
+    }
+  }
+  if (rest) { *rest = str; }
+  return tok - tokbuf;
+}
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index