Kouhei Sutou	2019-02-07 14:48:48 +0900 (Thu, 07 Feb 2019)

  Revision: 81dd354377cd870f74736988ad1f6d5181810bf8
  https://github.com/groonga/groonga/commit/81dd354377cd870f74736988ad1f6d5181810bf8

  Message:
    TokenTable: add a new tokenizer that tokenizes existing keys

  Added files:
    test/command/suite/select/query/match/with_index/token_table/same.expected
    test/command/suite/select/query/match/with_index/token_table/same.test
    test/command/suite/tokenizers/table/match.expected
    test/command/suite/tokenizers/table/match.test
    test/command/suite/tokenizers/table/no_table.expected
    test/command/suite/tokenizers/table/no_table.test
    test/command/suite/tokenizers/table/nonexistent_table.expected
    test/command/suite/tokenizers/table/nonexistent_table.test
  Modified files:
    lib/tokenizers.c
    test/command/suite/schema/plugins.expected
    test/command/suite/schema/tables/columns/compress/lz4.expected
    test/command/suite/schema/tables/columns/compress/zlib.expected
    test/command/suite/schema/tables/columns/compress/zstd.expected
    test/command/suite/schema/tables/columns/type/index_medium.expected
    test/command/suite/schema/tables/columns/type/index_small.expected
    test/command/suite/schema/tables/columns/type/scalar.expected
    test/command/suite/schema/tables/columns/type/vector.expected
    test/command/suite/schema/tables/normalizer.expected
    test/command/suite/schema/tables/normalizer_with_options.expected
    test/command/suite/schema/tables/token_filters.expected
    test/command/suite/schema/tables/token_filters_with_options.expected
    test/command/suite/schema/tables/tokenizer.expected
    test/command/suite/schema/tables/tokenizer_with_options.expected
    test/command/suite/schema/tables/type/array.expected
    test/command/suite/schema/tables/type/hash_table.expected
    test/command/suite/schema/tables/value_type/reference.expected
    test/command/suite/schema/tables/value_type/type.expected
    test/command/suite/tokenizer_list/default.expected

  Modified: lib/tokenizers.c (+250 -0)
===================================================================
--- lib/tokenizers.c    2019-02-07 10:54:48 +0900 (d10118356)
+++ lib/tokenizers.c    2019-02-07 14:48:48 +0900 (41f8ee843)
@@ -1764,6 +1764,249 @@ pattern_fin(grn_ctx *ctx, void *user_data)
   GRN_FREE(tokenizer);
 }
 
+/* table tokenizer */
+
+typedef struct {
+  grn_obj *table;
+} grn_table_options;
+
+typedef struct {
+  grn_tokenizer_token token;
+  grn_tokenizer_query *query;
+  grn_table_options *options;
+  grn_bool have_tokenized_delimiter;
+  grn_encoding encoding;
+  const unsigned char *start;
+  const unsigned char *current;
+  const unsigned char *next;
+  const unsigned char *end;
+  grn_pat_scan_hit hits[1024];
+  int n_hits;
+  int current_hit;
+} grn_table_tokenizer;
+
+static void
+table_options_init(grn_table_options *options)
+{
+  options->table = NULL;
+}
+
+static void *
+table_open_options(grn_ctx *ctx,
+                   grn_obj *tokenizer,
+                   grn_obj *raw_options,
+                   void *user_data)
+{
+  grn_table_options *options;
+
+  options = GRN_MALLOC(sizeof(grn_table_options));
+  if (!options) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "[tokenizer][table] "
+        "failed to allocate memory for options");
+    return NULL;
+  }
+
+  table_options_init(options);
+  GRN_OPTION_VALUES_EACH_BEGIN(ctx, raw_options, i, name, name_length) {
+    grn_raw_string name_raw;
+    name_raw.value = name;
+    name_raw.length = name_length;
+
+    if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "table")) {
+      const char *name;
+      unsigned int name_length;
+      grn_id domain;
+
+      name_length = grn_vector_get_element(ctx,
+                                           raw_options,
+                                           i,
+                                           &name,
+                                           NULL,
+                                           &domain);
+      if (grn_type_id_is_text_family(ctx, domain) && name_length > 0) {
+        options->table = grn_ctx_get(ctx, name, name_length);
+        if (!options->table) {
+          ERR(GRN_INVALID_ARGUMENT,
+              "[tokenizer][table] nonexistent table: <%.*s>",
+              (int)name_length, name);
+          break;
+        }
+        if (options->table->header.type != GRN_TABLE_PAT_KEY) {
+          grn_obj inspected;
+          GRN_TEXT_INIT(&inspected, 0);
+          grn_inspect(ctx, &inspected, options->table);
+          ERR(GRN_INVALID_ARGUMENT,
+              "[tokenizer][table] table must be a patricia trie table: "
+              "<%.*s>: <%.*s>",
+              (int)name_length,
+              name,
+              (int)GRN_TEXT_LEN(&inspected),
+              GRN_TEXT_VALUE(&inspected));
+          GRN_OBJ_FIN(ctx, &inspected);
+          break;
+        }
+      }
+    }
+  } GRN_OPTION_VALUES_EACH_END();
+
+  if (ctx->rc == GRN_SUCCESS && !options->table) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[tokenizer][table] table isn't specified");
+  }
+
+  return options;
+}
+
+static void
+table_close_options(grn_ctx *ctx, void *data)
+{
+  grn_table_options *options = data;
+  GRN_FREE(options);
+}
+
+static void *
+table_init(grn_ctx *ctx, grn_tokenizer_query *query)
+{
+  grn_obj *lexicon = grn_tokenizer_query_get_lexicon(ctx, query);
+  grn_table_options *options;
+  grn_table_tokenizer *tokenizer;
+
+  options = grn_table_cache_default_tokenizer_options(ctx,
+                                                      lexicon,
+                                                      table_open_options,
+                                                      table_close_options,
+                                                      NULL);
+  if (ctx->rc != GRN_SUCCESS) {
+    return NULL;
+  }
+
+  if (!(tokenizer = GRN_MALLOC(sizeof(grn_table_tokenizer)))) {
+    ERR(GRN_NO_MEMORY_AVAILABLE,
+        "[tokenizer][table] "
+        "memory allocation to grn_table_tokenizer failed");
+    return NULL;
+  }
+
+  tokenizer->query = query;
+  tokenizer->options = options;
+
+  {
+    const char *raw_string;
+    size_t raw_string_length;
+    grn_encoding encoding;
+
+    raw_string = grn_tokenizer_query_get_raw_string(ctx,
+                                                    tokenizer->query,
+                                                    &raw_string_length);
+    encoding = grn_tokenizer_query_get_encoding(ctx, tokenizer->query);
+    tokenizer->have_tokenized_delimiter =
+      grn_tokenizer_have_tokenized_delimiter(ctx,
+                                             raw_string,
+                                             raw_string_length,
+                                             encoding);
+    tokenizer->encoding = encoding;
+  }
+  {
+    grn_obj *string;
+    const char *normalized;
+    unsigned int normalized_length_in_bytes;
+
+    string = grn_tokenizer_query_get_normalized_string(ctx, tokenizer->query);
+    grn_string_get_normalized(ctx,
+                              string,
+                              &normalized, &normalized_length_in_bytes,
+                              NULL);
+    tokenizer->start = (const unsigned char *)normalized;
+    tokenizer->next = tokenizer->start;
+    tokenizer->current = tokenizer->start;
+    tokenizer->end = tokenizer->start + normalized_length_in_bytes;
+  }
+
+  tokenizer->n_hits = 0;
+  tokenizer->current_hit = -1;
+
+  return tokenizer;
+}
+
+static void
+table_scan(grn_ctx *ctx,
+           grn_table_tokenizer *tokenizer)
+{
+  const char *rest;
+  tokenizer->n_hits = grn_pat_scan(ctx,
+                                   (grn_pat *)(tokenizer->options->table),
+                                   tokenizer->next,
+                                   tokenizer->end - tokenizer->next,
+                                   tokenizer->hits,
+                                   sizeof(tokenizer->hits) /
+                                   sizeof(*(tokenizer->hits)),
+                                   &rest);
+  tokenizer->current = tokenizer->next;
+  tokenizer->next = rest;
+  tokenizer->current_hit = 0;
+}
+
+static void
+table_next(grn_ctx *ctx,
+           grn_tokenizer_query *query,
+           grn_token *token,
+           void *user_data)
+{
+  grn_table_tokenizer *tokenizer = user_data;
+
+  if (tokenizer->have_tokenized_delimiter) {
+    unsigned int rest_length;
+    rest_length = tokenizer->end - tokenizer->next;
+    tokenizer->next =
+      (unsigned char *)grn_tokenizer_next_by_tokenized_delimiter(
+        ctx,
+        token,
+        (const char *)tokenizer->next,
+        rest_length,
+        tokenizer->encoding);
+  } else {
+    if (tokenizer->current_hit == -1) {
+      table_scan(ctx, tokenizer);
+    }
+    if (tokenizer->current_hit < tokenizer->n_hits) {
+      grn_pat_scan_hit *hit = &(tokenizer->hits[tokenizer->current_hit]);
+      grn_token_set_data(ctx,
+                         token,
+                         tokenizer->current + hit->offset,
+                         hit->length);
+      tokenizer->current_hit++;
+      if (tokenizer->current_hit == tokenizer->n_hits) {
+        grn_token_status status = GRN_TOKEN_CONTINUE;
+        tokenizer->current_hit = -1;
+        if (tokenizer->next != tokenizer->end) {
+          table_scan(ctx, tokenizer);
+        }
+        if (tokenizer->next == tokenizer->end) {
+          status = GRN_TOKEN_LAST;
+        }
+        grn_token_set_status(ctx, token, status);
+      } else {
+        grn_token_set_status(ctx, token, GRN_TOKEN_CONTINUE);
+      }
+    } else {
+      grn_token_set_data(ctx, token, NULL, 0);
+      grn_token_set_status(ctx, token, GRN_TOKEN_LAST);
+    }
+  }
+}
+
+static void
+table_fin(grn_ctx *ctx, void *user_data)
+{
+  grn_table_tokenizer *tokenizer = user_data;
+
+  if (!tokenizer) {
+    return;
+  }
+  GRN_FREE(tokenizer);
+}
+
 /* external */
 
 grn_rc
@@ -1958,6 +2201,13 @@ grn_db_init_builtin_tokenizers(grn_ctx *ctx)
     grn_tokenizer_set_next_func(ctx, tokenizer, pattern_next);
     grn_tokenizer_set_fin_func(ctx, tokenizer, pattern_fin);
   }
+  {
+    grn_obj *tokenizer;
+    tokenizer = grn_tokenizer_create(ctx, "TokenTable", -1);
+    grn_tokenizer_set_init_func(ctx, tokenizer, table_init);
+    grn_tokenizer_set_next_func(ctx, tokenizer, table_next);
+    grn_tokenizer_set_fin_func(ctx, tokenizer, table_fin);
+  }
 
   return GRN_SUCCESS;
 }

  Modified: test/command/suite/schema/plugins.expected (+7 -3)
===================================================================
--- test/command/suite/schema/plugins.expected    2019-02-07 10:54:48 +0900 (c492ea356)
+++ test/command/suite/schema/plugins.expected    2019-02-07 14:48:48 +0900 (caad98793)
@@ -191,6 +191,10 @@ schema
         "id": 77,
         "name": "TokenRegexp"
       },
+      "TokenTable": {
+        "id": 80,
+        "name": "TokenTable"
+      },
       "TokenTrigram": {
         "id": 68,
         "name": "TokenTrigram"
@@ -202,15 +206,15 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 80,
+        "id": 81,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC100": {
-        "id": 82,
+        "id": 83,
         "name": "NormalizerNFKC100"
       },
       "NormalizerNFKC51": {
-        "id": 81,
+        "id": 82,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/columns/compress/lz4.expected (+7 -3)
===================================================================
--- test/command/suite/schema/tables/columns/compress/lz4.expected    2019-02-07 10:54:48 +0900 (486b12a9a)
+++ test/command/suite/schema/tables/columns/compress/lz4.expected    2019-02-07 14:48:48 +0900 (e24b459cf)
@@ -190,6 +190,10 @@ schema
         "id": 77,
         "name": "TokenRegexp"
       },
+      "TokenTable": {
+        "id": 80,
+        "name": "TokenTable"
+      },
       "TokenTrigram": {
         "id": 68,
         "name": "TokenTrigram"
@@ -201,15 +205,15 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 80,
+        "id": 81,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC100": {
-        "id": 82,
+        "id": 83,
         "name": "NormalizerNFKC100"
       },
       "NormalizerNFKC51": {
-        "id": 81,
+        "id": 82,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/columns/compress/zlib.expected (+7 -3)
===================================================================
--- test/command/suite/schema/tables/columns/compress/zlib.expected    2019-02-07 10:54:48 +0900 (d2e422f81)
+++ test/command/suite/schema/tables/columns/compress/zlib.expected    2019-02-07 14:48:48 +0900 (7d5d1bae9)
@@ -190,6 +190,10 @@ schema
         "id": 77,
         "name": "TokenRegexp"
       },
+      "TokenTable": {
+        "id": 80,
+        "name": "TokenTable"
+      },
       "TokenTrigram": {
         "id": 68,
         "name": "TokenTrigram"
@@ -201,15 +205,15 @@ schema
     },
"normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/columns/compress/zstd.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/columns/compress/zstd.expected 2019-02-07 10:54:48 +0900 (60134b304) +++ test/command/suite/schema/tables/columns/compress/zstd.expected 2019-02-07 14:48:48 +0900 (4ceef10ba) @@ -190,6 +190,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -201,15 +205,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/columns/type/index_medium.expected (+8 -4) =================================================================== --- test/command/suite/schema/tables/columns/type/index_medium.expected 2019-02-07 10:54:48 +0900 (ffaa17a37) +++ test/command/suite/schema/tables/columns/type/index_medium.expected 2019-02-07 14:48:48 +0900 (3bc08528a) @@ -196,6 +196,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -207,15 +211,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, @@ -354,7 +358,7 @@ schema "options": null }, "normalizer": { - "id": 80, + "id": 81, "name": "NormalizerAuto", "options": null }, Modified: test/command/suite/schema/tables/columns/type/index_small.expected (+8 -4) =================================================================== --- test/command/suite/schema/tables/columns/type/index_small.expected 2019-02-07 10:54:48 +0900 (8864b4092) +++ test/command/suite/schema/tables/columns/type/index_small.expected 2019-02-07 14:48:48 +0900 (56432ad5a) @@ -196,6 +196,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -207,15 +211,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, @@ -354,7 +358,7 @@ schema "options": null }, "normalizer": { - "id": 80, + "id": 81, "name": "NormalizerAuto", "options": null }, Modified: test/command/suite/schema/tables/columns/type/scalar.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/columns/type/scalar.expected 2019-02-07 10:54:48 +0900 (2332d720a) +++ test/command/suite/schema/tables/columns/type/scalar.expected 2019-02-07 14:48:48 +0900 (13e3a1b88) @@ -190,6 +190,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -201,15 +205,15 @@ schema }, "normalizers": { 
"NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/columns/type/vector.expected (+8 -4) =================================================================== --- test/command/suite/schema/tables/columns/type/vector.expected 2019-02-07 10:54:48 +0900 (998a29ac0) +++ test/command/suite/schema/tables/columns/type/vector.expected 2019-02-07 14:48:48 +0900 (e6d59ad6e) @@ -192,6 +192,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -203,15 +207,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, @@ -296,7 +300,7 @@ schema "value_type": null, "tokenizer": null, "normalizer": { - "id": 80, + "id": 81, "name": "NormalizerAuto", "options": null }, Modified: test/command/suite/schema/tables/normalizer.expected (+8 -4) =================================================================== --- test/command/suite/schema/tables/normalizer.expected 2019-02-07 10:54:48 +0900 (2e9ff0399) +++ test/command/suite/schema/tables/normalizer.expected 2019-02-07 14:48:48 +0900 (1a0dc0f6a) @@ -188,6 +188,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -199,15 +203,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, @@ -230,7 +234,7 @@ schema "value_type": null, "tokenizer": null, "normalizer": { - "id": 80, + "id": 81, "name": "NormalizerAuto", "options": null }, Modified: test/command/suite/schema/tables/normalizer_with_options.expected (+8 -4) =================================================================== --- test/command/suite/schema/tables/normalizer_with_options.expected 2019-02-07 10:54:48 +0900 (9245470bf) +++ test/command/suite/schema/tables/normalizer_with_options.expected 2019-02-07 14:48:48 +0900 (a2f7426f7) @@ -188,6 +188,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -199,15 +203,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, @@ -230,7 +234,7 @@ schema "value_type": null, "tokenizer": null, "normalizer": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100", "options": [ "unify_kana", Modified: test/command/suite/schema/tables/token_filters.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/token_filters.expected 2019-02-07 10:54:48 +0900 (6ff6b581a) +++ test/command/suite/schema/tables/token_filters.expected 2019-02-07 14:48:48 +0900 (73115fd5a) @@ -193,6 +193,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, 
"TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -204,15 +208,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/token_filters_with_options.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/token_filters_with_options.expected 2019-02-07 10:54:48 +0900 (73242af8c) +++ test/command/suite/schema/tables/token_filters_with_options.expected 2019-02-07 14:48:48 +0900 (35af69113) @@ -193,6 +193,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -204,15 +208,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/tokenizer.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/tokenizer.expected 2019-02-07 10:54:48 +0900 (efc624dcf) +++ test/command/suite/schema/tables/tokenizer.expected 2019-02-07 14:48:48 +0900 (3b151e44c) @@ -188,6 +188,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -199,15 +203,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/tokenizer_with_options.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/tokenizer_with_options.expected 2019-02-07 10:54:48 +0900 (f494cae1d) +++ test/command/suite/schema/tables/tokenizer_with_options.expected 2019-02-07 14:48:48 +0900 (607f4c54b) @@ -188,6 +188,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -199,15 +203,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/type/array.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/type/array.expected 2019-02-07 10:54:48 +0900 (b985517a5) +++ test/command/suite/schema/tables/type/array.expected 2019-02-07 14:48:48 +0900 (d42dd8947) @@ -188,6 +188,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -199,15 +203,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: 
test/command/suite/schema/tables/type/hash_table.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/type/hash_table.expected 2019-02-07 10:54:48 +0900 (d88b06e5b) +++ test/command/suite/schema/tables/type/hash_table.expected 2019-02-07 14:48:48 +0900 (6e36c1bc2) @@ -188,6 +188,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -199,15 +203,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/value_type/reference.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/value_type/reference.expected 2019-02-07 10:54:48 +0900 (4a06ff8e4) +++ test/command/suite/schema/tables/value_type/reference.expected 2019-02-07 14:48:48 +0900 (62893d048) @@ -190,6 +190,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -201,15 +205,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Modified: test/command/suite/schema/tables/value_type/type.expected (+7 -3) =================================================================== --- test/command/suite/schema/tables/value_type/type.expected 2019-02-07 10:54:48 +0900 (cb7c1aba7) +++ test/command/suite/schema/tables/value_type/type.expected 2019-02-07 14:48:48 +0900 (eb4d4833f) @@ -188,6 +188,10 @@ schema "id": 77, "name": "TokenRegexp" }, + "TokenTable": { + "id": 80, + "name": "TokenTable" + }, "TokenTrigram": { "id": 68, "name": "TokenTrigram" @@ -199,15 +203,15 @@ schema }, "normalizers": { "NormalizerAuto": { - "id": 80, + "id": 81, "name": "NormalizerAuto" }, "NormalizerNFKC100": { - "id": 82, + "id": 83, "name": "NormalizerNFKC100" }, "NormalizerNFKC51": { - "id": 81, + "id": 82, "name": "NormalizerNFKC51" } }, Added: test/command/suite/select/query/match/with_index/token_table/same.expected (+20 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/query/match/with_index/token_table/same.expected 2019-02-07 14:48:48 +0900 (b46d2b629) @@ -0,0 +1,20 @@ +table_create Menus TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Menus name COLUMN_SCALAR Text +[[0,0.0,0.0],true] +table_create Keywords TABLE_PAT_KEY ShortText --normalize NormalizerNFKC100 --default_tokenizer 'TokenTable("table", "Keywords")' +[[0,0.0,0.0],true] +column_create Keywords index COLUMN_INDEX Menus name +[[0,0.0,0.0],true] +load --table Keywords +[ +{"_key": "焼肉"} +] +[[0,0.0,0.0],1] +load --table Menus +[ +{"name": "焼肉定食"} +] +[[0,0.0,0.0],1] +select Menus --match_columns name --query "焼肉弁当" +[[0,0.0,0.0],[[[1],[["_id","UInt32"],["name","Text"]],[1,"焼肉定食"]]]] Added: test/command/suite/select/query/match/with_index/token_table/same.test (+19 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/query/match/with_index/token_table/same.test 2019-02-07 14:48:48 
+0900 (ee722b31b) @@ -0,0 +1,19 @@ +table_create Menus TABLE_NO_KEY +column_create Menus name COLUMN_SCALAR Text + +table_create Keywords TABLE_PAT_KEY ShortText \ + --normalize NormalizerNFKC100 \ + --default_tokenizer 'TokenTable("table", "Keywords")' +column_create Keywords index COLUMN_INDEX Menus name + +load --table Keywords +[ +{"_key": "焼肉"} +] + +load --table Menus +[ +{"name": "焼肉定食"} +] + +select Menus --match_columns name --query "焼肉弁当" Modified: test/command/suite/tokenizer_list/default.expected (+3 -0) =================================================================== --- test/command/suite/tokenizer_list/default.expected 2019-02-07 10:54:48 +0900 (40424396d) +++ test/command/suite/tokenizer_list/default.expected 2019-02-07 14:48:48 +0900 (dc3adbb93) @@ -53,6 +53,9 @@ tokenizer_list }, { "name": "TokenPattern" + }, + { + "name": "TokenTable" } ] ] Added: test/command/suite/tokenizers/table/match.expected (+37 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/table/match.expected 2019-02-07 14:48:48 +0900 (a48159e4f) @@ -0,0 +1,37 @@ +table_create Keywords TABLE_PAT_KEY ShortText --normalizer NormalizerNFKC100 +[[0,0.0,0.0],true] +load --table Keywords +[ +{"_key": "100円"}, +{"_key": "りんご"}, +{"_key": "29円"} +] +[[0,0.0,0.0],3] +tokenize 'TokenTable("table", "Keywords")' "私は100円のりんごを29円で買いました。" +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "100円", + "position": 0, + "force_prefix": false, + "force_prefix_search": false + }, + { + "value": "りんご", + "position": 1, + "force_prefix": false, + "force_prefix_search": false + }, + { + "value": "29円", + "position": 2, + "force_prefix": false, + "force_prefix_search": false + } + ] +] Added: test/command/suite/tokenizers/table/match.test (+12 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/table/match.test 2019-02-07 14:48:48 +0900 (ef0ff48bd) @@ -0,0 +1,12 @@ +table_create Keywords TABLE_PAT_KEY ShortText --normalizer NormalizerNFKC100 + +load --table Keywords +[ +{"_key": "100円"}, +{"_key": "りんご"}, +{"_key": "29円"} +] + +tokenize \ + 'TokenTable("table", "Keywords")' \ + "私は100円のりんごを29円で買いました。" Added: test/command/suite/tokenizers/table/no_table.expected (+3 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/table/no_table.expected 2019-02-07 14:48:48 +0900 (8c15a3a0b) @@ -0,0 +1,3 @@ +tokenize 'TokenTable' "This is a pen." +[[[-22,0.0,0.0],"[tokenizer][table] table isn't specified"],[]] +#|e| [tokenizer][table] table isn't specified Added: test/command/suite/tokenizers/table/no_table.test (+3 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/table/no_table.test 2019-02-07 14:48:48 +0900 (47a8cf70c) @@ -0,0 +1,3 @@ +tokenize \ + 'TokenTable' \ + "This is a pen." Added: test/command/suite/tokenizers/table/nonexistent_table.expected (+3 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/table/nonexistent_table.expected 2019-02-07 14:48:48 +0900 (b5250a295) @@ -0,0 +1,3 @@ +tokenize 'TokenTable("table", "Nonexistent")' "This is a pen." 
+[[[-22,0.0,0.0],"[tokenizer][table] nonexistent table: <Nonexistent>"],[]]
+#|e| [tokenizer][table] nonexistent table: <Nonexistent>

  Added: test/command/suite/tokenizers/table/nonexistent_table.test (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/table/nonexistent_table.test    2019-02-07 14:48:48 +0900 (012761bca)
@@ -0,0 +1,3 @@
+tokenize \
+  'TokenTable("table", "Nonexistent")' \
+  "This is a pen."
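
  Note: the no_table and nonexistent_table suites above exercise two of the
  three error paths in table_open_options(); the "table must be a patricia
  trie table" branch added in lib/tokenizers.c has no test in this commit.
  A minimal sketch of that case (the table name "Words" is hypothetical and
  the expected output is inferred from the code, not taken from the commit):

    # Hypothetical: a hash table should be rejected, because table_scan()
    # casts options->table to grn_pat * and calls grn_pat_scan() on it.
    table_create Words TABLE_HASH_KEY ShortText
    tokenize 'TokenTable("table", "Words")' "This is a pen."
    # Expected error per table_open_options():
    #   [tokenizer][table] table must be a patricia trie table: <Words>: <...>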
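
  Note: as the match.expected output above shows, table_next() emits only
  the substrings that grn_pat_scan() matches against the table's keys; the
  surrounding text ("私は", "の", "を", "で買いました。") produces no tokens.
  Because table_init() resolves the scan table from the tokenizer options
  rather than from the lexicon, the lexicon does not appear to need to be
  the key table itself, as it is in same.test. A sketch under that
  assumption (the "Products" and "Terms" names are hypothetical):

    # Hypothetical: keep the keyword dictionary (Keywords) separate from
    # the index lexicon (Terms); TokenTable scans Keywords while the
    # resulting tokens are stored in Terms.
    table_create Keywords TABLE_PAT_KEY ShortText --normalizer NormalizerNFKC100
    load --table Keywords
    [
    {"_key": "焼肉"}
    ]
    table_create Products TABLE_NO_KEY
    column_create Products name COLUMN_SCALAR Text
    table_create Terms TABLE_PAT_KEY ShortText \
      --normalizer NormalizerNFKC100 \
      --default_tokenizer 'TokenTable("table", "Keywords")'
    column_create Terms index COLUMN_INDEX Products name
    load --table Products
    [
    {"name": "焼肉定食"}
    ]
    select Products --match_columns name --query "焼肉弁当"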