Kouhei Sutou
null+****@clear*****
Fri Nov 9 13:56:02 JST 2012
Kouhei Sutou 2012-11-09 13:56:02 +0900 (Fri, 09 Nov 2012) New Revision: ba518d1e20a3bf85b596b717885604b6a80e1121 https://github.com/groonga/groonga/commit/ba518d1e20a3bf85b596b717885604b6a80e1121 Log: Add grn_tokenizer_have_delimiter() Modified files: include/groonga/tokenizer.h lib/tokenizer.c test/unit/core/test-tokenizer.c Modified: include/groonga/tokenizer.h (+10 -0) =================================================================== --- include/groonga/tokenizer.h 2012-11-09 13:38:10 +0900 (0d0a899) +++ include/groonga/tokenizer.h 2012-11-09 13:56:02 +0900 (37f9147) @@ -54,6 +54,16 @@ grn_bool grn_tokenizer_is_delimiter(grn_ctx *ctx, grn_encoding encoding); /* + grn_tokenizer_have_delimiter() returns whether is there the special + delimiter character in the string specified by `str_ptr' and + `str_length' the special delimiter character or not. + */ +grn_bool grn_tokenizer_have_delimiter(grn_ctx *ctx, + const char *str_ptr, + unsigned int str_length, + grn_encoding encoding); + +/* grn_tokenizer_query is a structure for storing a query. See the following functions. 
*/ Modified: lib/tokenizer.c (+25 -0) =================================================================== --- lib/tokenizer.c 2012-11-09 13:38:10 +0900 (061b7f6) +++ lib/tokenizer.c 2012-11-09 13:56:02 +0900 (1b9085e) @@ -100,6 +100,31 @@ grn_tokenizer_is_delimiter(grn_ctx *ctx, const char *str_ptr, binary_string[2] == 0xBE; } +grn_bool +grn_tokenizer_have_delimiter(grn_ctx *ctx, const char *str_ptr, + unsigned int str_length, grn_encoding encoding) +{ + int char_length; + const char *current = str_ptr; + const char *end = str_ptr + str_length; + + if (encoding != GRN_ENC_UTF8) { + return GRN_FALSE; + } + + if (str_length == 0) { + return GRN_FALSE; + } + + while ((char_length = grn_charlen_(ctx, current, end, encoding)) > 0) { + if (grn_tokenizer_is_delimiter(ctx, current, char_length, encoding)) { + return GRN_TRUE; + } + current += char_length; + } + return GRN_FALSE; +} + grn_tokenizer_query * grn_tokenizer_query_create(grn_ctx *ctx, int num_args, grn_obj **args) { Modified: test/unit/core/test-tokenizer.c (+38 -0) =================================================================== --- test/unit/core/test-tokenizer.c 2012-11-09 13:38:10 +0900 (433ec5c) +++ test/unit/core/test-tokenizer.c 2012-11-09 13:56:02 +0900 (85da507) @@ -25,6 +25,8 @@ void data_is_delimiter(void); void test_is_delimiter(gconstpointer data); +void data_have_delimiter(void); +void test_have_delimiter(gconstpointer data); static grn_ctx context; static grn_obj *db; @@ -85,3 +87,39 @@ test_is_delimiter(gconstpointer data) encoding)); } } + +void +data_have_delimiter(void) +{ +#define ADD_DATUM(label, expected, input) \ + gcut_add_datum(label, \ + "expected", G_TYPE_BOOLEAN, expected, \ + "input", G_TYPE_STRING, input, \ + NULL) + +#define UFFFE_IN_UTF8 "\xef\xbf\xbe" + + ADD_DATUM("have", GRN_TRUE, "a" UFFFE_IN_UTF8 "b"); + ADD_DATUM("not have", GRN_FALSE, "ab"); + +#undef UFFFE_IN_UTF8 + +#undef ADD_DATUM +} + +void +test_have_delimiter(gconstpointer data) +{ + const gchar *input; + 
grn_encoding encoding = GRN_ENC_UTF8; + + GRN_CTX_SET_ENCODING(&context, encoding); + input = gcut_data_get_string(data, "input"); + if (gcut_data_get_boolean(data, "expected")) { + cut_assert_true(grn_tokenizer_have_delimiter(&context, input, strlen(input), + encoding)); + } else { + cut_assert_false(grn_tokenizer_have_delimiter(&context, input, strlen(input), + encoding)); + } +} -------------- next part -------------- HTMLの添付ファイルを保管しました... Télécharger