Kouhei Sutou 2019-03-22 09:56:59 +0900 (Fri, 22 Mar 2019) Revision: 9999b6e65bec999986609feebba38f2d0b2e5c68 https://github.com/groonga/groonga/commit/9999b6e65bec999986609feebba38f2d0b2e5c68 Message: index_column_diff: add support for customizing cache size We can customize cache size by GRN_INDEX_COLUMN_DIFF_CACHE_SIZE_MAX. This is for finding suitable cache size. It's not for users. It's for developers. Modified files: lib/index_column.c Modified: lib/index_column.c (+75 -15) =================================================================== --- lib/index_column.c 2019-03-22 09:29:11 +0900 (4c858f106) +++ lib/index_column.c 2019-03-22 09:56:59 +0900 (824ead211) @@ -26,6 +26,7 @@ static uint64_t grn_index_sparsity = 10; static grn_bool grn_index_chunk_split_enable = GRN_TRUE; +static size_t grn_index_column_diff_cache_size_max = 256; void grn_index_column_init_from_env(void) @@ -56,6 +57,21 @@ grn_index_column_init_from_env(void) grn_index_chunk_split_enable = GRN_TRUE; } } + + { + char grn_index_column_diff_cache_size_max_env[GRN_ENV_BUFFER_SIZE]; + grn_getenv("GRN_INDEX_COLUMN_DIFF_CACHE_SIZE_MAX", + grn_index_column_diff_cache_size_max_env, + GRN_ENV_BUFFER_SIZE); + if (grn_index_column_diff_cache_size_max_env[0]) { + uint64_t size; + errno = 0; + size = strtoull(grn_index_column_diff_cache_size_max_env, NULL, 0); + if (errno == 0 && size > 0) { + grn_index_column_diff_cache_size_max = size; + } + } + } } grn_inline static void @@ -239,7 +255,13 @@ grn_index_column_rebuild(grn_ctx *ctx, grn_obj *index_column) static const char *postings_column_name = "postings"; static const char *remains_column_name = "remains"; static const char *missings_column_name = "missings"; -#define GRN_INDEX_COLUMN_DIFF_CACHE_SIZE 256 + +typedef struct { + grn_id token_id; + grn_id diff_id; + grn_obj postings; + size_t offset; +} grn_index_column_diff_cache; typedef struct { grn_obj *lexicon; @@ -277,12 +299,8 @@ typedef struct { grn_timeval start_time; grn_timeval previous_time; } progress; - struct { - grn_id token_id; - grn_id diff_id; - grn_obj postings; - size_t offset; - } cache[GRN_INDEX_COLUMN_DIFF_CACHE_SIZE]; + grn_index_column_diff_cache *cache; + size_t cache_size; } grn_index_column_diff_data; static void @@ -293,12 +311,44 @@ grn_index_column_diff_data_init(grn_ctx *ctx, GRN_VOID_INIT(&(data->buffers.value)); GRN_UINT32_INIT(&(data->buffers.postings), GRN_OBJ_VECTOR); - for (size_t i = 0; i < GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; i++) { + data->cache = NULL; + data->cache_size = 0; +} + +static void +grn_index_column_diff_data_init_cache(grn_ctx *ctx, + grn_index_column_diff_data *data) +{ + size_t cache_size; + const size_t n_tokens = grn_table_size(ctx, data->lexicon); + if (n_tokens == 0) { + cache_size = 1; + } else if (n_tokens < grn_index_column_diff_cache_size_max) { + cache_size = n_tokens; + } else { + cache_size = n_tokens * 0.01; + if (cache_size >= grn_index_column_diff_cache_size_max) { + cache_size = grn_index_column_diff_cache_size_max; + } + } + + data->cache = GRN_MALLOCN(grn_index_column_diff_cache, cache_size); + if (!data->cache) { + char message[GRN_CTX_MSGSIZE]; + grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf); + ERR(GRN_NO_MEMORY_AVAILABLE, + "[index-column][diff] failed to allocate cache: <%" GRN_FMT_SIZE ">: %s", + cache_size, + message); + return; + } + for (size_t i = 0; i < cache_size; i++) { data->cache[i].token_id = GRN_ID_NIL; data->cache[i].diff_id = GRN_ID_NIL; GRN_UINT32_INIT(&(data->cache[i].postings), GRN_OBJ_VECTOR); data->cache[i].offset = 0; } + data->cache_size = cache_size; } static void @@ -319,9 +369,10 @@ grn_index_column_diff_data_fin(grn_ctx *ctx, GRN_OBJ_FIN(ctx, &(data->buffers.value)); GRN_OBJ_FIN(ctx, &(data->buffers.postings)); - for (size_t i = 0; i < GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; i++) { + for (size_t i = 0; i < data->cache_size; i++) { GRN_OBJ_FIN(ctx, &(data->cache[i].postings)); } + GRN_FREE(data->cache); } static void @@ -440,9 +491,11 @@ grn_index_column_diff_progress(grn_ctx *ctx, } static size_t -grn_index_column_diff_cache_compute_key(grn_id token_id) +grn_index_column_diff_cache_compute_key(grn_ctx *ctx, + grn_index_column_diff_data *data, + grn_id token_id) { - return token_id % GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; + return token_id % data->cache_size; } static void @@ -486,7 +539,7 @@ static void grn_index_column_diff_cache_flush_all(grn_ctx *ctx, grn_index_column_diff_data *data) { - for (size_t i = 0; i < GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; i++) { + for (size_t i = 0; i < data->cache_size; i++) { grn_index_column_diff_cache_flush(ctx, data, i); } } @@ -496,7 +549,8 @@ grn_index_column_diff_cache_allocate(grn_ctx *ctx, grn_index_column_diff_data *data) { const grn_id token_id = data->current.token_id; - const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); + const size_t cache_key = + grn_index_column_diff_cache_compute_key(ctx, data, token_id); grn_index_column_diff_cache_flush(ctx, data, cache_key); data->cache[cache_key].token_id = token_id; data->cache[cache_key].diff_id = data->current.diff_id; @@ -509,7 +563,8 @@ grn_index_column_diff_cache_get(grn_ctx *ctx, size_t *n_postings) { const grn_id token_id = data->current.token_id; - const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); + const size_t cache_key + = grn_index_column_diff_cache_compute_key(ctx, data, token_id); const grn_id cached_token_id = data->cache[cache_key].token_id; grn_obj *postings; if (cached_token_id == token_id) { @@ -533,7 +588,8 @@ grn_index_column_diff_cache_remove_posting(grn_ctx *ctx, size_t nth_posting) { const grn_id token_id = data->current.token_id; - const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); + const size_t cache_key = + grn_index_column_diff_cache_compute_key(ctx, data, token_id); const size_t n_posting_elements = data->n_posting_elements; size_t n_postings = 0; const uint32_t *postings = @@ -852,6 +908,10 @@ grn_index_column_diff(grn_ctx *ctx, } data.lexicon = grn_ctx_at(ctx, index_column->header.domain); + grn_index_column_diff_data_init_cache(ctx, &data); + if (ctx->rc != GRN_SUCCESS) { + goto exit; + } data.diff = grn_table_create(ctx, NULL, 0, -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190322/2bb0f666/attachment-0001.html>