Kouhei Sutou
null+****@clear*****
Tue Aug 11 22:16:32 JST 2015
Kouhei Sutou 2015-08-11 22:16:32 +0900 (Tue, 11 Aug 2015) New Revision: 9c8109a636c473ed2680cdee14bc440290813a19 https://github.com/pgroonga/pgroonga/commit/9c8109a636c473ed2680cdee14bc440290813a19 Message: Add pgroonga.snippet_html() [groonga-dev,03398] Suggested by Hiroaki Nakamura. Thanks!!! Added files: expected/groonga/snippet-html/keywords/multiple.out expected/groonga/snippet-html/keywords/one.out sql/groonga/snippet-html/keywords/multiple.sql sql/groonga/snippet-html/keywords/one.sql Modified files: pgroonga.c pgroonga.h pgroonga.sql Added: expected/groonga/snippet-html/keywords/multiple.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/groonga/snippet-html/keywords/multiple.out 2015-08-11 22:16:32 +0900 (d988ab2) @@ -0,0 +1,19 @@ +SELECT pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); + snippet_html +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"Groonga is a <span class=\"keyword\">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Gro","ase management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and <span class=\"keyword\">PostgreSQL</span>, column-oriented systems are more suited for aggregate queries. Due to this advantage, Groonga"} +(1 row) + Added: expected/groonga/snippet-html/keywords/one.out (+19 -0) 100644 =================================================================== --- /dev/null +++ expected/groonga/snippet-html/keywords/one.out 2015-08-11 22:16:32 +0900 (2ef52ac) @@ -0,0 +1,19 @@ +SELECT pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); + snippet_html +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"<span class=\"keyword\">Groonga</span> is a fast and accurate full text search engine based on inverted index. One of the characteristics of <span class=\"keyword\">Groonga</span> is that a newly registered document instantly appears in search results. Also, Gro","t read locks. These characteristics result in superior performance on real-time applications.\\n\\n<span class=\"keyword\">Groonga</span> is also a column-oriented database management system (DBMS). Compared with well-known row-orien","ted systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. 
Due to this advantage, <span class=\"keyword\">Groonga</span> can cover weakness of row-oriented systems."} +(1 row) + Modified: pgroonga.c (+130 -0) =================================================================== --- pgroonga.c 2015-08-08 14:19:00 +0900 (73f4065) +++ pgroonga.c 2015-08-11 22:16:32 +0900 (681e9ce) @@ -156,6 +156,7 @@ static slist_head PGrnScanOpaques = SLIST_STATIC_INIT(PGrnScanOpaques); PG_FUNCTION_INFO_V1(pgroonga_score); PG_FUNCTION_INFO_V1(pgroonga_table_name); PG_FUNCTION_INFO_V1(pgroonga_command); +PG_FUNCTION_INFO_V1(pgroonga_snippet_html); PG_FUNCTION_INFO_V1(pgroonga_contain_text); PG_FUNCTION_INFO_V1(pgroonga_contain_text_array); @@ -1300,6 +1301,135 @@ pgroonga_command(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(result); } +static grn_obj * +PGrnSnipCreate(ArrayType *keywords) +{ + grn_obj *snip; + int flags = GRN_SNIP_SKIP_LEADING_SPACES; + unsigned int width = 200; + unsigned int maxNResults = 3; + const char *openTag = "<span class=\"keyword\">"; + const char *closeTag = "</span>"; + grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE; + + snip = grn_snip_open(ctx, flags, width, maxNResults, + openTag, strlen(openTag), + closeTag, strlen(closeTag), + mapping); + if (!snip) + { + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("pgroonga: " + "failed to allocate memory for generating snippet"))); + return NULL; + } + + grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO); + + { + int i, n; + + n = ARR_DIMS(keywords)[0]; + for (i = 1; i <= n; i++) + { + Datum keywordDatum; + text *keyword; + bool isNULL; + + keywordDatum = array_ref(keywords, 1, &i, -1, -1, false, + 'i', &isNULL); + if (isNULL) + continue; + + keyword = DatumGetTextPP(keywordDatum); + grn_snip_add_cond(ctx, snip, + VARDATA_ANY(keyword), + VARSIZE_ANY_EXHDR(keyword), + NULL, 0, NULL, 0); + } + } + + return snip; +} + +static grn_rc +PGrnSnipExec(grn_obj *snip, text *target, ArrayType **snippetArray) +{ + grn_rc rc; + unsigned int i, nResults, 
maxTaggedLength; + char *buffer; + Datum *snippets; + int dims[1]; + int lbs[1]; + + rc = grn_snip_exec(ctx, snip, + VARDATA_ANY(target), + VARSIZE_ANY_EXHDR(target), + &nResults, &maxTaggedLength); + if (rc != GRN_SUCCESS) + { + return rc; + } + + if (nResults == 0) + { + *snippetArray = construct_empty_array(TEXTOID); + return GRN_SUCCESS; + } + + buffer = palloc(sizeof(char) * maxTaggedLength); + snippets = palloc(sizeof(Datum) * nResults); + for (i = 0; i < nResults; i++) + { + grn_rc rc; + unsigned int snippetLength = 0; + + rc = grn_snip_get_result(ctx, snip, i, buffer, &snippetLength); + if (rc != GRN_SUCCESS) + { + pfree(buffer); + return rc; + } + snippets[i] = PointerGetDatum(cstring_to_text_with_len(buffer, + snippetLength)); + } + pfree(buffer); + + dims[0] = nResults; + lbs[0] = 1; + + *snippetArray = construct_md_array(snippets, NULL, + 1, dims, lbs, + TEXTOID, -1, false, 'i'); + return GRN_SUCCESS; +} + +/** + * pgroonga.snippet_html(target text, keywords text[]) : text[] + */ +Datum +pgroonga_snippet_html(PG_FUNCTION_ARGS) +{ + text *target = PG_GETARG_TEXT_PP(0); + ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1); + grn_obj *snip; + grn_rc rc; + ArrayType *snippets; + + snip = PGrnSnipCreate(keywords); + rc = PGrnSnipExec(snip, target, &snippets); + grn_obj_close(ctx, snip); + + if (rc != GRN_SUCCESS) { + ereport(ERROR, + (errcode(PGrnRCToPgErrorCode(rc)), + errmsg("pgroonga: failed to compute snippets"))); + } + + PG_RETURN_POINTER(snippets); +} + static grn_bool pgroonga_contain_raw(const char *text, unsigned int textSize, const char *subText, unsigned int subTextSize) Modified: pgroonga.h (+1 -0) =================================================================== --- pgroonga.h 2015-08-08 14:19:00 +0900 (0b7da96) +++ pgroonga.h 2015-08-11 22:16:32 +0900 (aa44117) @@ -42,6 +42,7 @@ extern void PGDLLEXPORT _PG_init(void); extern Datum PGDLLEXPORT pgroonga_score(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_table_name(PG_FUNCTION_ARGS); extern 
Datum PGDLLEXPORT pgroonga_command(PG_FUNCTION_ARGS); +extern Datum PGDLLEXPORT pgroonga_snippet_html(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_contain_text(PG_FUNCTION_ARGS); extern Datum PGDLLEXPORT pgroonga_contain_text_array(PG_FUNCTION_ARGS); Modified: pgroonga.sql (+7 -0) =================================================================== --- pgroonga.sql 2015-08-08 14:19:00 +0900 (9a29cf5) +++ pgroonga.sql 2015-08-11 22:16:32 +0900 (d1bc0a8) @@ -23,6 +23,13 @@ CREATE FUNCTION pgroonga.command(groongaCommand text) VOLATILE STRICT; +CREATE FUNCTION pgroonga.snippet_html(target text, keywords text[]) + RETURNS text[] + AS 'MODULE_PATHNAME', 'pgroonga_snippet_html' + LANGUAGE C + VOLATILE + STRICT; + CREATE FUNCTION pgroonga.contain(target text, query text) RETURNS bool AS 'MODULE_PATHNAME', 'pgroonga_contain_text' Added: sql/groonga/snippet-html/keywords/multiple.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/groonga/snippet-html/keywords/multiple.sql 2015-08-11 22:16:32 +0900 (596847c) @@ -0,0 +1,14 @@ +SELECT pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. 
Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['fast', 'PostgreSQL']); Added: sql/groonga/snippet-html/keywords/one.sql (+14 -0) 100644 =================================================================== --- /dev/null +++ sql/groonga/snippet-html/keywords/one.sql 2015-08-11 22:16:32 +0900 (b8fde85) @@ -0,0 +1,14 @@ +SELECT pgroonga.snippet_html( + 'Groonga is a fast and accurate full text search engine based on ' || + 'inverted index. One of the characteristics of Groonga is that a ' || + 'newly registered document instantly appears in search results. ' || + 'Also, Groonga allows updates without read locks. These characteristics ' || + 'result in superior performance on real-time applications.' || + '\n' || + '\n' || + 'Groonga is also a column-oriented database management system (DBMS). ' || + 'Compared with well-known row-oriented systems, such as MySQL and ' || + 'PostgreSQL, column-oriented systems are more suited for aggregate ' || + 'queries. Due to this advantage, Groonga can cover weakness of ' || + 'row-oriented systems.', + ARRAY['Groonga']); -------------- next part -------------- HTMLの添付ファイルを保管しました... Télécharger