svnno****@sourc*****
svnno****@sourc*****
Fri Mar 7 04:31:04 JST 2008
Revision: 3450
          http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi?root=kazehakase&view=rev&rev=3450
Author:   pal_gene
Date:     2008-03-07 04:31:04 +0900 (Fri, 07 Mar 2008)

Log Message:
-----------
Refactor. separate function.

Modified Paths:
--------------
    kazehakase/trunk/module/search/kz-hyper-estraier-search.c

Modified: kazehakase/trunk/module/search/kz-hyper-estraier-search.c
===================================================================
--- kazehakase/trunk/module/search/kz-hyper-estraier-search.c	2008-03-06 19:31:00 UTC (rev 3449)
+++ kazehakase/trunk/module/search/kz-hyper-estraier-search.c	2008-03-06 19:31:04 UTC (rev 3450)
@@ -592,6 +592,51 @@
 	return g_strndup(start, strstr(start, "<") - start);
 }
 
+/**
+ * get utf-8 contents string for given html file
+ * @param filepath file name to get document contents.
+ * @return gchar* newly allocated utf-8 string, or NULL failed.
+ */
+static gchar *
+get_utf8_contents(const char *filepath)
+{
+	gchar *contents = NULL;
+
+	if (g_file_get_contents(filepath, &contents, NULL, NULL))
+	{
+		gchar *encoding = get_document_encoding(contents);
+		if (!encoding)
+			encoding = g_strdup(est_enc_name(contents,
+							 strlen(contents),
+							 ESTLANGJA));
+
+		if (g_ascii_strcasecmp(encoding, "UTF-8") != 0)
+		{
+			gchar *utf8_contents, *upper_encoding;
+			upper_encoding = g_ascii_strup(encoding, -1);
+			utf8_contents = g_convert(contents, -1,
+						  "UTF-8", upper_encoding,
+						  NULL, NULL,
+						  NULL);
+			g_free(upper_encoding);
+			g_free(contents);
+			if (utf8_contents)
+			{
+				contents = utf8_contents;
+			}
+			else
+			{
+				g_warning("failed convert encoding. [enc:file] [%s:%s]\n",
+					  encoding, filepath);
+				contents = NULL;
+			}
+		}
+		g_free(encoding);
+	}
+
+	return contents;
+}
+
 gboolean
 register_document (KzSearch *search, const gchar *uri, const gchar *title, const gchar *contents, GTime mtime)
 {
@@ -730,7 +775,7 @@
 		}
 		else
 		{
-			gchar *uri, *title, *contents, *encoding;
+			gchar *uri, *title, *contents;
 			GTime mtime;
 			struct stat st;
 
@@ -743,46 +788,12 @@
 				continue;
 			}
 
-			g_file_get_contents(new_path, &contents, NULL, NULL);
-			encoding = get_document_encoding(contents);
-			if (!encoding)
-				encoding = g_strdup(est_enc_name(contents,
-								 strlen(contents),
-								 ESTLANGJA));
-			if (encoding)
-			{
-				if (g_ascii_strcasecmp(encoding, "UTF-8") != 0)
-				{
-					gchar *utf8_contents, *upper_encoding;
-					upper_encoding = g_ascii_strup(encoding, -1);
-					utf8_contents = g_convert(contents, -1,
-								  "UTF-8", upper_encoding,
-								  NULL, NULL,
-								  NULL);
-					g_free(upper_encoding);
-					g_free(contents);
-					if (utf8_contents)
-					{
-						contents = utf8_contents;
-					}
-					else
-					{
-						g_warning("failed convert encoding. [enc:file] [%s:%s]\n",
-							  encoding, new_path);
-						g_free(encoding);
-						g_free(new_path);
-						continue;
-					}
-				}
-				g_free(encoding);
-			}
-			else
-			{
-				g_warning("unknown encoding. [%s]\n", new_path);
-				g_free(contents);
+			contents = get_utf8_contents(new_path);
+			if (!contents) {
 				g_free(new_path);
 				continue;
 			}
+
 			g_stat(new_path, &st);
 			mtime = st.st_mtime;
 			title = get_document_title(contents);