Hiroyuki Ikezoe
ikezo****@users*****
Sun Dec 3 12:35:34 JST 2006
Index: kazehakase/module/search/Makefile.am
diff -u /dev/null kazehakase/module/search/Makefile.am:1.1
--- /dev/null Sun Dec 3 12:35:34 2006
+++ kazehakase/module/search/Makefile.am Sun Dec 3 12:35:33 2006
@@ -0,0 +1,41 @@
+CLEANFILES = *~ *.bak
+
+AM_CPPFLAGS = \
+	-DLOCALEDIR=\""$(localedir)"\" \
+	-DSYSCONFDIR=\""$(sysconfdir)"\" \
+	-DDATADIR=\""$(datadir)"\" \
+	-DEXTDIR=\""$(extdir)"\" \
+	-DKZ_SYSCONFDIR=\""$(sysconfdir)/$(PACKAGE)"\" \
+	-DKZ_DATADIR=\""$(datadir)/$(PACKAGE)"\" \
+	-DGTK_DISABLE_DEPRECATED=1 \
+	-DGDK_DISABLE_DEPRECATED=1 \
+	-DG_LOG_DOMAIN=\"Kazehakase-Search\" \
+	-DG_DISABLE_DEPRECATED=1
+
+INCLUDES = $(GLIB_CFLAGS) \
+	$(GTK_CFLAGS) \
+	$(LIBGNUTLS_CFLAGS) \
+	-I$(top_srcdir)/src \
+	-I$(top_srcdir)/src/actions \
+	-I$(top_srcdir)/src/bookmarks \
+	-I$(top_srcdir)/src/mozilla \
+	-I$(top_srcdir)/src/libegg/pixbufthumbnail \
+	-I$(top_srcdir)/src/libegg/regex \
+	-I$(top_srcdir)/src/libegg/md5 \
+	-I$(top_srcdir)/src/net \
+	-I$(top_srcdir)/src/sidebar \
+	-I$(top_srcdir)/src/utils \
+	-I$(top_srcdir)/src/widget
+
+LIBADD = $(GLIB_LIBS)
+
+LDFLAGS = \
+	-version-info $(LT_VERSION_INFO) \
+	-export-dynamic $(no_undefined) $(LIBTOOL_EXPORT_OPTIONS)
+
+
+search_LTLIBRARIES = libhyperestraier.la
+
+libhyperestraier_la_SOURCES = \
+	estsearch.h estsearch.c
+
Index: kazehakase/module/search/estsearch.c
diff -u /dev/null kazehakase/module/search/estsearch.c:1.1
--- /dev/null Sun Dec 3 12:35:34 2006
+++ kazehakase/module/search/estsearch.c Sun Dec 3 12:35:33 2006
@@ -0,0 +1,721 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+
+/*
+ *  Copyright (C) 2004 Hiroyuki Ikezoe
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <glib/gi18n.h>
+
+#include "kazehakase.h"
+#include "utils/utils.h"
+#include "glib-utils.h"
+#include "estsearch.h"
+#include "egg-pixbuf-thumbnail.h"
+
+
+/* Name of the Hyper Estraier command line tool, looked up in $PATH. */
+#define ESTCMD "estcmd"
+#define ESTRAIER_URI "http://hyperestraier.sourceforge.net/"
+#define DTD "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
+#define HEAD "<head>\n" \
+	" <title>Full-text search in history</title>\n" \
+	" <link rel=\"stylesheet\" type=\"text/css\" href=\"history-search:?css=search-result.css\">\n" \
+	"</head>\n"
+#define HEADER ""
+#define CONTENT "<div class=\"content\">\n" \
+	" <div class=\"header\"><span class=\"title\"><a href=\"%s\">%s</a></span></div>\n" \
+	" <div class=\"summary\"><img src=\"%s\" class=\"thumbnail\">\n" \
+	" <span class=\"sentence\">%s</span>\n" \
+	" </div>\n" \
+	" <div class=\"footer\">\n" \
+	" <span class=\"uri\">%s</span>\n" \
+	" <span class=\"cache\"><a href=\"%s\">cache</a></span>\n" \
+	" <span class=\"date\">%s</span>\n" \
+	" </div>\n" \
+	"</div>\n"
+#define FOOTER "<div class=\"footer\">\n" \
+	"Powered by <a href=\"%s\">Hyper Estraier</a> version %s\n" \
+	"</div>\n"
+
+/* printf("%s", NULL) is undefined; print missing fields as empty strings. */
+#define STR_OR_EMPTY(s) ((s) ? (s) : "")
+
+typedef struct _KzSearchEstPrivate KzSearchEstPrivate;
+struct _KzSearchEstPrivate
+{
+	/* No per-instance state yet.  ISO C forbids empty structs and
+	 * g_type_class_add_private() expects a non-zero size, hence this slot. */
+	gpointer reserved;
+};
+
+#define KZ_SEARCH_EST_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), KZ_TYPE_SEARCH_EST, KzSearchEstPrivate))
+
+/* Fields collected for one <document> of "estcmd search -vx" output. */
+typedef struct
+{
+	gchar *title;		/* value of the @title attribute */
+	gchar *uri;		/* original page URI rebuilt from the cache path */
+	gchar *date;		/* value of the @mdate attribute */
+	gchar *desc;		/* snippet text with markup removed */
+	gchar *cache_link;	/* "uri" attribute of the <document> element */
+} EstHit;
+
+static GObject *constructor (GType type,
+			     guint n_props,
+			     GObjectConstructParam *props);
+static void dispose (GObject *object);
+
+static gchar      *get_search_result_html     (KzSearch *search, const gchar *text);
+static KzBookmark *get_search_result_bookmark (KzSearch *search, const gchar *text);
+static gboolean    register_document          (KzSearch *search, const gchar *filename);
+static gboolean    update_index               (KzSearch *search);
+static gboolean    purge_index                (KzSearch *search);
+static GPid        optimize_index             (KzSearch *search);
+static void        make_index                 (KzSearch *search);
+static gboolean    exist_index_dir            (KzSearch *search);
+
+static gchar      *estsearch_get_version      (void);
+static gboolean    _update_index              (gpointer data);
+
+static KzSearchEst *the_kz_search_est = NULL;
+
+G_DEFINE_TYPE(KzSearchEst, kz_search_est, KZ_TYPE_SEARCH)
+
+/* Class initializer: installs the GObject and KzSearch virtual methods. */
+static void
+kz_search_est_class_init (KzSearchEstClass *klass)
+{
+	GObjectClass *object_class = (GObjectClass *) klass;
+	KzSearchClass *search_class = (KzSearchClass *) klass;
+
+	/* kz_search_est_parent_class is already set by G_DEFINE_TYPE. */
+	object_class->constructor = constructor;
+	object_class->dispose     = dispose;
+
+	search_class->get_search_result_html     = get_search_result_html;
+	search_class->get_search_result_bookmark = get_search_result_bookmark;
+	search_class->register_document          = register_document;
+	search_class->update_index               = update_index;
+	search_class->purge_index                = purge_index;
+	search_class->optimize_index             = optimize_index;
+	search_class->make_index                 = make_index;
+	search_class->exist_index_dir            = exist_index_dir;
+
+	g_type_class_add_private (object_class, sizeof(KzSearchEstPrivate));
+}
+
+
+/* Instance initializer: nothing to set up. */
+static void
+kz_search_est_init (KzSearchEst *est)
+{
+}
+
+/*
+ * GObject constructor.  Chains up to the parent constructor while
+ * the_kz_search_est is NULL.  Nothing assigns that variable (the assignment
+ * below is commented out), so every call currently builds a new object and
+ * the g_object_ref() branch is never taken.
+ */
+static GObject*
+constructor (GType type,
+	     guint n_props,
+	     GObjectConstructParam *props)
+{
+	GObject *object;
+
+	if (!the_kz_search_est)
+	{
+		GObjectClass *klass = G_OBJECT_CLASS(kz_search_est_parent_class);
+		object = klass->constructor(type, n_props, props);
+		/* singleton does not work */
+		/*the_kz_search_est = KZ_SEARCH_EST(object);*/
+	}
+	else
+	{
+		object = g_object_ref(G_OBJECT(the_kz_search_est));
+	}
+	return object;
+}
+
+/* Chains up to the parent class' dispose handler, if there is one. */
+static void
+dispose (GObject *object)
+{
+	if (G_OBJECT_CLASS (kz_search_est_parent_class)->dispose)
+		G_OBJECT_CLASS (kz_search_est_parent_class)->dispose(object);
+}
+
+
+/* Creates a KzSearchEst through g_object_new() and returns it as KzSearch. */
+KzSearch *
+kz_search_est_get_instance (void)
+{
+	return KZ_SEARCH(g_object_new(KZ_TYPE_SEARCH_EST, NULL));
+}
+
+/* Returns TRUE when the estcmd executable can be found in $PATH. */
+static gboolean
+estcmd_is_available (void)
+{
+	gchar *path = g_find_program_in_path(ESTCMD);
+	gboolean found = (path != NULL);
+
+	g_free(path);
+	return found;
+}
+
+/* Returns "<home><HISTORY_INDEX>" as a newly allocated string. */
+static gchar *
+history_index_path (void)
+{
+	return g_strconcat(g_get_home_dir(), HISTORY_INDEX, NULL);
+}
+
+/* Starts an argument vector "estcmd <subcommand>"; the array owns its strings. */
+static GPtrArray *
+estcmd_args_new (const gchar *subcommand)
+{
+	GPtrArray *args = g_ptr_array_new();
+
+	g_ptr_array_add(args, g_strdup(ESTCMD));
+	g_ptr_array_add(args, g_strdup(subcommand));
+	return args;
+}
+
+/*
+ * Spawns the command described by @args and then frees @args, whether or
+ * not the spawn succeeded.  The arguments are handed to the child as-is, so
+ * no shell quoting is involved.  G_SPAWN_SEARCH_PATH is always added to
+ * @flags; @pid, @in and @out are passed to g_spawn_async_with_pipes() as
+ * child_pid, standard_input and standard_output and may be NULL.
+ * Returns TRUE on success; a failure is logged with g_warning().
+ */
+static gboolean
+estcmd_spawn (GPtrArray *args, GSpawnFlags flags,
+	      GPid *pid, gint *in, gint *out)
+{
+	GError *error = NULL;
+	gboolean ok;
+
+	g_ptr_array_add(args, NULL);	/* argv must be NULL-terminated */
+	ok = g_spawn_async_with_pipes(NULL,
+				      (gchar **) args->pdata,
+				      NULL,
+				      G_SPAWN_SEARCH_PATH | flags,
+				      NULL,
+				      NULL,
+				      pid,
+				      in,
+				      out,
+				      NULL,
+				      &error);
+	if (!ok)
+	{
+		g_warning("failed to run %s: %s", ESTCMD,
+			  error ? error->message : "unknown error");
+		g_clear_error(&error);
+	}
+	g_strfreev((gchar **) g_ptr_array_free(args, FALSE));
+
+	return ok;
+}
+
+/*
+ * Splits @words at @delimiter and appends every non-empty word to @args.
+ * @op is inserted between words, and also before the first word when
+ * @op_first is TRUE.
+ */
+static void
+append_words (GPtrArray *args, const gchar *words,
+	      const gchar *delimiter, const gchar *op, gboolean op_first)
+{
+	gchar **tokens = g_strsplit(words, delimiter, -1);
+	guint i;
+
+	for (i = 0; tokens[i]; i++)
+	{
+		gchar *word = g_strstrip(tokens[i]);
+
+		if (!*word) continue;
+		if (op_first)
+			g_ptr_array_add(args, g_strdup(op));
+		g_ptr_array_add(args, g_strdup(word));
+		op_first = TRUE;
+	}
+	g_strfreev(tokens);
+}
+
+/*
+ * Runs "estcmd search -vx" on the history index for the words of
+ * @search_text (joined with AND) and stores the descriptor of the child's
+ * stdout in @standard_output.  Words from the "except_keyword" setting are
+ * appended with ANDNOT.  Returns FALSE when the command cannot be spawned.
+ */
+static gboolean
+execute_search_command(const gchar *search_text, gint *standard_output)
+{
+	GPtrArray *args;
+	gint max_results = 20, num_summary = 128;
+	gchar *except_word;
+
+	KZ_CONF_GET("History", "num_summary", num_summary, INT);
+	KZ_CONF_GET("History", "max_results", max_results, INT);
+
+	args = estcmd_args_new("search");
+	g_ptr_array_add(args, g_strdup("-vx"));
+	g_ptr_array_add(args, g_strdup("-max"));
+	g_ptr_array_add(args, g_strdup_printf("%d", max_results));
+	g_ptr_array_add(args, g_strdup("-sn"));
+	g_ptr_array_add(args, g_strdup_printf("%d", num_summary));
+	g_ptr_array_add(args, g_strdup_printf("%d", num_summary / 2));
+	g_ptr_array_add(args, g_strdup_printf("%d", num_summary / 2));
+	g_ptr_array_add(args, history_index_path());
+
+	/*
+	 * Every word is its own argv entry, exactly what the old
+	 * g_shell_parse_argv() call produced for plain words.  Unlike before,
+	 * quotes or backslashes in the text can no longer make argument
+	 * parsing fail and leave argv NULL.
+	 */
+	append_words(args, search_text, " ", "AND", FALSE);
+
+	except_word = KZ_CONF_GET_STR("History", "except_keyword");
+	if (except_word && *except_word)
+		append_words(args, except_word, ",", "ANDNOT", TRUE);
+	g_free(except_word);
+
+	/* stderr is discarded: nothing reads it, and a pipe would leak its fd */
+	return estcmd_spawn(args, G_SPAWN_STDERR_TO_DEV_NULL,
+			    NULL, NULL, standard_output);
+}
+
+
+/* Frees *@slot and stores @value (ownership is taken) in its place. */
+static void
+replace_string (gchar **slot, gchar *value)
+{
+	g_free(*slot);
+	*slot = value;
+}
+
+/* Frees all strings of @hit and resets them to NULL so it can be reused. */
+static void
+est_hit_clear (EstHit *hit)
+{
+	replace_string(&hit->title, NULL);
+	replace_string(&hit->uri, NULL);
+	replace_string(&hit->date, NULL);
+	replace_string(&hit->desc, NULL);
+	replace_string(&hit->cache_link, NULL);
+}
+
+/*
+ * Turns the file:// URI of a cached page into the URI of the original page
+ * by skipping "file://" plus "<home><HISTORY_DIR>" and handing the rest to
+ * create_uri_from_filename().  Returns NULL when @cache_link is NULL or too
+ * short to contain that prefix.
+ */
+static gchar *
+history_uri_from_cache_link (gchar *cache_link)
+{
+	gchar *history_dir;
+	gsize skip;
+
+	if (!cache_link) return NULL;
+
+	history_dir = g_strconcat(g_get_home_dir(), HISTORY_DIR, NULL);
+	skip = strlen("file://") + strlen(history_dir);
+	g_free(history_dir);
+
+	if (strlen(cache_link) <= skip) return NULL;
+
+	return create_uri_from_filename(cache_link + skip);
+}
+
+/*
+ * Feeds one line of "estcmd search -vx" output into @hit.
+ * Returns TRUE when the line closes the current document, i.e. when @hit
+ * is complete and should be consumed by the caller.
+ */
+static gboolean
+est_hit_parse_line (EstHit *hit, gchar *line)
+{
+	if (g_str_has_prefix(line, "</document>"))
+		return TRUE;
+
+	if (g_str_has_prefix(line, "<document"))
+	{
+		/* a new document starts: drop leftovers of an unfinished one */
+		est_hit_clear(hit);
+		hit->cache_link = xml_get_attr(line, "uri");
+		hit->uri = history_uri_from_cache_link(hit->cache_link);
+	}
+	else if (g_str_has_prefix(line, "<attribute name=\"@title\""))
+	{
+		replace_string(&hit->title, xml_get_attr(line, "value"));
+	}
+	else if (g_str_has_prefix(line, "<attribute name=\"@mdate\""))
+	{
+		replace_string(&hit->date, xml_get_attr(line, "value"));
+	}
+	else if (g_str_has_prefix(line, "<snippet"))
+	{
+		gchar *summary = xml_get_content(line);
+
+		/* the original called g_strlen(), which GLib does not provide */
+		replace_string(&hit->desc,
+			       summary ? remove_tag(summary, strlen(summary)) : NULL);
+		g_free(summary);
+	}
+	return FALSE;
+}
+
+/* Appends the CONTENT block for @hit to @html. */
+static void
+append_hit_html (GString *html, const EstHit *hit)
+{
+	gchar *thumb_filename, *thumb_uri;
+
+	thumb_filename = egg_pixbuf_get_thumb_filename(hit->uri,
+						       EGG_PIXBUF_THUMB_LARGE);
+	thumb_uri = g_strdup_printf("history-search:?image=%s",
+				    STR_OR_EMPTY(thumb_filename));
+	/*
+	 * NOTE(review): the fields are inserted as xml_get_attr() and
+	 * remove_tag() return them; confirm they are safe to embed in HTML.
+	 */
+	g_string_append_printf(html,
+			       CONTENT,
+			       hit->uri,
+			       STR_OR_EMPTY(hit->title),
+			       thumb_uri, /* thumbnail */
+			       STR_OR_EMPTY(hit->desc),
+			       hit->uri,
+			       STR_OR_EMPTY(hit->cache_link),
+			       STR_OR_EMPTY(hit->date));
+
+	g_free(thumb_filename);
+	g_free(thumb_uri);
+}
+
+/*
+ * Reads "estcmd search -vx" output from descriptor @out and renders it as
+ * an HTML page titled with the search @text.  @out is closed before
+ * returning.  The caller owns the returned string.
+ */
+static gchar *
+create_search_result_html (gint out, const gchar *text)
+{
+	GIOChannel *io;
+	gchar *line;
+	gchar *escaped_text;
+	gchar *estversion = estsearch_get_version();
+	EstHit hit = { NULL, NULL, NULL, NULL, NULL };
+	GString *html;
+
+	io = g_io_channel_unix_new(out);
+	g_io_channel_set_encoding(io, NULL, NULL);
+
+	html = g_string_sized_new(0);
+
+	g_string_append(html, DTD"\n");
+	g_string_append(html, "<html>\n");
+	g_string_append(html, HEAD);
+	g_string_append(html, "<body>\n");
+
+	/* the search text is user input, so escape it before embedding */
+	escaped_text = g_markup_escape_text(text, -1);
+	g_string_append_printf(html, "<h1>Search results for %s</h1>",
+			       escaped_text);
+	g_free(escaped_text);
+
+	while (g_io_channel_read_line(io, &line, NULL, NULL, NULL) == G_IO_STATUS_NORMAL)
+	{
+		if (est_hit_parse_line(&hit, line))
+		{
+			/* a hit without a usable URI cannot be linked; skip it */
+			if (hit.uri)
+				append_hit_html(html, &hit);
+			est_hit_clear(&hit);
+		}
+		g_free(line);
+	}
+	est_hit_clear(&hit);
+	/* unref alone leaves the descriptor open, so shut the channel down */
+	g_io_channel_shutdown(io, FALSE, NULL);
+	g_io_channel_unref(io);
+
+	g_string_append_printf(html, FOOTER, ESTRAIER_URI,
+			       estversion ? estversion : "unknown");
+	g_string_append(html, "</body></html>");
+
+	g_free(estversion);
+	return g_string_free(html, FALSE);
+}
+
+
+/*
+ * KzSearch::get_search_result_html.  Returns the result page for @text as
+ * a newly allocated string, or NULL when @text is NULL, estcmd is not
+ * installed or the search cannot be started.
+ */
+static gchar *
+get_search_result_html (KzSearch *search, const gchar *text)
+{
+	gint out;
+
+	if (!text) return NULL;
+	if (!estcmd_is_available()) return NULL;
+
+	if (!execute_search_command(text, &out))
+		return NULL;
+
+	return create_search_result_html(out, text);
+}
+
+/*
+ * Idle callback scheduled by register_document().  @data is a heap copy of
+ * the file name; it is written, followed by a newline, to the stdin of
+ * "estcmd gather -sd -cm <index> -" and then freed.  Always returns FALSE
+ * so the idle source runs only once.
+ */
+static gboolean
+_update_index (gpointer data)
+{
+	gchar *filename = data;
+	GPtrArray *args;
+	gint in;
+
+	args = estcmd_args_new("gather");
+	g_ptr_array_add(args, g_strdup("-sd"));
+	g_ptr_array_add(args, g_strdup("-cm"));
+	g_ptr_array_add(args, history_index_path());
+	g_ptr_array_add(args, g_strdup("-")); /* read filename from stdin */
+
+	/* only touch the pipe when the child really started */
+	if (estcmd_spawn(args, G_SPAWN_STDOUT_TO_DEV_NULL, NULL, &in, NULL))
+	{
+		GIOChannel *io;
+		gchar *entry = g_strconcat(filename, "\n", NULL);
+
+		/* set filename to add index */
+		io = g_io_channel_unix_new(in);
+		g_io_channel_set_encoding(io, NULL, NULL);
+		g_io_channel_write_chars(io, entry, strlen(entry), NULL, NULL);
+		g_io_channel_shutdown(io, TRUE, NULL);
+		g_io_channel_unref(io);
+		g_free(entry);
+	}
+	g_free(filename);
+
+	return FALSE;
+}
+
+/*
+ * KzSearch::register_document.  Queues @filename for indexing from an idle
+ * handler.  The name is copied because the handler runs after this function
+ * has returned and frees the string it receives.  Returns FALSE when
+ * @filename is NULL.
+ */
+static gboolean
+register_document (KzSearch *search, const gchar *filename)
+{
+	if (!filename) return FALSE;
+
+	g_idle_add (_update_index, g_strdup(filename));
+
+	return TRUE;
+}
+
+/* KzSearch::update_index.  Nothing to do here; always returns TRUE. */
+static gboolean
+update_index (KzSearch *search)
+{
+	return TRUE;
+}
+
+/*
+ * KzSearch::purge_index.  Starts "estcmd purge <index>" in the background
+ * and returns TRUE when the command was started.  NOTE(review): this used
+ * to return FALSE unconditionally; confirm no caller depends on that.
+ */
+static gboolean
+purge_index (KzSearch *search)
+{
+	GPtrArray *args = estcmd_args_new("purge");
+
+	g_ptr_array_add(args, history_index_path());
+
+	return estcmd_spawn(args, G_SPAWN_STDOUT_TO_DEV_NULL, NULL, NULL, NULL);
+}
+
+/*
+ * KzSearch::optimize_index.  Starts "estcmd optimize <index>" in the
+ * background and returns the value g_spawn_async_with_pipes() stored as
+ * child_pid, or 0 when the command could not be started.
+ */
+static GPid
+optimize_index (KzSearch *search)
+{
+	GPtrArray *args = estcmd_args_new("optimize");
+	GPid pid = 0;
+
+	g_ptr_array_add(args, history_index_path());
+
+	if (!estcmd_spawn(args, G_SPAWN_STDOUT_TO_DEV_NULL, &pid, NULL, NULL))
+		return 0;
+
+	return pid;
+}
+
+/*
+ * Runs "estcmd version" and returns the third space-separated word of its
+ * first output line as a newly allocated string.  Returns NULL when estcmd
+ * is missing, cannot be started, or prints fewer than three words.
+ */
+static gchar*
+estsearch_get_version (void)
+{
+	gchar *version = NULL, *estcmd_output = NULL;
+	gchar **words;
+	gint out;
+	GIOChannel *io;
+
+	if (!estcmd_is_available()) return NULL;
+
+	if (!estcmd_spawn(estcmd_args_new("version"),
+			  G_SPAWN_STDERR_TO_DEV_NULL, NULL, NULL, &out))
+		return NULL;
+
+	io = g_io_channel_unix_new(out);
+	g_io_channel_set_encoding(io, NULL, NULL);
+	g_io_channel_read_line(io, &estcmd_output, NULL, NULL, NULL);
+	g_io_channel_shutdown(io, TRUE, NULL);
+	g_io_channel_unref(io);
+
+	if (!estcmd_output) return NULL;
+
+	words = g_strsplit(g_strchomp(estcmd_output), " ", -1);
+	if (g_strv_length(words) > 2)
+		version = g_strdup(words[2]);
+
+	g_free(estcmd_output);
+	g_strfreev(words);
+
+	return version;
+}
+
+/*
+ * Reads "estcmd search -vx" output from descriptor @out and returns a pure
+ * folder bookmark with one child (title, URL-decoded URI, snippet) per
+ * document.  @out is closed before returning; @text is unused.
+ *
+ * NOTE(review): the original matched "<uri>", "<title>" and "<summary"
+ * lines although it reads the same -vx output as the HTML renderer, which
+ * looks for "<document", "@title" and "<snippet"; both now share one
+ * parser.  Confirm against real estcmd output.
+ */
+static KzBookmark *
+create_search_result_bookmark (gint out, const gchar *text)
+{
+	GIOChannel *io;
+	gchar *line;
+	EstHit hit = { NULL, NULL, NULL, NULL, NULL };
+	KzBookmark *result;
+
+	io = g_io_channel_unix_new(out);
+	g_io_channel_set_encoding(io, NULL, NULL);
+
+	result = kz_bookmark_pure_folder_new();
+
+	while (g_io_channel_read_line(io, &line, NULL, NULL, NULL) == G_IO_STATUS_NORMAL)
+	{
+		if (est_hit_parse_line(&hit, line))
+		{
+			if (hit.uri)
+			{
+				KzBookmark *child;
+				gchar *uri = url_decode(hit.uri);
+
+				child = kz_bookmark_new_with_attrs(hit.title, uri, hit.desc);
+				kz_bookmark_append(result, child);
+				g_object_unref(child);
+				g_free(uri);
+			}
+			est_hit_clear(&hit);
+		}
+		g_free(line);
+	}
+	est_hit_clear(&hit);
+	g_io_channel_shutdown(io, FALSE, NULL);
+	g_io_channel_unref(io);
+
+	return result;
+}
+
+/*
+ * KzSearch::get_search_result_bookmark.  Returns the hits for @text as a
+ * bookmark folder, or NULL when @text is NULL, estcmd is not installed or
+ * the search cannot be started.
+ */
+static KzBookmark *
+get_search_result_bookmark (KzSearch *search, const gchar *text)
+{
+	gint out;
+
+	if (!text) return NULL;
+	if (!estcmd_is_available()) return NULL;
+
+	if (!execute_search_command(text, &out))
+		return NULL;
+
+	return create_search_result_bookmark(out, text);
+}
+
+/*
+ * KzSearch::make_index.  Starts "estcmd gather -sd <index> <history dir>"
+ * in the background to index the whole history directory.
+ */
+static void
+make_index (KzSearch *search)
+{
+	GPtrArray *args = estcmd_args_new("gather");
+
+	g_ptr_array_add(args, g_strdup("-sd"));
+	g_ptr_array_add(args, history_index_path());
+	g_ptr_array_add(args, g_strconcat(g_get_home_dir(), HISTORY_DIR, NULL));
+
+	estcmd_spawn(args, G_SPAWN_STDOUT_TO_DEV_NULL, NULL, NULL, NULL);
+}
+
+/* KzSearch::exist_index_dir.  TRUE when the history index directory exists. */
+static gboolean
+exist_index_dir(KzSearch *search)
+{
+	gchar *index_dir;
+	gboolean exist = FALSE;
+
+	index_dir = g_build_filename(g_get_home_dir(),
+				     HISTORY_INDEX, NULL);
+	exist = g_file_test(index_dir, G_FILE_TEST_IS_DIR);
+	g_free(index_dir);
+
+	return exist;
+}
Index: kazehakase/module/search/estsearch.h
diff -u /dev/null kazehakase/module/search/estsearch.h:1.1
--- /dev/null Sun Dec 3 12:35:34 2006
+++ kazehakase/module/search/estsearch.h Sun Dec 3 12:35:33 2006
@@ -0,0 +1,56 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+
+/*
+ * Copyright (C) 2004 Hiroyuki Ikezoe
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */ + +#ifndef __ESTSEARCH_H__ +#define __ESTSEARCH_H__ + +#include <glib-object.h> +#include "kz-search.h" +#include "kz-bookmark.h" + +G_BEGIN_DECLS +/* GObject type, cast and type-check macros for KzSearchEst. */ +#define KZ_TYPE_SEARCH_EST (kz_search_est_get_type ()) +#define KZ_SEARCH_EST(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), KZ_TYPE_SEARCH_EST, KzSearchEst)) +#define KZ_SEARCH_EST_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), KZ_TYPE_SEARCH_EST, KzSearchEstClass)) +#define KZ_IS_SEARCH_EST(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), KZ_TYPE_SEARCH_EST)) +#define KZ_IS_SEARCH_EST_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), KZ_TYPE_SEARCH_EST)) +#define KZ_SEARCH_EST_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), KZ_TYPE_SEARCH_EST, KzSearchEstClass)) +/* Instance and class structures; neither adds members to its parent. */ +typedef struct _KzSearchEst KzSearchEst; +typedef struct _KzSearchEstClass KzSearchEstClass; + +struct _KzSearchEst +{ + KzSearch parent; /* parent instance */ +}; + +struct _KzSearchEstClass +{ + KzSearchClass parent_class; /* parent class */ +}; +/* Returns the GType for KzSearchEst; this is what KZ_TYPE_SEARCH_EST expands to. */ +GType kz_search_est_get_type (void) G_GNUC_CONST; +/* Returns a KzSearchEst object typed as KzSearch; defined in estsearch.c. */ +KzSearch *kz_search_est_get_instance (void); + +G_END_DECLS + +#endif /* __ESTSEARCH_H__ */