Kouhei Sutou
kous****@users*****
2006年 11月 30日 (木) 11:22:31 JST
Index: tomoe/lib/Makefile.am
diff -u tomoe/lib/Makefile.am:1.44 tomoe/lib/Makefile.am:1.45
--- tomoe/lib/Makefile.am:1.44	Wed Nov 29 11:53:11 2006
+++ tomoe/lib/Makefile.am	Thu Nov 30 11:22:31 2006
@@ -79,7 +79,9 @@
 	tomoe-reading.c		\
 	tomoe-recognizer.c	\
 	tomoe-shelf.c		\
-	tomoe-writing.c
+	tomoe-writing.c		\
+	tomoe-xml-parser.c	\
+	tomoe-xml-parser.h
 
 libtomoe_la_LDFLAGS =			\
 	-version-info $(LT_VERSION_INFO)	\
Index: tomoe/lib/tomoe-char.c
diff -u tomoe/lib/tomoe-char.c:1.56 tomoe/lib/tomoe-char.c:1.57
--- tomoe/lib/tomoe-char.c:1.56	Tue Nov 28 15:14:20 2006
+++ tomoe/lib/tomoe-char.c	Thu Nov 30 11:22:31 2006
@@ -18,7 +18,7 @@
  *  Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  *  Boston, MA  02111-1307  USA
  *
- *  $Id: tomoe-char.c,v 1.56 2006/11/28 06:14:20 ikezoe Exp $
+ *  $Id: tomoe-char.c,v 1.57 2006/11/30 02:22:31 kous Exp $
  */
 
 #include <stdlib.h>
@@ -28,6 +28,7 @@
 
 #include "tomoe-char.h"
 #include "tomoe-dict.h"
+#include "tomoe-xml-parser.h"
 #include "glib-utils.h"
 
 #define TOMOE_CHAR_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), TOMOE_TYPE_CHAR, TomoeCharPrivate))
@@ -128,6 +129,12 @@
     return g_object_new(TOMOE_TYPE_CHAR, NULL);
 }
 
+TomoeChar*
+tomoe_char_new_from_xml_data (const gchar *data, gssize len)
+{
+    return _tomoe_xml_parser_parse_char_data (data, len);
+}
+
 static void
 tomoe_char_dispose (GObject *object)
 {
Index: tomoe/lib/tomoe-char.h
diff -u tomoe/lib/tomoe-char.h:1.50 tomoe/lib/tomoe-char.h:1.51
--- tomoe/lib/tomoe-char.h:1.50	Tue Nov 28 14:18:29 2006
+++ tomoe/lib/tomoe-char.h	Thu Nov 30 11:22:31 2006
@@ -18,7 +18,7 @@
  *  Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  *  Boston, MA  02111-1307  USA
  *
- *  $Id: tomoe-char.h,v 1.50 2006/11/28 05:18:29 ikezoe Exp $
+ *  $Id: tomoe-char.h,v 1.51 2006/11/30 02:22:31 kous Exp $
  */
 
 /** @file tomoe-char.h
@@ -62,6 +62,16 @@
  * @return Pointer to newly allocated TomoeChar struct.
  */
 TomoeChar      *tomoe_char_new                  (void);
+
+/**
+ * @brief Create a TomoeChar object from XML data.
+ * @param data - XML text that describes a single character.
+ * @param len  - Length of data in bytes, or -1 if data is NUL-terminated.
+ * @return Pointer to newly allocated TomoeChar struct, or NULL if the data
+ *         could not be parsed or holds no character with a utf8 value.
+ */
+TomoeChar      *tomoe_char_new_from_xml_data    (const gchar   *data,
+                                                 gssize         len);
 
 const gchar    *tomoe_char_get_utf8             (TomoeChar     *chr);
 void            tomoe_char_set_utf8             (TomoeChar     *chr,
Index: tomoe/lib/tomoe-xml-parser.c
diff -u /dev/null tomoe/lib/tomoe-xml-parser.c:1.1
--- /dev/null	Thu Nov 30 11:22:31 2006
+++ tomoe/lib/tomoe-xml-parser.c	Thu Nov 30 11:22:31 2006
@@ -0,0 +1,518 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ *  Copyright (C) 2006 Takuro Ashie <ashie****@homa*****>
+ *  Copyright (C) 2006 Kouhei Sutou <kou****@cozmi*****>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the
+ *  Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ *  Boston, MA  02111-1307  USA
+ *
+ *  $Id: tomoe-xml-parser.c,v 1.1 2006/11/30 02:22:31 kous Exp $
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glib.h>
+
+#include "tomoe-xml-parser.h"
+#include "glib-utils.h"
+
+/* Where the parser currently is inside a <character> element; text_handler
+ * uses it to decide how to interpret character data. */
+typedef enum {
+    STATE_NONE,
+    STATE_UTF8,
+    STATE_N_STROKES,
+    STATE_STROKES,
+    STATE_READINGS,
+    STATE_READING,
+    STATE_WRITING,
+    STATE_STROKE,
+    STATE_POINT,
+    STATE_META
+} TomoeCharState;
+
+/* State shared by the GMarkup callbacks for the duration of one parse. */
+typedef struct _ParseData
+{
+    TomoeXMLParsedData *result;
+    gboolean            in_dict;
+    TomoeCharState      state;
+
+    TomoeChar          *chr;
+    TomoeWriting       *writing;
+    gint                n_points;
+    TomoeReadingType    reading_type;
+
+    const gchar        *filename;
+
+    /* meta data */
+    gchar              *key;
+    gchar              *value;
+} ParseData;
+
+/*
+ * Report "invalid content" at the parser's current position through error.
+ * In-memory parses have no file name, so a placeholder is printed instead.
+ */
+static void
+set_parse_error (GMarkupParseContext *context, GError **error,
+                 ParseData *data)
+{
+    gint line = 0, chr = 0;
+
+    g_markup_parse_context_get_position (context, &line, &chr);
+
+    /* g_set_error() does the formatting itself, so a '%' in the file name is
+     * never treated as a conversion, and a NULL error is simply ignored. */
+    g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
+                 "Invalid content at line %d char %d of %s.",
+                 line, chr,
+                 data->filename ? data->filename : "(data)");
+}
+
+/* Track element nesting and collect attribute data for each opening tag. */
+static void
+start_element_handler (GMarkupParseContext *context,
+                       const gchar         *element_name,
+                       const gchar        **attr_names,
+                       const gchar        **attr_values,
+                       gpointer             user_data,
+                       GError             **error)
+{
+    ParseData *data = user_data;
+
+    if (!strcmp ("dictionary", element_name)) {
+        gint idx;
+
+        for (idx = 0; attr_names && attr_names[idx]; idx++) {
+            if (!strcmp ("name", attr_names[idx])) {
+                g_free (data->result->name);
+                data->result->name = g_strdup (attr_values[idx]);
+            }
+        }
+        data->in_dict = TRUE;
+        return;
+    }
+
+    if (!data->in_dict) {
+        set_parse_error (context, error, data);
+        return;
+    }
+
+    if (!strcmp ("character", element_name)) {
+        data->chr = tomoe_char_new ();
+        return;
+    }
+
+    if (!data->chr) {
+        set_parse_error (context, error, data);
+        return;
+    }
+
+    if (!strcmp ("utf8", element_name)) {
+        data->state = STATE_UTF8;
+        return;
+    }
+
+    if (!strcmp ("number-of-strokes", element_name)) {
+        data->state = STATE_N_STROKES;
+        return;
+    }
+
+    if (!strcmp ("strokes", element_name)) {
+        data->state = STATE_WRITING;
+        data->writing = tomoe_writing_new ();
+        return;
+    }
+
+    if (!strcmp ("stroke", element_name)) {
+        if (!data->writing) {
+            set_parse_error (context, error, data);
+            return;
+        }
+        data->state = STATE_STROKE;
+        data->n_points = 0;
+        return;
+    }
+
+    if (!strcmp ("point", element_name)) {
+        gint idx, x = -1, y = -1;
+
+        if (data->state != STATE_STROKE) {
+            set_parse_error (context, error, data);
+            return;
+        }
+
+        data->state = STATE_POINT;
+
+        for (idx = 0; attr_names && attr_names[idx]; idx++) {
+            if (!strcmp ("x", attr_names[idx])) {
+                x = atoi (attr_values[idx]);
+            } else if (!strcmp ("y", attr_names[idx])) {
+                y = atoi (attr_values[idx]);
+            }
+        }
+
+        if (x < 0 || y < 0 || x >= 1000 || y >= 1000) {
+            g_warning ("Invalid writing data: %s: x = %d, y = %d\n",
+                       tomoe_char_get_utf8 (data->chr), x, y);
+        }
+
+        if (data->n_points == 0)
+            tomoe_writing_move_to (data->writing, x, y);
+        else
+            tomoe_writing_line_to (data->writing, x, y);
+
+        data->n_points++;
+        return;
+    }
+
+    if (!strcmp ("readings", element_name)) {
+        data->state = STATE_READINGS;
+        return;
+    }
+
+    if (!strcmp ("reading", element_name)) {
+        gint idx;
+
+        if (data->state != STATE_READINGS) {
+            set_parse_error (context, error, data);
+            return;
+        }
+
+        data->state = STATE_READING;
+        data->reading_type = TOMOE_READING_UNKNOWN;
+
+        for (idx = 0; attr_names && attr_names[idx]; idx++) {
+            if (!strcmp ("type", attr_names[idx])) {
+                if (!strcmp ("ja_on", attr_values[idx]))
+                    data->reading_type = TOMOE_READING_JA_ON;
+                else if (!strcmp ("ja_kun", attr_values[idx]))
+                    data->reading_type = TOMOE_READING_JA_KUN;
+            }
+        }
+
+        return;
+    }
+
+    if (!strcmp ("meta", element_name)) {
+        data->state = STATE_META;
+        return;
+    }
+
+    if (data->state == STATE_META) {
+        g_free (data->key);
+        g_free (data->value);
+        data->key   = g_strdup (element_name);
+        data->value = NULL;
+    }
+
+    /* throw error? */
+}
+
+/* Commit the data gathered for an element when its closing tag is seen. */
+static void
+end_element_handler (GMarkupParseContext *context,
+                     const gchar         *element_name,
+                     gpointer             user_data,
+                     GError             **error)
+{
+    ParseData *data = user_data;
+
+    if (!strcmp ("dictionary", element_name)) {
+        data->in_dict = FALSE;
+        return;
+    }
+
+    if (!strcmp ("character", element_name)) {
+        /* Only characters that carry a utf8 value are kept. */
+        if (tomoe_char_get_utf8 (data->chr))
+            g_ptr_array_add (data->result->chars, data->chr);
+        else
+            g_object_unref (G_OBJECT (data->chr));
+        data->chr = NULL;
+        return;
+    }
+
+    if (!strcmp ("utf8", element_name)) {
+        data->state = STATE_NONE;
+        return;
+    }
+
+    if (!strcmp ("number-of-strokes", element_name)) {
+        data->state = STATE_NONE;
+        return;
+    }
+
+    if (!strcmp ("strokes", element_name)) {
+        if (data->chr && data->writing)
+            tomoe_char_set_writing (data->chr, data->writing);
+        data->writing = NULL;
+        data->state = STATE_NONE;
+        return;
+    }
+
+    if (!strcmp ("stroke", element_name)) {
+        data->state = STATE_WRITING;
+        data->n_points = 0;
+        return;
+    }
+
+    if (!strcmp ("point", element_name)) {
+        data->state = STATE_STROKE;
+        return;
+    }
+
+    if (!strcmp ("readings", element_name)) {
+        data->state = STATE_NONE;
+        data->reading_type = TOMOE_READING_INVALID;
+        return;
+    }
+
+    if (!strcmp ("reading", element_name)) {
+        data->state = STATE_READINGS;
+        return;
+    }
+
+    if (!strcmp ("meta", element_name)) {
+        data->state = STATE_NONE;
+        return;
+    }
+
+    if (data->state == STATE_META) {
+        if (data->chr && data->key && data->value)
+            tomoe_char_register_meta_data (data->chr, data->key, data->value);
+        g_free (data->key);
+        g_free (data->value);
+        data->key   = NULL;
+        data->value = NULL;
+    }
+}
+
+/*
+ * Store character data according to the current state.  GMarkup does not
+ * NUL-terminate text, so it is copied with its explicit length before being
+ * handed to functions that expect a C string.
+ */
+static void
+text_handler (GMarkupParseContext *context,
+              const gchar         *text,
+              gsize                text_len,
+              gpointer             user_data,
+              GError             **error)
+{
+    ParseData *data = user_data;
+    gchar *str;
+
+    switch (data->state) {
+    case STATE_UTF8:
+    case STATE_N_STROKES:
+    case STATE_READING:
+    case STATE_META:
+        break;
+    default:
+        /* Text in any other position is ignored. */
+        return;
+    }
+
+    str = g_strndup (text, text_len);
+
+    switch (data->state) {
+    case STATE_UTF8:
+        tomoe_char_set_utf8 (data->chr, str);
+        break;
+    case STATE_N_STROKES:
+        tomoe_char_set_n_strokes (data->chr, atoi (str));
+        break;
+    case STATE_READING:
+    {
+        TomoeReading *reading;
+
+        reading = tomoe_reading_new (data->reading_type, str);
+        tomoe_char_add_reading (data->chr, reading);
+        g_object_unref (reading);
+        break;
+    }
+    case STATE_META:
+        /* data->value takes ownership of the copy. */
+        g_free (data->value);
+        data->value = str;
+        str = NULL;
+        break;
+    default:
+        break;
+    }
+
+    g_free (str);
+}
+
+static void
+passthrough_handler (GMarkupParseContext *context,
+                     const gchar         *text,
+                     gsize                text_len,
+                     gpointer             user_data,
+                     GError             **error)
+{
+}
+
+static void
+error_handler (GMarkupParseContext *context,
+               GError              *error,
+               gpointer             user_data)
+{
+}
+
+static GMarkupParser parser = {
+    start_element_handler,
+    end_element_handler,
+    text_handler,
+    passthrough_handler,
+    error_handler,
+};
+
+
+static void
+init_parse_data (ParseData *data, TomoeXMLParsedData *result,
+                 const gchar *filename)
+{
+    data->result       = result;
+    data->in_dict      = FALSE;
+    data->state        = STATE_NONE;
+    data->chr          = NULL;
+    data->writing      = NULL;
+    data->filename     = filename;
+    data->key          = NULL;
+    data->value        = NULL;
+    data->n_points     = 0;
+    data->reading_type = TOMOE_READING_INVALID;
+}
+
+/*
+ * Release what an aborted parse may have left in data: a half-built
+ * character and a pending meta key/value.  A pending writing is left alone
+ * because its ownership rules are not visible from this file.
+ */
+static void
+clear_parse_data (ParseData *data)
+{
+    if (data->chr)
+        g_object_unref (G_OBJECT (data->chr));
+    data->chr = NULL;
+
+    g_free (data->key);
+    g_free (data->value);
+    data->key   = NULL;
+    data->value = NULL;
+}
+
+gboolean
+_tomoe_xml_parser_parse_dictionary_file (const gchar *filename,
+                                         TomoeXMLParsedData *result)
+{
+    GMarkupParseContext *context;
+    FILE *f;
+    gsize bytes;
+    gchar buf[4096];
+    ParseData data;
+    gboolean retval = TRUE;
+
+    g_return_val_if_fail (filename != NULL, FALSE);
+    g_return_val_if_fail (result != NULL, FALSE);
+
+    /* A missing or unreadable file is a runtime condition, not a programming
+     * error, so it is checked explicitly rather than with an assertion macro
+     * that can be compiled out. */
+    f = fopen (filename, "rb");
+    if (!f) {
+        g_warning ("Tomoe XML Dictionary: failed to open %s", filename);
+        return FALSE;
+    }
+
+    init_parse_data (&data, result, filename);
+
+    context = g_markup_parse_context_new (&parser, 0, &data, NULL);
+
+    while ((bytes = fread (buf, sizeof (gchar), sizeof (buf), f)) > 0) {
+        GError *error = NULL;
+        gboolean success;
+
+        success = g_markup_parse_context_parse (context, buf, bytes, &error);
+        if (!success) {
+            g_warning ("Tomoe XML Dictionary: %s", error->message);
+            g_error_free (error);
+            retval = FALSE;
+            break;
+        }
+    }
+
+    if (retval && ferror (f)) {
+        g_warning ("Tomoe XML Dictionary: failed to read %s", filename);
+        retval = FALSE;
+    }
+
+    fclose (f);
+    g_markup_parse_context_free (context);
+    clear_parse_data (&data);
+
+    return retval;
+}
+
+TomoeChar *
+_tomoe_xml_parser_parse_char_data (const gchar *xml, gssize len)
+{
+    GMarkupParseContext *context;
+    TomoeXMLParsedData result;
+    TomoeChar *chr = NULL;
+    ParseData data;
+    gboolean success;
+    GError *error = NULL;
+
+    g_return_val_if_fail (xml != NULL, NULL);
+
+    /* The <dictionary> handler frees result.name before replacing it, so it
+     * must not be left uninitialized. */
+    result.name = NULL;
+    result.chars = g_ptr_array_new ();
+
+    init_parse_data (&data, &result, NULL);
+    data.in_dict = TRUE;
+
+    context = g_markup_parse_context_new (&parser, 0, &data, NULL);
+
+    if (len < 0)
+        len = strlen (xml);
+
+    success = g_markup_parse_context_parse (context, xml, len, &error);
+    if (!success) {
+        g_warning ("Tomoe XML Dictionary: %s", error->message);
+        g_error_free (error);
+    }
+
+    g_markup_parse_context_free (context);
+    clear_parse_data (&data);
+
+    if (success && result.chars->len > 0) {
+        chr = g_ptr_array_remove_index (result.chars, 0);
+    }
+    TOMOE_PTR_ARRAY_FREE_ALL (result.chars, (GFunc) g_object_unref);
+    g_free (result.name);
+
+    return chr;
+}
+
+/*
+vi:ts=4:nowrap:ai:expandtab
+*/
Index: tomoe/lib/tomoe-xml-parser.h
diff -u /dev/null tomoe/lib/tomoe-xml-parser.h:1.1
--- /dev/null	Thu Nov 30 11:22:31 2006
+++ tomoe/lib/tomoe-xml-parser.h	Thu Nov 30 11:22:31 2006
@@ -0,0 +1,76 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ *  Copyright (C) 2006 Kouhei Sutou <kou****@cozmi*****>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the
+ *  Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ *  Boston, MA  02111-1307  USA
+ *
+ *  $Id: tomoe-xml-parser.h,v 1.1 2006/11/30 02:22:31 kous Exp $
+ */
+
+/**
+ *  @file tomoe-xml-parser.h
+ *  @brief Provide a set of API to parse XML data.
+ */
+
+#ifndef __TOMOE_XML_PARSER_H__
+#define __TOMOE_XML_PARSER_H__
+
+#include <glib.h>
+
+#include <tomoe-char.h>
+
+G_BEGIN_DECLS
+
+typedef struct _TomoeXMLParsedData TomoeXMLParsedData;
+
+/**
+ * @brief Data collected while parsing a dictionary.
+ */
+struct _TomoeXMLParsedData
+{
+    gchar     *name;  /**< Value of the dictionary's "name" attribute. */
+    GPtrArray *chars; /**< TomoeChar objects that carry a utf8 value. */
+};
+
+/**
+ * @brief Parse a dictionary file.
+ * @param filename - Path of the XML file to read.
+ * @param result   - Receives the parsed data. Its name must be NULL or
+ *                   g_free()-able and its chars must be a valid GPtrArray
+ *                   before the call.
+ * @return TRUE on success, FALSE if the file could not be opened, read
+ *         or parsed.
+ */
+gboolean   _tomoe_xml_parser_parse_dictionary_file (const gchar        *filename,
+                                                    TomoeXMLParsedData *result);
+
+/**
+ * @brief Parse XML text that describes a single character.
+ * @param xml - XML text to parse.
+ * @param len - Length of xml in bytes, or -1 if it is NUL-terminated.
+ * @return The first parsed character, or NULL if parsing failed or no
+ *         character with a utf8 value was found.
+ */
+TomoeChar *_tomoe_xml_parser_parse_char_data       (const gchar        *xml,
+                                                    gssize              len);
+
+G_END_DECLS
+
+#endif /* __TOMOE_XML_PARSER_H__ */
+
+/*
+vi:ts=4:nowrap:ai:expandtab
+*/