• R/O
  • SSH
  • HTTPS

pykf: Commit


Commit MetaInfo

Révision 10 (tree)
l'heure 2010-04-08 00:11:54
Auteur ishimoto

Message de Log

tagging 0.3.5

Change Summary

Modification

--- tags/0.3.5/ez_setup.py (nonexistent)
+++ tags/0.3.5/ez_setup.py (revision 10)
@@ -0,0 +1,284 @@
1+#!python
2+"""Bootstrap setuptools installation
3+
4+If you want to use setuptools in your package's setup.py, just include this
5+file in the same directory with it, and add this to the top of your setup.py::
6+
7+ from ez_setup import use_setuptools
8+ use_setuptools()
9+
10+If you want to require a specific version of setuptools, set a download
11+mirror, or use an alternate download directory, you can do so by supplying
12+the appropriate options to ``use_setuptools()``.
13+
14+This file can also be run as a script to install or upgrade setuptools.
15+"""
16+import sys
17+DEFAULT_VERSION = "0.6c11"
18+DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3]
19+
20+md5_data = {
21+ 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca',
22+ 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb',
23+ 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b',
24+ 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a',
25+ 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618',
26+ 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac',
27+ 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5',
28+ 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4',
29+ 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c',
30+ 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b',
31+ 'setuptools-0.6c10-py2.3.egg': 'ce1e2ab5d3a0256456d9fc13800a7090',
32+ 'setuptools-0.6c10-py2.4.egg': '57d6d9d6e9b80772c59a53a8433a5dd4',
33+ 'setuptools-0.6c10-py2.5.egg': 'de46ac8b1c97c895572e5e8596aeb8c7',
34+ 'setuptools-0.6c10-py2.6.egg': '58ea40aef06da02ce641495523a0b7f5',
35+ 'setuptools-0.6c11-py2.3.egg': '2baeac6e13d414a9d28e7ba5b5a596de',
36+ 'setuptools-0.6c11-py2.4.egg': 'bd639f9b0eac4c42497034dec2ec0c2b',
37+ 'setuptools-0.6c11-py2.5.egg': '64c94f3bf7a72a13ec83e0b24f2749b2',
38+ 'setuptools-0.6c11-py2.6.egg': 'bfa92100bd772d5a213eedd356d64086',
39+ 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27',
40+ 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277',
41+ 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa',
42+ 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e',
43+ 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e',
44+ 'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f',
45+ 'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2',
46+ 'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc',
47+ 'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167',
48+ 'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64',
49+ 'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d',
50+ 'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20',
51+ 'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab',
52+ 'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53',
53+ 'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2',
54+ 'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e',
55+ 'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372',
56+ 'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902',
57+ 'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de',
58+ 'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b',
59+ 'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03',
60+ 'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a',
61+ 'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6',
62+ 'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a',
63+}
64+
65+import sys, os
66+try: from hashlib import md5
67+except ImportError: from md5 import md5
68+
def _validate_md5(egg_name, data):
    """Verify downloaded egg contents against the built-in MD5 registry.

    `egg_name` is looked up in ``md5_data``; names that are not registered
    are accepted without any check.  When the digest of `data` does not
    match the registered value, a message is written to ``sys.stderr`` and
    the process exits with status 2.  Otherwise `data` is returned unchanged.
    """
    if egg_name not in md5_data:
        # Unknown egg: nothing to compare against.
        return data

    actual = md5(data).hexdigest()
    if actual != md5_data[egg_name]:
        message = (
            "md5 validation of %s failed! (Possible download problem?)"
            % egg_name
        )
        sys.stderr.write(message + "\n")
        sys.exit(2)
    return data
79+
def use_setuptools(
    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
    download_delay=15
):
    """Make a sufficiently recent setuptools importable, downloading if needed.

    `version` is the minimum setuptools version required; an egg of that
    version must be downloadable from `download_base` (which should end with
    a '/').  `to_dir` is the directory the egg is saved to when a download
    is necessary, and `download_delay` is the number of seconds to wait
    before the download starts.

    If an older setuptools has already been imported by the running
    process, a message is written to ``sys.stderr`` and SystemExit(2) is
    raised, because the module cannot be swapped out at that point.
    """
    already_loaded = (
        'pkg_resources' in sys.modules or 'setuptools' in sys.modules
    )

    def fetch_and_activate():
        # Download the egg, put it first on sys.path and tell setuptools
        # where it was bootstrapped from.
        egg_path = download_setuptools(
            version, download_base, to_dir, download_delay
        )
        sys.path.insert(0, egg_path)
        import setuptools
        setuptools.bootstrap_install_from = egg_path

    try:
        import pkg_resources
    except ImportError:
        return fetch_and_activate()

    try:
        pkg_resources.require("setuptools>=" + version)
    except pkg_resources.VersionConflict:
        conflict = sys.exc_info()[1]
        if not already_loaded:
            # Nothing else holds a reference yet, so it is safe to drop the
            # stale module and import the freshly downloaded one instead.
            del pkg_resources, sys.modules['pkg_resources']  # reload ok
            return fetch_and_activate()
        message = (
            "The required version of setuptools (>=%s) is not available, and\n"
            "can't be installed while this script is running. Please install\n"
            " a more recent version first, using 'easy_install -U setuptools'."
            "\n\n(Currently using %r)"
        ) % (version, conflict.args[0])
        sys.stderr.write(message + "\n")
        sys.exit(2)
    except pkg_resources.DistributionNotFound:
        return fetch_and_activate()
120+
def download_setuptools(
    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
    delay=15
):
    """Download setuptools from a specified location and return its filename

    `version` should be a valid setuptools version number that is available
    as an egg for download under the `download_base` URL (which should end
    with a '/'). `to_dir` is the directory where the egg will be downloaded.
    `delay` is the number of seconds to pause before an actual download attempt.

    The egg is only fetched when no file of that name already exists in
    `to_dir`.  The downloaded bytes are checked with ``_validate_md5`` before
    anything is written.  If writing the egg fails, the partial file is
    removed so that a later run does not mistake it for a complete download.
    Returns the real path of the egg file.
    """
    import urllib2
    egg_name = "setuptools-%s-py%s.egg" % (version, sys.version[:3])
    url = download_base + egg_name
    saveto = os.path.join(to_dir, egg_name)
    src = dst = None
    if not os.path.exists(saveto):  # Avoid repeated downloads
        try:
            from distutils import log
            if delay:
                log.warn("""
---------------------------------------------------------------------------
This script requires setuptools version %s to run (even to display
help). I will attempt to download it for you (from
%s), but
you may need to enable firewall access for this script first.
I will start the download in %d seconds.

(Note: if this machine does not have network access, please obtain the file

 %s

and place it in this directory before rerunning this script.)
---------------------------------------------------------------------------""",
                    version, download_base, delay, url
                )
                from time import sleep
                sleep(delay)
            log.warn("Downloading %s", url)
            src = urllib2.urlopen(url)
            # Read/write all in one block, so we don't create a corrupt file
            # if the download is interrupted.
            data = _validate_md5(egg_name, src.read())
            dst = open(saveto, "wb")
            try:
                dst.write(data)
            except:
                # A truncated egg would pass the os.path.exists() shortcut
                # above on the next run, so remove it before re-raising.
                dst.close()
                dst = None
                if os.path.exists(saveto):
                    os.unlink(saveto)
                raise
        finally:
            if src:
                src.close()
            if dst:
                dst.close()
    return os.path.realpath(saveto)
167+
168+
169+
170+
171+
172+
173+
174+
175+
176+
177+
178+
179+
180+
181+
182+
183+
184+
185+
186+
187+
188+
189+
190+
191+
192+
193+
194+
195+
196+
197+
198+
199+
200+
201+
202+
def main(argv, version=DEFAULT_VERSION):
    """Install or upgrade setuptools and EasyInstall

    `argv` is the list of command-line arguments forwarded to easy_install;
    `version` is the minimum setuptools version that must end up installed.

    * If setuptools cannot be imported, the egg for `version` is downloaded,
      handed to easy_install, and the downloaded file is deleted afterwards.
    * If the installed setuptools reports version '0.0.1', a message is
      written to ``sys.stderr`` and the process exits with status 2.
    * If the installed setuptools is older than `version`, the egg for
      `version` is downloaded and installed, then the process exits.
    * Otherwise easy_install is run with `argv`, or a short notice is
      printed when `argv` is empty.
    """
    try:
        import setuptools
    except ImportError:
        egg = None
        try:
            egg = download_setuptools(version, delay=0)
            sys.path.insert(0, egg)
            from setuptools.command.easy_install import main as easy_install
            return easy_install(list(argv) + [egg])   # we're done here
        finally:
            if egg and os.path.exists(egg):
                os.unlink(egg)
    else:
        if setuptools.__version__ == '0.0.1':
            sys.stderr.write(
                "You have an obsolete version of setuptools installed. Please\n"
                "remove it from your system entirely before rerunning this script."
                "\n"
            )
            sys.exit(2)

    req = "setuptools>=" + version
    import pkg_resources
    try:
        pkg_resources.require(req)
    except pkg_resources.VersionConflict:
        try:
            from setuptools.command.easy_install import main as easy_install
        except ImportError:
            from easy_install import main as easy_install
        # Download the version that was actually requested; fetching the
        # default version could fail to satisfy `req` for a custom `version`.
        easy_install(list(argv) + [download_setuptools(version, delay=0)])
        sys.exit(0)  # try to force an exit
    else:
        if argv:
            from setuptools.command.easy_install import main as easy_install
            easy_install(argv)
        else:
            sys.stdout.write(
                "Setuptools version %s or greater has been installed.\n"
                % version
            )
            sys.stdout.write(
                '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)\n'
            )
243+
def update_md5(filenames):
    """Recompute digests for `filenames` and rewrite this script's registry.

    Each file's MD5 digest is stored in ``md5_data`` under the file's base
    name.  The ``md5_data = {...}`` literal in this module's own source file
    is then replaced with the sorted, regenerated entries.  If the literal
    cannot be located, "Internal error!" is written to ``sys.stderr`` and
    the process exits with status 2.
    """
    import inspect
    import re

    for path in filenames:
        handle = open(path, 'rb')
        md5_data[os.path.basename(path)] = md5(handle.read()).hexdigest()
        handle.close()

    entries = [" %r: %r,\n" % item for item in md5_data.items()]
    entries.sort()
    table_text = "".join(entries)

    this_file = inspect.getsourcefile(sys.modules[__name__])
    handle = open(this_file, 'rb')
    source = handle.read()
    handle.close()

    found = re.search("\nmd5_data = {\n([^}]+)}", source)
    if not found:
        sys.stderr.write("Internal error!" + "\n")
        sys.exit(2)

    # Splice the regenerated entries over the old dictionary body.
    body_start, body_end = found.span(1)
    source = source[:body_start] + table_text + source[body_end:]
    handle = open(this_file, 'w')
    handle.write(source)
    handle.close()
272+
273+
if __name__ == '__main__':
    # "--md5update FILE..." refreshes the digest table; anything else is
    # passed through to main() as easy_install arguments.
    cli_args = sys.argv[1:]
    if len(cli_args) > 1 and cli_args[0] == '--md5update':
        update_md5(cli_args[1:])
    else:
        main(cli_args)
279+
280+
281+
282+
283+
284+
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/setup.py (nonexistent)
+++ tags/0.3.5/setup.py (revision 10)
@@ -0,0 +1,24 @@
1+#!/usr/bin/env python
2+
3+from distutils.core import setup, Extension
4+
# Package metadata and the single C extension module built from src/.
setup(
    name="pykf",
    version="0.3.5",
    description="Japanese Kanji code filter",
    author="Atsuo Ishimoto",
    author_email="ishimoto@gembook.org",
    url="http://sourceforge.jp/projects/pykf/",
    classifiers=[
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    license="MIT License",
    ext_modules=[
        Extension(
            "pykf",
            [
                "src/pykf.c",
                "src/converter.c",
                "src/jis0213.c",
                "src/mskanji.c",
            ],
        ),
    ],
)
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/README (nonexistent)
+++ tags/0.3.5/README (revision 10)
@@ -0,0 +1,40 @@
1+pykf --- Japanese character encoding converter
2+==============================================
3+
4+pykf is a Japanese character encoding (ShiftJIS, JIS, EUC-JP) converter
5+for Python. See readme.sjis for details.
6+
7+Installing
8+==========
9+
10+run "python setup.py install".
11+
12+Copyright
13+=========
14+
15+Japanese Kanji filter module
16+ Copyright (c) 2002-2010, Atsuo Ishimoto. All rights reserved.
17+
18+Permission to use, copy, modify, and distribute this software and its
19+documentation for any purpose and without fee is hereby granted, provided that
20+the above copyright notice appear in all copies and that both that copyright
21+notice and this permission notice appear in supporting documentation, and
22+that the name of Atsuo Ishimoto not be used in advertising or publicity
23+pertaining to distribution of the software without specific, written prior
24+permission.
25+
26+ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
27+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
28+EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR
29+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
30+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
31+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
32+PERFORMANCE OF THIS SOFTWARE.
33+
34+---------------------------------------------------------------------
35+This module is based on kf.c written by Haruhiko Okumura.
36+ Copyright (c) 1995-2000 Haruhiko Okumura
37+ This file may be freely modified/redistributed.
38+
39+Original kf.c:
40+ http://www.matsusaka-u.ac.jp/~okumura/kf.html
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/src/convert.h (nonexistent)
+++ tags/0.3.5/src/convert.h (revision 10)
@@ -0,0 +1,19 @@
/*
 * Byte classification macros used by the encoding guesser and converters.
 * Every macro evaluates its argument more than once, so do not pass an
 * expression with side effects.  All uses of the argument are
 * parenthesised so that expression arguments (e.g. `a | b`) classify
 * correctly.
 */

/* 7-bit JIS byte / EUC-JP byte (the same range with the high bit set). */
#define isjis(c) (((c)>=0x21 && (c)<=0x7e))
#define iseuc(c) (((c)>=0xa1 && (c)<=0xfe))

/* Shift_JIS lead-byte ranges (0xf0-0xf9 and 0xfa-0xfc handled separately). */
#define isgaiji1(c) ((c)>=0xf0 && (c)<=0xf9)
#define isibmgaiji1(c) ((c)>=0xfa && (c)<=0xfc)
#define issjis1(c) (((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xef) || isgaiji1(c) || isibmgaiji1(c))
/* Shift_JIS trail byte. */
#define issjis2(c) ((c)>=0x40 && (c)<=0xfc && (c)!=0x7f)

/* Single-byte half-width katakana range. */
#define ishankana(c) ((c)>=0xa0 && (c)<=0xdf)

/* UTF-8 lead bytes by total sequence length, and continuation bytes. */
#define isutf8_2byte(c) (0xc0<=(c) && (c) <= 0xdf)
#define isutf8_3byte(c) (0xe0<=(c) && (c) <= 0xef)
#define isutf8_4byte(c) (0xf0<=(c) && (c) <= 0xf7)
#define isutf8_5byte(c) (0xf8<=(c) && (c) <= 0xfb)
#define isutf8_6byte(c) (0xfc<=(c) && (c) <= 0xfd)
#define isutf8_trail(c) (0x80<=(c) && (c) <= 0xbf)

/* Sequence length announced by a UTF-8 lead byte; 0 if c is not a lead byte. */
#define utf8_len(c) (isutf8_2byte(c)?2:isutf8_3byte(c)?3:isutf8_4byte(c)?4:isutf8_5byte(c)?5:isutf8_6byte(c)?6:0)
/* Two-byte code substituted when a character cannot be converted. */
#define CONV_FAILED 0x222e
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/src/converter.c (nonexistent)
+++ tags/0.3.5/src/converter.c (revision 10)
@@ -0,0 +1,1445 @@
1+/*********************************************************************
2+
3+Japanese Kanji filter module
4+ Copyright (c) 2002, Atsuo Ishimoto. All rights reserved.
5+
6+Permission to use, copy, modify, and distribute this software and its
7+documentation for any purpose and without fee is hereby granted, provided that
8+the above copyright notice appear in all copies and that both that copyright
9+notice and this permission notice appear in supporting documentation, and that
10+the name of Atsuo Ishimoto not be used in advertising or publicity pertaining
11+to distribution of the software without specific, written prior permission.
12+
13+ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
14+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
15+EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR
16+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
17+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
18+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
19+PERFORMANCE OF THIS SOFTWARE.
20+
21+---------------------------------------------------------------------
22+This module is based on kf.c written by Haruhiko Okumura.
23+ Copyright (c) 1995-2000 Haruhiko Okumura
24+ This file may be freely modified/redistributed.
25+
26+Original kf.c:
27+ http://www.matsusaka-u.ac.jp/~okumura/kf.html
28+
29+*********************************************************************/
30+
31+#include <stdio.h>
32+#include <stdlib.h>
33+#include <memory.h>
34+#include <string.h>
35+#include <assert.h>
36+
37+#include "pykf.h"
38+
39+#if defined(_MSC_VER)
40+#define LOCAL_INLINE __inline static
41+#endif
42+
43+#if !defined(__cplusplus) && !defined(inline)
44+#ifdef __GNUC__
45+#define LOCAL_INLINE __inline static
46+#endif
47+#endif
48+
49+#if !defined(LOCAL_INLINE)
50+#define LOCAL_INLINE static
51+#endif
52+
53+#include "convert.h"
54+
55+
56+
57+
58+int guess(int imax, unsigned char buf[], int strict)
59+{
60+ int i, n;
61+ int ascii, euc, sjis, utf8, bad_euc, bad_sjis, bad_utf8;
62+ int jis, hankana;
63+ int sjis_error, euc_error, utf8_error;
64+
65+ ascii = 1;
66+ bad_euc=euc=0;
67+ bad_sjis=sjis=0;
68+ bad_utf8 = utf8=0;
69+ jis = 0;
70+ sjis_error = euc_error = utf8_error = 0;
71+
72+ /* check BOM */
73+ if (imax >= 2) {
74+ if (buf[0] == 0xff && buf[1] == 0xfe) {
75+ return UTF16_LE;
76+ }
77+ else if (buf[0] == 0xfe && buf[1] == 0xff) {
78+ return UTF16_BE;
79+ }
80+ }
81+ if (imax >= 3 && !memcmp(buf, "\xef\xbb\xbf", 3)) {
82+ return UTF8;
83+ }
84+
85+ // check SJIS
86+ hankana = 0;
87+ for (i = 0; i < imax; i++) {
88+
89+ if (buf[i] >= 0x80) {
90+ ascii = 0;
91+ }
92+
93+ if (buf[i] == 0x1b) {
94+ jis= 1;
95+ }
96+
97+ if (buf[i] == 0x8e ) {
98+ // looks like euc.
99+ if (i + 2 < imax) {
100+ if (buf[i+2]==0x8e && ishankana(buf[i+1])) {
101+ bad_sjis += 1;
102+ }
103+ }
104+ }
105+
106+ if (ishankana(buf[i])) {
107+ sjis += 0x10/2-1;
108+ hankana++;
109+ }
110+ else {
111+ if (hankana == 1) {
112+ // single halfwidth-kana is bad sign.
113+ bad_sjis++;
114+ }
115+ hankana = 0;
116+
117+ if (issjis1(buf[i])) {
118+ if (i+1 >= imax) {
119+ if (strict) {
120+ sjis_error = 1;
121+ break;
122+ }
123+ bad_sjis+=0x100;
124+ }
125+ else if (issjis2(buf[i+1])) {
126+ sjis += 0x10;
127+ i++;
128+ }
129+ else {
130+ if (strict) {
131+ sjis_error = 1;
132+ break;
133+ }
134+ bad_sjis += 0x100;
135+ }
136+ }
137+ else if (buf[i] >= 0x80) {
138+ if (strict) {
139+ sjis_error = 1;
140+ break;
141+ }
142+ bad_sjis += 0x100;
143+ }
144+ }
145+ }
146+
147+ if (ascii && jis) {
148+ return JIS;
149+ }
150+
151+ if (ascii) {
152+ return ASCII;
153+ }
154+
155+ // check EUC-JP
156+ hankana=0;
157+ for (i = 0; i < imax; i++) {
158+ if (buf[i] == 0x8e) {
159+ if (i+1 >= imax) {
160+ if (strict) {
161+ euc_error = 1;
162+ break;
163+ }
164+ bad_euc += 0x100;
165+ }
166+ else if (ishankana(buf[i+1])) {
167+ euc+=10;
168+ i++;
169+ hankana++;
170+ }
171+ else {
172+ if (strict) {
173+ euc_error = 1;
174+ break;
175+ }
176+ bad_euc += 0x100;
177+ }
178+ }
179+ else {
180+ if (hankana == 1) {
181+ bad_euc++;
182+ }
183+ hankana = 0;
184+ if (iseuc(buf[i])) {
185+ if (i+1 >= imax) {
186+ if (strict) {
187+ euc_error = 1;
188+ break;
189+ }
190+ bad_euc+=0x100;
191+ }
192+ else if (iseuc(buf[i+1])) {
193+ i++;
194+ euc+=0x10;
195+ }
196+ else {
197+ if (strict) {
198+ euc_error = 1;
199+ break;
200+ }
201+ bad_euc+=0x100;
202+ }
203+ }
204+ else if (buf[i] == 0x8f) {
205+ if (i+2 >= imax) {
206+ if (strict) {
207+ euc_error = 1;
208+ break;
209+ }
210+ bad_euc+=0x100;
211+ }
212+ else if (iseuc(buf[i+1]) && iseuc(buf[i+2])) {
213+ i+=2;
214+ euc+=0x10;
215+ }
216+ else {
217+ if (strict) {
218+ euc_error = 1;
219+ break;
220+ }
221+ bad_euc+=100;
222+ }
223+ }
224+ else if (buf[i] >= 0x80) {
225+ if (strict) {
226+ euc_error = 1;
227+ break;
228+ }
229+ bad_euc+=0x100;
230+ }
231+ }
232+ }
233+
234+ // check UTF-8
235+ for (i = 0; i < imax; i++) {
236+ int c_len;
237+ c_len = utf8_len(buf[i]);
238+ if (c_len) {
239+ if (i+c_len-1 >= imax) {
240+ if (strict) {
241+ utf8_error = 1;
242+ break;
243+ }
244+ bad_utf8 += 1000;
245+ }
246+ i++;
247+ for (n=0; n < c_len-1; n++) {
248+ if (!isutf8_trail(buf[i+n])) {
249+ if (strict) {
250+ utf8_error = 1;
251+ }
252+ else {
253+ bad_utf8 += 1000;
254+ }
255+ break;
256+ }
257+ }
258+
259+ if (utf8_error) {
260+ break;
261+ }
262+
263+ if (n == (c_len-1)) {
264+ /* no error */
265+ utf8 += (int)(0x10 * c_len/2.0+1); /* prefer utf-8 over SJIS/EUC a bit....*/
266+ i += (c_len-2);
267+ }
268+ } else if (buf[i] >= 0x80) {
269+ if (strict) {
270+ utf8_error = 1;
271+ break;
272+ }
273+ bad_utf8 += 1000;
274+ }
275+ }
276+/*
277+ printf("sjis_error:%d euc_error:%d, utf8_error:%d\n", sjis_error, euc_error, utf8_error);
278+ printf("sjis:%d euc:%d, utf8:%d\n", sjis, euc, utf8);
279+ printf("bad_sjis:%d bad_euc:%d, bad_utf8:%d\n", bad_sjis, bad_euc, bad_utf8);
280+*/
281+
282+ if (sjis_error && euc_error && utf8_error) {
283+ return ERROR;
284+ }
285+
286+ if (sjis_error) {
287+ if (euc_error) {
288+ return UTF8;
289+ }
290+ if (utf8_error) {
291+ return EUC;
292+ }
293+ if (euc-bad_euc > utf8-bad_utf8)
294+ return EUC;
295+ else if (euc-bad_euc < utf8-bad_utf8)
296+ return UTF8;
297+ }
298+
299+ if (euc_error) {
300+ if (sjis_error) {
301+ return UTF8;
302+ }
303+ if (utf8_error) {
304+ return SJIS;
305+ }
306+ if (sjis-bad_sjis > utf8-bad_utf8)
307+ return SJIS;
308+ else if (sjis-bad_sjis < utf8-bad_utf8)
309+ return UTF8;
310+ }
311+
312+ if (utf8_error) {
313+ if (sjis_error) {
314+ return EUC;
315+ }
316+ if (euc_error) {
317+ return SJIS;
318+ }
319+ if (sjis-bad_sjis > euc-bad_euc)
320+ return SJIS;
321+ else
322+ return EUC;
323+ }
324+
325+ if (sjis-bad_sjis > euc-bad_euc) {
326+ if (sjis-bad_sjis > utf8-bad_utf8)
327+ return SJIS;
328+ else if (sjis-bad_sjis < utf8-bad_utf8)
329+ return UTF8;
330+ }
331+
332+ if (sjis-bad_sjis < euc-bad_euc) {
333+ if (euc-bad_euc > utf8-bad_utf8)
334+ return EUC;
335+ else if (euc-bad_euc < utf8-bad_utf8)
336+ return UTF8;
337+ }
338+ return UNKNOWN;
339+}
340+
341+LOCAL_INLINE
342+void jis_to_sjis2(unsigned char *ph, unsigned char *pl);
343+
344+LOCAL_INLINE
345+int isjis0213(unsigned char h, unsigned char l) {
346+ int *p;
347+ int jis = (h << 8 | l) & 0xffff;
348+
349+ for (p=tbl_jis0213; *(p+2) < jis; p+=2);
350+
351+ if (*p <= jis && (jis < (p[0] + p[1]))) {
352+ return 1;
353+ }
354+ else {
355+ return 0;
356+ }
357+}
358+
359+
360+LOCAL_INLINE
361+int mskanji_to_jis(unsigned char *ph, unsigned char *pl) {
362+ int *p;
363+ int sjis = (*ph << 8 | *pl) & 0xffff;
364+
365+ if (isgaiji1(*ph)) {
366+ *ph = (CONV_FAILED >> 8) & 0xff;
367+ *pl = CONV_FAILED & 0xff;
368+ return 1;
369+ }
370+
371+ for (p=tbl_sjis2jis; *p < sjis; p+=2);
372+
373+ if (*p == sjis) {
374+ *ph = (*(p+1)) >> 8;
375+ *pl = (*(p+1)) & 0xff;
376+ return 1;
377+ }
378+ return 0;
379+}
380+
381+LOCAL_INLINE
382+void sjis_to_jis(unsigned char *ph, unsigned char *pl)
383+{
384+ if (*ph <= 0x9f) {
385+ if (*pl < 0x9f)
386+ *ph = (*ph << 1) - 0xe1;
387+ else
388+ *ph = (*ph << 1) - 0xe0;
389+ } else {
390+ if (*pl < 0x9f)
391+ *ph = (*ph << 1) - 0x161;
392+ else
393+ *ph = (*ph << 1) - 0x160;
394+ }
395+ if (*pl < 0x7f)
396+ *pl -= 0x1f;
397+ else if (*pl < 0x9f)
398+ *pl -= 0x20;
399+ else
400+ *pl -= 0x7e;
401+}
402+
403+LOCAL_INLINE
404+void sjis_to_jis2(unsigned char *ph, unsigned char *pl)
405+{
406+ if (mskanji_to_jis(ph, pl))
407+ return;
408+ else
409+ sjis_to_jis(ph, pl);
410+}
411+
412+
413+LOCAL_INLINE
414+void jis_to_sjis(unsigned char *ph, unsigned char *pl)
415+{
416+ if (*ph & 1) {
417+ if (*pl < 0x60)
418+ *pl += 0x1f;
419+ else
420+ *pl += 0x20;
421+ } else
422+ *pl += 0x7e;
423+
424+ if (*ph < 0x5f)
425+ *ph = (*ph + 0xe1) >> 1;
426+ else
427+ *ph = (*ph + 0x161) >> 1;
428+}
429+
430+
431+LOCAL_INLINE
432+int jis_to_mskanji(unsigned char *ph, unsigned char *pl) {
433+ int *p;
434+ int jis = (*ph << 8 | *pl) & 0xffff;
435+
436+ for (p=tbl_jis2sjis; *p < jis; p+=2);
437+
438+ if (*p == jis) {
439+ *ph = (*(p+1)) >> 8;
440+ *pl = (*(p+1)) & 0xff;
441+ return 1;
442+ }
443+ return 0;
444+}
445+
446+
447+
448+LOCAL_INLINE
449+void jis_to_sjis2(unsigned char *ph, unsigned char *pl)
450+{
451+ if (jis_to_mskanji(ph, pl))
452+ return;
453+ else
454+ jis_to_sjis(ph, pl);
455+}
456+
457+
458+
459+
460+
/*
 * Convert len bytes of Shift_JIS text in buf to 7-bit JIS with escape
 * sequences.
 *
 * Double-byte characters are emitted after ESC $ B, or after ESC $ ( O
 * when j0208 is zero and isjis0213() accepts the converted pair.
 * Half-width kana are emitted after ESC ( I, everything else after
 * ESC ( B.  The output always ends in the ESC ( B state.
 *
 * On success returns 1 with *ret pointing to a malloc()ed buffer of
 * *retlen bytes (presumably released by the caller with free()), or
 * *ret == NULL and *retlen == 0 when there is no output.
 * On allocation failure returns 0 with *ret == NULL and *retlen == 0.
 */
int sjistojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
{
    unsigned char c, d;
    int pos, tmplen, retpos=0;
    unsigned char tmp[10];      /* at most a 4-byte escape plus 2 data bytes */
    unsigned char *newbuf;
    enum {NORMAL, KANJI, HANKANA, JIS0213} mode = NORMAL;

    /* Never leave the output pointer uninitialised or dangling. */
    *ret = NULL;
    *retlen = 0;

    if (!len) {
        return 1;
    }

    *ret = malloc(len);
    if (!*ret) {
        return 0;
    }
    *retlen = len;

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;
        if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
            c = buf[pos];
            d = buf[pos+1];
            pos += 1;
            sjis_to_jis2(&c, &d);

            if (j0208 || !isjis0213(c, d)) {
                if (mode != KANJI) {
                    mode = KANJI;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = 'B';
                }
            }
            else {
                if (mode != JIS0213) {
                    mode = JIS0213;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = '(';
                    tmp[tmplen++] = 'O';
                }
            }
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if (ishankana(buf[pos])) {
            if (mode != HANKANA) {
                mode = HANKANA;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'I';
            }
            tmp[tmplen++] = buf[pos] & 0x7f;
        } else {
            if (mode != NORMAL) {
                mode = NORMAL;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'B';
            }
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                /* grow by at least 16 bytes, more than tmp can hold */
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    *retlen = 0;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret+retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    if (mode != NORMAL) {
        /* switch back to ASCII at the end of the output */
        if (retpos + 3 > *retlen) {
            *retlen = retpos + 3;
            newbuf = realloc(*ret, *retlen);
            if (!newbuf) {
                free(*ret);
                *ret = NULL;
                *retlen = 0;
                return 0;
            }
            *ret = newbuf;
        }
        *(*ret + retpos) = 0x1b;
        *(*ret + retpos+1) = '(';
        *(*ret + retpos+2) = 'B';
        retpos += 3;
    }

    /* trim the buffer to the bytes actually written */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
572+
/*
 * Convert len bytes of EUC-JP text in buf to 7-bit JIS with escape
 * sequences.
 *
 * Two-byte characters are emitted (high bits cleared) after ESC $ B, or
 * after ESC $ ( O when j0208 is zero and isjis0213() accepts the pair.
 * 0x8e-prefixed half-width kana are emitted after ESC ( I, everything
 * else after ESC ( B.  The output always ends in the ESC ( B state.
 *
 * On success returns 1 with *ret pointing to a malloc()ed buffer of
 * *retlen bytes (presumably released by the caller with free()), or
 * *ret == NULL and *retlen == 0 when there is no output.
 * On allocation failure returns 0 with *ret == NULL and *retlen == 0.
 */
int euctojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
{
    unsigned char c, d;
    int pos, tmplen, retpos=0;
    unsigned char tmp[10];      /* at most a 4-byte escape plus 2 data bytes */
    unsigned char *newbuf;
    enum {NORMAL, KANJI, HANKANA, JIS0213} mode = NORMAL;

    /* Never leave the output pointer uninitialised or dangling. */
    *ret = NULL;
    *retlen = 0;

    if (!len) {
        return 1;
    }

    *ret = malloc(len);
    if (!*ret) {
        return 0;
    }
    *retlen = len;

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;
        if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
            c = buf[pos] & 0x7f;
            d = buf[pos+1] & 0x7f;
            pos += 1;

            if (j0208 || !isjis0213(c, d)) {
                if (mode != KANJI) {
                    mode = KANJI;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = 'B';
                }
            }
            else {
                if (mode != JIS0213) {
                    mode = JIS0213;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = '(';
                    tmp[tmplen++] = 'O';
                }
            }
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
            if (mode != HANKANA) {
                mode = HANKANA;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'I';
            }
            tmp[tmplen++] = buf[pos+1] & 0x7f;
            pos += 1;
        } else {
            if (mode != NORMAL) {
                mode = NORMAL;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'B';
            }
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                /* grow by at least 16 bytes, more than tmp can hold */
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    *retlen = 0;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret+retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    if (mode != NORMAL) {
        /* switch back to ASCII at the end of the output */
        if (retpos + 3 > *retlen) {
            *retlen = retpos + 3;
            newbuf = realloc(*ret, *retlen);
            if (!newbuf) {
                free(*ret);
                *ret = NULL;
                *retlen = 0;
                return 0;
            }
            *ret = newbuf;
        }
        *(*ret + retpos) = 0x1b;
        *(*ret + retpos+1) = '(';
        *(*ret + retpos+2) = 'B';
        retpos += 3;
    }

    /* trim the buffer to the bytes actually written */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
686+
687+
/*
 * Convert len bytes of Shift_JIS text in buf to EUC-JP.
 *
 * Double-byte characters are converted with sjis_to_jis2() and emitted
 * with the high bit set on both bytes.  Half-width kana are emitted with
 * a 0x8e prefix.  All other bytes are copied unchanged.
 *
 * On success returns 1 with *ret pointing to a malloc()ed buffer of
 * *retlen bytes (presumably released by the caller with free()), or
 * *ret == NULL and *retlen == 0 when there is no output.
 * On allocation failure returns 0 with *ret == NULL and *retlen == 0.
 */
int sjistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    unsigned char c, d;
    int pos, tmplen, retpos=0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    /* Never leave the output pointer uninitialised or dangling. */
    *ret = NULL;
    *retlen = 0;

    if (!len) {
        return 1;
    }

    *ret = malloc(len);
    if (!*ret) {
        return 0;
    }
    *retlen = len;

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
            c = buf[pos];
            d = buf[pos+1];
            pos += 1;

            sjis_to_jis2(&c, &d);
            tmp[tmplen++] = c | 0x80;
            tmp[tmplen++] = d | 0x80;
        } else if (ishankana(buf[pos])) {
            tmp[tmplen++] = 0x8e;
            tmp[tmplen++] = buf[pos];
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                /* grow by at least 16 bytes, more than tmp can hold */
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    *retlen = 0;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret+retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* trim the buffer to the bytes actually written */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;

    return 1;
}
755+
/*
 * Convert len bytes of 7-bit JIS text (with escape sequences) in buf to
 * EUC-JP.
 *
 * ESC $ @, ESC $ B and ESC $ ( O select double-byte mode.  ESC ( B and
 * ESC ( J select single-byte mode.  ESC ( I and the byte 0x0e select
 * half-width kana mode; the byte 0x0f returns to single-byte mode.
 * The escape sequences themselves are not copied to the output.
 *
 * On success returns 1 with *ret pointing to a malloc()ed buffer of
 * *retlen bytes (presumably released by the caller with free()), or
 * *ret == NULL and *retlen == 0 when there is no output.
 * On allocation failure returns 0 with *ret == NULL and *retlen == 0.
 */
int jistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    int pos, tmplen, retpos=0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;

    /* Never leave the output pointer uninitialised or dangling. */
    *ret = NULL;
    *retlen = 0;

    if (!len) {
        return 1;
    }

    *ret = malloc(len);
    if (!*ret) {
        return 0;
    }
    *retlen = len;

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if ((pos + 2 < len) &&
            (!memcmp(buf+pos, "\x1b$@", 3) ||
             !memcmp(buf+pos, "\x1b$B", 3))) {

            mode = KANJI;
            pos += 2;
        }
        else if ((pos + 3 < len) && !memcmp(buf+pos, "\x1b$(O", 4)) {
            mode = KANJI;
            pos += 3;
        }
        else if ((pos + 2 < len) &&
                 (!memcmp(buf+pos, "\x1b(B", 3) ||
                  !memcmp(buf+pos, "\x1b(J", 3))) {

            mode = NORMAL;
            pos += 2;
        }
        else if ((pos + 2 < len) && !memcmp(buf+pos, "\x1b(I", 3)) {
            mode = HANKANA;
            pos += 2;
        }
        else if (buf[pos] == '\x0e') {
            mode = HANKANA;
        }
        else if (buf[pos] == '\x0f') {
            mode = NORMAL;
        }
        else if (mode == KANJI && isjis(buf[pos]) && (pos+1 < len) && isjis(buf[pos+1])) {
            tmp[tmplen++] = buf[pos] | 0x80;
            tmp[tmplen++] = buf[pos+1] | 0x80;
            pos++;
        } else if (mode == HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
            tmp[tmplen++] = 0x8e;
            tmp[tmplen++] = buf[pos] | 0x80;
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                /* grow by at least 16 bytes, more than tmp can hold */
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    *retlen = 0;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret+retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        /* e.g. the input consisted of escape sequences only */
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* trim the buffer to the bytes actually written */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
847+
848+
/*
 * Convert len bytes of 7-bit JIS text (with escape sequences) in buf to
 * Shift_JIS.
 *
 * ESC $ @, ESC $ B and ESC $ ( O select double-byte mode.  ESC ( B and
 * ESC ( J select single-byte mode.  ESC ( I and the byte 0x0e select
 * half-width kana mode; the byte 0x0f returns to single-byte mode.
 * The escape sequences themselves are not copied to the output.
 * Double-byte characters are converted with jis_to_sjis2().
 *
 * On success returns 1 with *ret pointing to a malloc()ed buffer of
 * *retlen bytes (presumably released by the caller with free()), or
 * *ret == NULL and *retlen == 0 when there is no output.
 * On allocation failure returns 0 with *ret == NULL and *retlen == 0.
 */
int jistosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    unsigned char c, d;
    int pos, tmplen, retpos=0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;

    /* Never leave the output pointer uninitialised or dangling. */
    *ret = NULL;
    *retlen = 0;

    if (!len) {
        return 1;
    }

    *ret = malloc(len);
    if (!*ret) {
        return 0;
    }
    *retlen = len;

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if ((pos + 2 < len) &&
            (!memcmp(buf+pos, "\x1b$@", 3) ||
             !memcmp(buf+pos, "\x1b$B", 3))) {

            mode = KANJI;
            pos += 2;
        }
        else if ((pos + 3 < len) && !memcmp(buf+pos, "\x1b$(O", 4)) {
            mode = KANJI;
            pos += 3;
        }
        else if ((pos + 2 < len) &&
                 (!memcmp(buf+pos, "\x1b(B", 3) ||
                  !memcmp(buf+pos, "\x1b(J", 3))) {

            mode = NORMAL;
            pos += 2;
        }
        else if ((pos + 2 < len) && !memcmp(buf+pos, "\x1b(I", 3)) {
            mode = HANKANA;
            pos += 2;
        }
        else if (buf[pos] == '\x0e') {
            mode = HANKANA;
        }
        else if (buf[pos] == '\x0f') {
            mode = NORMAL;
        }
        else if (mode == KANJI && isjis(buf[pos]) && (pos+1 < len) && isjis(buf[pos+1])) {
            c = buf[pos];
            d = buf[pos+1];
            pos++;

            jis_to_sjis2(&c, &d);
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if (mode == HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
            tmp[tmplen++] = buf[pos] | 0x80;
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                /* grow by at least 16 bytes, more than tmp can hold */
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    *retlen = 0;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret+retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        /* e.g. the input consisted of escape sequences only */
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* trim the buffer to the bytes actually written */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
944+
/*
 * Convert an EUC byte string to Shift_JIS.
 *
 *   len    - number of input bytes in buf.
 *   buf    - input bytes.
 *   ret    - receives a malloc()ed result buffer that the caller must
 *            free().  It is NULL when the result is empty or on failure.
 *   retlen - receives the number of bytes stored in *ret.
 *
 * Returns 1 on success and 0 when memory could not be allocated.
 */
int euctosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    unsigned char c, d;
    int pos, tmplen, retpos = 0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    /* Give the caller a defined pointer and length on every exit path. */
    *ret = NULL;
    *retlen = 0;

    if (!len) {
        return 1;
    }

    *ret = malloc(len);
    if (!*ret) {
        return 0;
    }
    *retlen = len;

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
            /* Two-byte character: clear the high bits, then re-encode. */
            c = buf[pos] & 0x7f;
            d = buf[pos+1] & 0x7f;
            pos += 1;

            jis_to_sjis2(&c, &d);
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
            /* 0x8e-prefixed kana: drop the prefix, keep the kana byte. */
            tmp[tmplen++] = buf[pos+1];
            pos++;
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    *retlen = 0;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret+retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* Trim the buffer to the number of bytes actually produced. */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
1011+
/*
 * One- or two-byte kana sequences, terminated by a NULL entry.
 * Entry i is the counterpart of sjis_f_kana[i] and euc_f_kana[i]; the
 * kana conversion functions index these tables in parallel.
 * Every two-byte entry (second byte 0xde or 0xdf) is listed before the
 * one-byte entry with the same first byte, so a forward scan that stops
 * at the first match picks the longer sequence.
 */
1012+static const unsigned char *h_kana[] = {
1013+"\xdd", "\xdc", "\xdb", "\xda", "\xd9", "\xd8", "\xd7", "\xd6", "\xd5", "\xd4",
1014+"\xd3", "\xd2", "\xd1", "\xd0", "\xcf", "\xce\xdf", "\xce\xde", "\xce", "\xcd\xdf",
1015+"\xcd\xde", "\xcd", "\xcc\xdf", "\xcc\xde", "\xcc", "\xcb\xdf", "\xcb\xde",
1016+"\xcb", "\xca\xdf", "\xca\xde", "\xca", "\xc9", "\xc8", "\xc7", "\xc6", "\xc5",
1017+"\xc4\xde", "\xc4", "\xc3\xde", "\xc3", "\xc2\xde", "\xc2", "\xc1\xde", "\xc1",
1018+"\xc0\xde", "\xc0", "\xbf\xde", "\xbf", "\xbe\xde", "\xbe", "\xbd\xde", "\xbd",
1019+"\xbc\xde", "\xbc", "\xbb\xde", "\xbb", "\xba\xde", "\xba", "\xb9\xde", "\xb9",
1020+"\xb8\xde", "\xb8", "\xb7\xde", "\xb7", "\xb6\xde", "\xb6", "\xb5", "\xb4", "\xb3\xde",
1021+"\xb3", "\xb2", "\xb1", "\xb0", "\xaf", "\xae", "\xad", "\xac", "\xab",
1022+"\xaa", "\xa9", "\xa8", "\xa7", "\xa6", "\xa5", "\xa4", "\xa3", "\xa2", "\xa1", NULL};
1023+
/*
 * The h_kana sequences with each kana byte preceded by 0x8e, terminated
 * by a NULL entry.  euctofullkana() scans this table and uses the index
 * of the first match to pick the replacement from euc_f_kana.
 * Four-byte entries come before the two-byte entry sharing the same
 * leading pair, so the first match is the longest one.
 */
1024+static const unsigned char *euc_h_kana[] = {
1025+"\x8e\xdd", "\x8e\xdc", "\x8e\xdb", "\x8e\xda", "\x8e\xd9", "\x8e\xd8", "\x8e\xd7", "\x8e\xd6", "\x8e\xd5", "\x8e\xd4",
1026+"\x8e\xd3", "\x8e\xd2", "\x8e\xd1", "\x8e\xd0", "\x8e\xcf", "\x8e\xce\x8e\xdf", "\x8e\xce\x8e\xde", "\x8e\xce", "\x8e\xcd\x8e\xdf",
1027+"\x8e\xcd\x8e\xde", "\x8e\xcd", "\x8e\xcc\x8e\xdf", "\x8e\xcc\x8e\xde", "\x8e\xcc", "\x8e\xcb\x8e\xdf", "\x8e\xcb\x8e\xde",
1028+"\x8e\xcb", "\x8e\xca\x8e\xdf", "\x8e\xca\x8e\xde", "\x8e\xca", "\x8e\xc9", "\x8e\xc8", "\x8e\xc7", "\x8e\xc6", "\x8e\xc5",
1029+"\x8e\xc4\x8e\xde", "\x8e\xc4", "\x8e\xc3\x8e\xde", "\x8e\xc3", "\x8e\xc2\x8e\xde", "\x8e\xc2", "\x8e\xc1\x8e\xde", "\x8e\xc1",
1030+"\x8e\xc0\x8e\xde", "\x8e\xc0", "\x8e\xbf\x8e\xde", "\x8e\xbf", "\x8e\xbe\x8e\xde", "\x8e\xbe", "\x8e\xbd\x8e\xde", "\x8e\xbd",
1031+"\x8e\xbc\x8e\xde", "\x8e\xbc", "\x8e\xbb\x8e\xde", "\x8e\xbb", "\x8e\xba\x8e\xde", "\x8e\xba", "\x8e\xb9\x8e\xde", "\x8e\xb9",
1032+"\x8e\xb8\x8e\xde", "\x8e\xb8", "\x8e\xb7\x8e\xde", "\x8e\xb7", "\x8e\xb6\x8e\xde", "\x8e\xb6", "\x8e\xb5", "\x8e\xb4", "\x8e\xb3\x8e\xde",
1033+"\x8e\xb3", "\x8e\xb2", "\x8e\xb1", "\x8e\xb0", "\x8e\xaf", "\x8e\xae", "\x8e\xad", "\x8e\xac", "\x8e\xab",
1034+"\x8e\xaa", "\x8e\xa9", "\x8e\xa8", "\x8e\xa7", "\x8e\xa6", "\x8e\xa5", "\x8e\xa4", "\x8e\xa3", "\x8e\xa2", "\x8e\xa1", NULL};
1035+
/*
 * Two-byte Shift_JIS sequences, terminated by a NULL entry.
 * Entry i is the counterpart of h_kana[i]: sjistohankana() replaces a
 * match in this table with h_kana[i], and sjistofullkana() does the
 * reverse.
 */
1036+static const unsigned char *sjis_f_kana[] = {
1037+ "\x83\x93", "\x83\x8f", "\x83\x8d", "\x83\x8c", "\x83\x8b", "\x83\x8a",
1038+ "\x83\x89", "\x83\x88", "\x83\x86", "\x83\x84", "\x83\x82", "\x83\x81",
1039+ "\x83\x80", "\x83\x7e", "\x83\x7d", "\x83\x7c", "\x83\x7b", "\x83\x7a",
1040+ "\x83\x79", "\x83\x78", "\x83\x77", "\x83\x76", "\x83\x75", "\x83\x74",
1041+ "\x83\x73", "\x83\x72", "\x83\x71", "\x83\x70", "\x83\x6f", "\x83\x6e",
1042+ "\x83\x6d", "\x83\x6c", "\x83\x6b", "\x83\x6a", "\x83\x69", "\x83\x68",
1043+ "\x83\x67", "\x83\x66", "\x83\x65", "\x83\x64", "\x83\x63", "\x83\x61",
1044+ "\x83\x60", "\x83\x5f", "\x83\x5e", "\x83\x5d", "\x83\x5c", "\x83\x5b",
1045+ "\x83\x5a", "\x83\x59", "\x83\x58", "\x83\x57", "\x83\x56", "\x83\x55",
1046+ "\x83\x54", "\x83\x53", "\x83\x52", "\x83\x51", "\x83\x50", "\x83\x4f",
1047+ "\x83\x4e", "\x83\x4d", "\x83\x4c", "\x83\x4b", "\x83\x4a", "\x83\x49",
1048+ "\x83\x47", "\x83\x94", "\x83\x45", "\x83\x43", "\x83\x41", "\x81\x5b",
1049+ "\x83\x62", "\x83\x87", "\x83\x85", "\x83\x83", "\x83\x48", "\x83\x46",
1050+ "\x83\x44", "\x83\x42", "\x83\x40", "\x83\x92", "\x81\x45", "\x81\x41",
1051+ "\x81\x76", "\x81\x75", "\x81\x42", NULL};
1052+
1053+
/*
 * Two-byte EUC sequences, terminated by a NULL entry.
 * Entry i is the counterpart of h_kana[i] / euc_h_kana[i]:
 * euctohankana() replaces a match in this table with the 0x8e-prefixed
 * bytes of h_kana[i], and euctofullkana() does the reverse.
 */
1054+static const unsigned char *euc_f_kana[] = {
1055+ "\xa5\xf3", "\xa5\xef", "\xa5\xed", "\xa5\xec", "\xa5\xeb", "\xa5\xea",
1056+ "\xa5\xe9", "\xa5\xe8", "\xa5\xe6", "\xa5\xe4", "\xa5\xe2", "\xa5\xe1",
1057+ "\xa5\xe0", "\xa5\xdf", "\xa5\xde", "\xa5\xdd", "\xa5\xdc", "\xa5\xdb",
1058+ "\xa5\xda", "\xa5\xd9", "\xa5\xd8", "\xa5\xd7", "\xa5\xd6", "\xa5\xd5",
1059+ "\xa5\xd4", "\xa5\xd3", "\xa5\xd2", "\xa5\xd1", "\xa5\xd0", "\xa5\xcf",
1060+ "\xa5\xce", "\xa5\xcd", "\xa5\xcc", "\xa5\xcb", "\xa5\xca", "\xa5\xc9",
1061+ "\xa5\xc8", "\xa5\xc7", "\xa5\xc6", "\xa5\xc5", "\xa5\xc4", "\xa5\xc2",
1062+ "\xa5\xc1", "\xa5\xc0", "\xa5\xbf", "\xa5\xbe", "\xa5\xbd", "\xa5\xbc",
1063+ "\xa5\xbb", "\xa5\xba", "\xa5\xb9", "\xa5\xb8", "\xa5\xb7", "\xa5\xb6",
1064+ "\xa5\xb5", "\xa5\xb4", "\xa5\xb3", "\xa5\xb2", "\xa5\xb1", "\xa5\xb0",
1065+ "\xa5\xaf", "\xa5\xae", "\xa5\xad", "\xa5\xac", "\xa5\xab", "\xa5\xaa",
1066+ "\xa5\xa8", "\xa5\xf4", "\xa5\xa6", "\xa5\xa4", "\xa5\xa2", "\xa1\xbc",
1067+ "\xa5\xc3", "\xa5\xe7", "\xa5\xe5", "\xa5\xe3", "\xa5\xa9", "\xa5\xa7",
1068+ "\xa5\xa5", "\xa5\xa3", "\xa5\xa1", "\xa5\xf2", "\xa1\xa6", "\xa1\xa2",
1069+ "\xa1\xd7", "\xa1\xd6", "\xa1\xa3", NULL};
1070+
1071+
1072+int sjistohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1073+ int pos, tmplen, retpos=0;
1074+ char tmp[10];
1075+ char *newbuf;
1076+ int i;
1077+
1078+ if (!len) {
1079+ *retlen = 0;
1080+ return 1;
1081+ }
1082+
1083+ *retlen = len;
1084+ *ret = malloc(*retlen);
1085+ if (!*ret) {
1086+ return 0;
1087+ }
1088+
1089+ for (pos = 0; pos < len; pos++) {
1090+ tmplen=0;
1091+
1092+ if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
1093+ for (i = 0; sjis_f_kana[i]; i++) {
1094+ if (buf[pos] == sjis_f_kana[i][0] && buf[pos+1] == sjis_f_kana[i][1]) {
1095+ tmp[tmplen++] = h_kana[i][0];
1096+ if (h_kana[i][1]) {
1097+ tmp[tmplen++] = h_kana[i][1];
1098+ }
1099+ break;
1100+ }
1101+ }
1102+ if (!sjis_f_kana[i]) {
1103+ tmp[tmplen++] = buf[pos];
1104+ tmp[tmplen++] = buf[pos+1];
1105+ }
1106+
1107+ pos++;
1108+ } else {
1109+ tmp[tmplen++] = buf[pos];
1110+ }
1111+
1112+ if (tmplen) {
1113+ if (retpos + tmplen > *retlen) {
1114+ *retlen = *retlen + len / 2 + 16;
1115+ newbuf = realloc(*ret, *retlen);
1116+ if (!newbuf) {
1117+ free(*ret);
1118+ return 0;
1119+ }
1120+ *ret = newbuf;
1121+ }
1122+ memcpy(*ret+retpos, tmp, tmplen);
1123+ retpos += tmplen;
1124+ }
1125+ }
1126+
1127+ if (!retpos) {
1128+ *retlen = 0;
1129+ free(*ret);
1130+ return 1;
1131+ }
1132+
1133+ newbuf = realloc(*ret, retpos);
1134+ if (!newbuf) {
1135+ free(*ret);
1136+ return 0;
1137+ }
1138+ *ret = newbuf;
1139+ *retlen = retpos;
1140+
1141+ return 1;
1142+}
1143+
1144+
1145+int sjistofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1146+ int pos, tmplen, retpos=0;
1147+ char tmp[10];
1148+ char *newbuf;
1149+ int i, j;
1150+
1151+ if (!len) {
1152+ *retlen = 0;
1153+ return 1;
1154+ }
1155+
1156+ *retlen = len;
1157+ *ret = malloc(*retlen);
1158+ if (!*ret) {
1159+ return 0;
1160+ }
1161+
1162+ for (pos = 0; pos < len; pos++) {
1163+ tmplen=0;
1164+
1165+ if (ishankana(buf[pos])) {
1166+ for (i = 0; h_kana[i]; i++) {
1167+ for (j = 0; h_kana[i][j] && buf[pos+j]; j++) {
1168+ if (h_kana[i][j] != buf[pos+j]) {
1169+ break;
1170+ }
1171+ }
1172+ if (!h_kana[i][j]) {
1173+ const char *p;
1174+ for (p = sjis_f_kana[i]; *p; p++) {
1175+ tmp[tmplen++] = *p;
1176+ }
1177+ pos += j-1;
1178+ break;
1179+ }
1180+ }
1181+
1182+ if (!h_kana[i]) {
1183+ tmp[tmplen++] = buf[pos];
1184+ }
1185+ }
1186+ else if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
1187+ tmp[tmplen++] = buf[pos];
1188+ tmp[tmplen++] = buf[pos+1];
1189+ pos += 1;
1190+ } else {
1191+ tmp[tmplen++] = buf[pos];
1192+ }
1193+
1194+ if (tmplen) {
1195+ if (retpos + tmplen > *retlen) {
1196+ *retlen = *retlen + len / 2 + 16;
1197+ newbuf = realloc(*ret, *retlen);
1198+ if (!newbuf) {
1199+ free(*ret);
1200+ return 0;
1201+ }
1202+ *ret = newbuf;
1203+ }
1204+ memcpy(*ret+retpos, tmp, tmplen);
1205+ retpos += tmplen;
1206+ }
1207+ }
1208+
1209+ if (!retpos) {
1210+ *retlen = 0;
1211+ free(*ret);
1212+ return 1;
1213+ }
1214+
1215+ newbuf = realloc(*ret, retpos);
1216+ if (!newbuf) {
1217+ free(*ret);
1218+ return 0;
1219+ }
1220+ *ret = newbuf;
1221+ *retlen = retpos;
1222+
1223+ return 1;
1224+}
1225+
1226+int euctohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1227+ int pos, tmplen, retpos=0;
1228+ char tmp[10];
1229+ char *newbuf;
1230+ int i;
1231+
1232+ if (!len) {
1233+ *retlen = 0;
1234+ return 1;
1235+ }
1236+
1237+ *retlen = len;
1238+ *ret = malloc(*retlen);
1239+ if (!*ret) {
1240+ return 0;
1241+ }
1242+
1243+ for (pos = 0; pos < len; pos++) {
1244+ tmplen=0;
1245+
1246+ if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
1247+ for (i = 0; euc_f_kana[i]; i++) {
1248+ if (buf[pos] == euc_f_kana[i][0] && buf[pos+1] == euc_f_kana[i][1]) {
1249+ tmp[tmplen++] = '\x8e';
1250+ tmp[tmplen++] = h_kana[i][0];
1251+ if (h_kana[i][1]) {
1252+ tmp[tmplen++] = '\x8e';
1253+ tmp[tmplen++] = h_kana[i][1];
1254+ }
1255+ break;
1256+ }
1257+ }
1258+ if (!euc_f_kana[i]) {
1259+ tmp[tmplen++] = buf[pos];
1260+ tmp[tmplen++] = buf[pos+1];
1261+ }
1262+ pos++;
1263+ }
1264+ else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
1265+ tmp[tmplen++] = buf[pos];
1266+ tmp[tmplen++] = buf[pos+1];
1267+ pos++;
1268+ } else {
1269+ tmp[tmplen++] = buf[pos];
1270+ }
1271+
1272+ if (tmplen) {
1273+ if (retpos + tmplen > *retlen) {
1274+ *retlen = *retlen + len / 2 + 16;
1275+ newbuf = realloc(*ret, *retlen);
1276+ if (!newbuf) {
1277+ free(*ret);
1278+ return 0;
1279+ }
1280+ *ret = newbuf;
1281+ }
1282+ memcpy(*ret+retpos, tmp, tmplen);
1283+ retpos += tmplen;
1284+ }
1285+ }
1286+
1287+ if (!retpos) {
1288+ *retlen = 0;
1289+ free(*ret);
1290+ return 1;
1291+ }
1292+
1293+ newbuf = realloc(*ret, retpos);
1294+ if (!newbuf) {
1295+ free(*ret);
1296+ return 0;
1297+ }
1298+ *ret = newbuf;
1299+ *retlen = retpos;
1300+
1301+ return 1;
1302+}
1303+
1304+
1305+int euctofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1306+ int pos, tmplen, retpos=0;
1307+ char tmp[10];
1308+ char *newbuf;
1309+ int i, j;
1310+
1311+ if (!len) {
1312+ *retlen = 0;
1313+ return 1;
1314+ }
1315+
1316+ *retlen = len;
1317+ *ret = malloc(*retlen);
1318+ if (!*ret) {
1319+ return 0;
1320+ }
1321+
1322+ for (pos = 0; pos < len; pos++) {
1323+ tmplen=0;
1324+
1325+ if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
1326+ for (i = 0; euc_h_kana[i]; i++) {
1327+ for (j = 0; euc_h_kana[i][j] && buf[pos+j]; j++) {
1328+ if (euc_h_kana[i][j] != buf[pos+j]) {
1329+ break;
1330+ }
1331+ }
1332+ if (!euc_h_kana[i][j]) {
1333+ const char *p;
1334+ for (p = euc_f_kana[i]; *p; p++) {
1335+ tmp[tmplen++] = *p;
1336+ }
1337+ pos += j-1;
1338+ break;
1339+ }
1340+ }
1341+
1342+ if (!h_kana[i]) {
1343+ tmp[tmplen++] = buf[pos];
1344+ }
1345+ }
1346+ else if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
1347+ tmp[tmplen++] = buf[pos];
1348+ tmp[tmplen++] = buf[pos+1];
1349+ pos += 1;
1350+ } else {
1351+ tmp[tmplen++] = buf[pos];
1352+ }
1353+
1354+ if (tmplen) {
1355+ if (retpos + tmplen > *retlen) {
1356+ *retlen = *retlen + len / 2 + 16;
1357+ newbuf = realloc(*ret, *retlen);
1358+ if (!newbuf) {
1359+ free(*ret);
1360+ return 0;
1361+ }
1362+ *ret = newbuf;
1363+ }
1364+ memcpy(*ret+retpos, tmp, tmplen);
1365+ retpos += tmplen;
1366+ }
1367+ }
1368+
1369+ if (!retpos) {
1370+ *retlen = 0;
1371+ free(*ret);
1372+ return 1;
1373+ }
1374+
1375+ newbuf = realloc(*ret, retpos);
1376+ if (!newbuf) {
1377+ free(*ret);
1378+ return 0;
1379+ }
1380+ *ret = newbuf;
1381+ *retlen = retpos;
1382+
1383+ return 1;
1384+}
1385+
1386+
1387+#ifdef PYKF_MAIN
1388+
1389+
/*
 * Stand-alone smoke test, compiled only when PYKF_MAIN is defined.
 * Runs a single sjistojis() conversion; the larger block of checks below
 * is disabled and kept for reference only.
 */
int main(void) {
/*

    char *ret, *ret2, *ret3, *ret4, *ret5, *ret6, *ret7, *ret8;
    int retlen, retlen2, retlen3, retlen4, retlen5, retlen6, retlen7, retlen8;
    char *s1 = "\x82\xa0\xb1\x88\x9f\x61\x82\xa2\xb2\x8b\x8f\x62\x82\xa4\xb3\x89\x4b\x63\x82\xa6\xb4\x93\xbe\x64\x82\xa8\xb5\x94\xf6\x6f";
    char *s2 = "アイウエオ";
    char *gaiji = "\xf0\x40";
    char *s3 = "あいうえお\x81";
    char *s4 = "アイウエオカ";
    char *s5 = "アイ";
    int guessed;

    guess(strlen(s1), s1, 1);
    sjistohankana(strlen(s2), s2, &ret7, &retlen7);


    sjistojis(strlen(s1), s1, &ret, &retlen);
    jistoeuc(retlen, ret, &ret2, &retlen2);
    guess(retlen2, ret2, 1);

    euctosjis(retlen2, ret2, &ret3, &retlen3);
    assert(strncmp(s1, ret3, strlen(s1))==0);

    euctojis(retlen2, ret2, &ret4, &retlen4);
    assert(strncmp(ret, ret4, retlen)==0);

    sjistoeuc(strlen(s1), s1, &ret5, &retlen5);
    assert(strncmp(ret2, ret5, strlen(ret2))==0);

    jistosjis(retlen4, ret4, &ret6, &retlen6);
    assert(strncmp(s1, ret6, strlen(s1))==0);

    sjistoeuc(strlen(gaiji), gaiji, &ret7, &retlen7);

    sjistojis(strlen(s5), s5, &ret8, &retlen8);

    guessed = guess(strlen(s3), s3, 1);
    assert(guessed == ERROR);

    guessed = guess(strlen(s3), s3, 0);
    assert(guessed == SJIS);

    guessed = guess(strlen(s4), s4, 0);


*/
    unsigned char *s = (unsigned char *)"?";
    unsigned char *ret = NULL;
    int retlen = 0;

    /* As the callers in pykf.c do, free the result only when the
       conversion succeeded and produced at least one byte. */
    if (sjistojis((int)strlen((char *)s), s, &ret, &retlen, 0) && retlen) {
        free(ret);
    }

    return 0;
}
1444+
1445+#endif
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/src/pykf.h (nonexistent)
+++ tags/0.3.5/src/pykf.h (revision 10)
@@ -0,0 +1,25 @@
#ifndef PYKF_H
#define PYKF_H

/* kanji conversion tables */
extern unsigned int tbl_jis0213[];
/* NOTE(review): mskanji.c defines tbl_sjis2jis as unsigned int[], while it
   is declared as int[] here -- confirm which type is intended. */
extern int tbl_sjis2jis[];
extern int tbl_jis2sjis[];


/* Japanese character encodings */
enum {ERROR=-1, UNKNOWN=0, ASCII=1, SJIS=2, EUC=3, JIS=4, UTF8=5, UTF16_LE=7, UTF16_BE=8};

/* Guess the encoding of the imax bytes in buf; returns one of the enum
   values above. */
int guess(int imax, unsigned char buf[], int strict);

/*
 * Converters.  pykf.c uses all of them the same way: a non-zero return
 * means success, zero is reported as an out-of-memory error.  On success
 * *retlen is the output length and, when it is non-zero, *ret is a
 * malloc()ed buffer that the caller frees.
 * NOTE(review): the jis0208 flag presumably restricts the output to
 * JIS X 0208 -- confirm in convert.c.
 */
int sjistojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int jis0208);
int euctojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int jis0208);
int sjistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int jistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int jistosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int euctosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen);

/* Kana width converters; same calling convention as above. */
int sjistohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int euctohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int sjistofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int euctofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen);

#endif /* PYKF_H */
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/src/pykf.c (nonexistent)
+++ tags/0.3.5/src/pykf.c (revision 10)
@@ -0,0 +1,650 @@
1+/*********************************************************************
2+
3+Japanese Kanji filter module
4+ Copyright (c) 2002, Atsuo Ishimoto. All rights reserved.
5+
6+Permission to use, copy, modify, and distribute this software and its
7+documentation for any purpose and without fee is hereby granted, provided that
8+the above copyright notice appear in all copies and that both that copyright
9+notice and this permission notice appear in supporting documentation, and that
10+the name of Atsuo Ishimoto not be used in advertising or publicity pertaining
11+to distribution of the software without specific, written prior permission.
12+
13+ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
14+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
15+EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR
16+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
17+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
18+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
19+PERFORMANCE OF THIS SOFTWARE.
20+
21+---------------------------------------------------------------------
22+This module is based on kf.c written by Haruhiko Okumura.
23+ Copyright (c) 1995-2000 Haruhiko Okumura
24+ This file may be freely modified/redistributed.
25+
26+Original kf.c:
27+ http://www.matsusaka-u.ac.jp/~okumura/kf.html
28+*********************************************************************/
29+
30+#include <Python.h>
31+#include "pykf.h"
32+#include "convert.h"
33+
34+static PyObject *EncodingError;
35+#define BADENCODING(d) {PyErr_Format(EncodingError, "%d", d);}
36+#define GUESSFAILED() {PyErr_Format(EncodingError, "Failed to detect encodnig");}
37+
38+
39+
40+#if defined(MS_WIN32) || defined(macintosh)
41+static int default_enc = SJIS;
42+#else
43+static int default_enc = EUC;
44+#endif
45+
46+#define SETDEFAULT_DOC "setdefault(enc) -> None\n\
47+\tSet default input encoding"
48+
49+static PyObject*
50+pykf_setdefault(PyObject* self, PyObject* args)
51+{
52+ int enc;
53+ if (!PyArg_ParseTuple(args, "i:setdefalult", &enc))
54+ return NULL;
55+
56+ switch (enc) {
57+ case UNKNOWN: case ASCII: case SJIS: case EUC: case JIS:
58+ default_enc = enc;
59+ break;
60+ default:
61+ BADENCODING(enc); return NULL;
62+ }
63+ Py_INCREF(Py_None);
64+ return Py_None;
65+}
66+
67+
68+#define GETDEFAULT_DOC "getdefault() -> enc\n\
69+\tGet default input encoding"
70+
71+static PyObject*
72+pykf_getdefault(PyObject* self, PyObject* args)
73+{
74+ if (!PyArg_ParseTuple(args, ":getdefault"))
75+ return NULL;
76+
77+ return PyInt_FromLong(default_enc);
78+}
79+
80+static int check_strict = 0;
81+
82+#define SETSTRICT_DOC "setstrict(True/False) -> None\n\
83+\tSet strict check mode."
84+
85+static PyObject*
86+pykf_setstrict(PyObject* self, PyObject* args)
87+{
88+ if (!PyArg_ParseTuple(args, "i:setstrict", &check_strict))
89+ return NULL;
90+ Py_INCREF(Py_None);
91+ return Py_None;
92+}
93+
94+#define GETSTRICT_DOC "getstrict() -> int\n\
95+\tGet strict check mode."
96+
97+static PyObject*
98+pykf_getstrict(PyObject* self, PyObject* args)
99+{
100+ if (!PyArg_ParseTuple(args, ":getstrict"))
101+ return NULL;
102+
103+ return PyInt_FromLong(check_strict);
104+}
105+
106+
107+#define GUESS_DOC "guess(s) -> encoding\n\
108+\tGuess string encoding"
109+
110+static PyObject*
111+pykf_guess(PyObject* self, PyObject* args)
112+{
113+ char *s;
114+ int ret, len;
115+ int strict = check_strict;
116+
117+ if (!PyArg_ParseTuple(args, "s#|i:guess", &s, &len, &strict))
118+ return NULL;
119+
120+ ret = guess(len, s, strict);
121+ return PyInt_FromLong(ret);
122+}
123+
124+
125+
126+#define TOJIS_DOC "tojis(s[, enc]) -> converted string\n\
127+\tConvet string to JIS encoding"
128+
129+static PyObject*
130+pykf_tojis(PyObject* self, PyObject* args, PyObject* kwds)
131+{
132+ unsigned char *s, *conv;
133+ int enc=UNKNOWN, len, convlen;
134+ PyObject *ret;
135+ int strict = check_strict;
136+ int j0208 = 0;
137+ static char *kwlist[] = {"s", "enc", "strict", "j0208", NULL};
138+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|iii:tojis", kwlist, &s, &len, &enc, &strict, &j0208))
139+ return NULL;
140+
141+// if (!PyArg_ParseTuple(args, "s#|ii:tojis", &s, &len, &enc, &strict))
142+
143+ if (enc == UNKNOWN) {
144+ enc = guess(len, s, strict);
145+ if (strict && enc == ERROR) {
146+ GUESSFAILED(); return NULL;
147+ }
148+ if (enc == UNKNOWN)
149+ enc = default_enc;
150+ if (enc == UNKNOWN) {
151+ GUESSFAILED(); return NULL;
152+ }
153+ }
154+
155+ switch (enc) {
156+ case SJIS:
157+ if (sjistojis(len, s, &conv, &convlen, j0208)) {
158+ if (convlen) {
159+ ret = PyString_FromStringAndSize(conv, convlen);
160+ free(conv);
161+ }
162+ else {
163+ ret = PyString_FromStringAndSize("", 0);
164+ }
165+ return ret;
166+ }
167+ break;
168+ case EUC:
169+ if (euctojis(len, s, &conv, &convlen, j0208)) {
170+ if (convlen) {
171+ ret = PyString_FromStringAndSize(conv, convlen);
172+ free(conv);
173+ }
174+ else {
175+ ret = PyString_FromStringAndSize("", 0);
176+ }
177+ return ret;
178+ }
179+ break;
180+ case JIS:
181+ case ASCII:
182+ return PyString_FromStringAndSize(s, len);
183+ default:
184+ BADENCODING(enc); return NULL;
185+ }
186+ return PyErr_NoMemory();
187+}
188+
189+
190+#define TOEUC_DOC "toeuc(s[, enc]) -> converted string\n\
191+\tConvet string to EUC encoding"
192+
193+static PyObject*
194+pykf_toeuc(PyObject* self, PyObject* args, PyObject* kwds)
195+{
196+ unsigned char *s, *conv;
197+ int enc=UNKNOWN, len, convlen;
198+ PyObject *ret;
199+ int strict = check_strict;
200+
201+ static char *kwlist[] = {"s", "enc", "strict", NULL};
202+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:toeuc", kwlist, &s, &len, &enc, &strict))
203+ return NULL;
204+
205+ if (enc == UNKNOWN) {
206+ enc = guess(len, s, strict);
207+ if (strict && enc == ERROR) {
208+ GUESSFAILED(); return NULL;
209+ }
210+ if (enc == UNKNOWN)
211+ enc = default_enc;
212+ if (enc == UNKNOWN) {
213+ GUESSFAILED(); return NULL;
214+ }
215+ }
216+
217+ switch (enc) {
218+ case SJIS:
219+ if (sjistoeuc(len, s, &conv, &convlen)) {
220+ if (convlen) {
221+ ret = PyString_FromStringAndSize(conv, convlen);
222+ free(conv);
223+ }
224+ else {
225+ ret = PyString_FromStringAndSize("", 0);
226+ }
227+ return ret;
228+ }
229+ break;
230+ case JIS:
231+ if (jistoeuc(len, s, &conv, &convlen)) {
232+ if (convlen) {
233+ ret = PyString_FromStringAndSize(conv, convlen);
234+ free(conv);
235+ }
236+ else {
237+ ret = PyString_FromStringAndSize("", 0);
238+ }
239+ return ret;
240+ }
241+ break;
242+ case EUC:
243+ case ASCII:
244+ return PyString_FromStringAndSize(s, len);
245+ default:
246+ BADENCODING(enc); return NULL;
247+ }
248+
249+ return PyErr_NoMemory();
250+}
251+
252+
253+#define TOSJIS_DOC "tosjis(s[, enc]) -> converted string\n\
254+\tConvet string to SJIS encoding"
255+
256+static PyObject*
257+pykf_tosjis(PyObject* self, PyObject* args, PyObject *kwds)
258+{
259+ unsigned char *s, *conv;
260+ int enc=UNKNOWN, len, convlen;
261+ PyObject *ret;
262+ int strict = check_strict;
263+
264+ static char *kwlist[] = {"s", "enc", "strict", NULL};
265+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:tosjis", kwlist, &s, &len, &enc, &strict))
266+ return NULL;
267+
268+
269+ if (enc == UNKNOWN) {
270+ enc = guess(len, s, strict);
271+ if (strict && enc == ERROR) {
272+ GUESSFAILED(); return NULL;
273+ }
274+ if (enc == UNKNOWN)
275+ enc = default_enc;
276+ if (enc == UNKNOWN) {
277+ GUESSFAILED(); return NULL;
278+ }
279+ }
280+
281+ switch (enc) {
282+ case SJIS:
283+ case ASCII:
284+ return PyString_FromStringAndSize(s, len);
285+ case JIS:
286+ if (jistosjis(len, s, &conv, &convlen)) {
287+ if (convlen) {
288+ ret = PyString_FromStringAndSize(conv, convlen);
289+ free(conv);
290+ }
291+ else {
292+ ret = PyString_FromStringAndSize("", 0);
293+ }
294+ return ret;
295+ }
296+ break;
297+ case EUC:
298+ if (euctosjis(len, s, &conv, &convlen)) {
299+ if (convlen) {
300+ ret = PyString_FromStringAndSize(conv, convlen);
301+ free(conv);
302+ }
303+ else {
304+ ret = PyString_FromStringAndSize("", 0);
305+ }
306+ return ret;
307+ }
308+ break;
309+ default:
310+ BADENCODING(enc); return NULL;
311+ }
312+
313+ return PyErr_NoMemory();
314+}
315+
316+#define TOHALF_DOC "tohalf(s[, enc]) -> converted string\n\
317+\tConvet string to half width character"
318+
319+static PyObject*
320+pykf_tohalfkana(PyObject* self, PyObject* args, PyObject *kwds)
321+{
322+ unsigned char *s, *conv;
323+ int enc=UNKNOWN, len, convlen;
324+ PyObject *ret;
325+ int strict = check_strict;
326+
327+ static char *kwlist[] = {"s", "enc", "strict", NULL};
328+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:tohalf", kwlist, &s, &len, &enc, &strict))
329+ return NULL;
330+
331+ if (enc == UNKNOWN) {
332+ enc = guess(len, s, strict);
333+ if (strict && enc == ERROR) {
334+ GUESSFAILED(); return NULL;
335+ }
336+ if (enc == UNKNOWN)
337+ enc = default_enc;
338+ if (enc == UNKNOWN) {
339+ GUESSFAILED(); return NULL;
340+ }
341+ }
342+
343+ switch (enc) {
344+ case SJIS:
345+ if (sjistohankana(len, s, &conv, &convlen)) {
346+ if (convlen) {
347+ ret = PyString_FromStringAndSize(conv, convlen);
348+ free(conv);
349+ }
350+ else {
351+ ret = PyString_FromStringAndSize("", 0);
352+ }
353+ return ret;
354+ }
355+ break;
356+ case EUC:
357+ if (euctohankana(len, s, &conv, &convlen)) {
358+ if (convlen) {
359+ ret = PyString_FromStringAndSize(conv, convlen);
360+ free(conv);
361+ }
362+ else {
363+ ret = PyString_FromStringAndSize("", 0);
364+ }
365+ return ret;
366+ }
367+ break;
368+ default:
369+ BADENCODING(enc); return NULL;
370+ }
371+
372+ return PyErr_NoMemory();
373+}
374+
375+
376+#define TOFULL_DOC "tofull(s[, enc]) -> converted string\n\
377+\tConvet string to full width character"
378+
379+static PyObject*
380+pykf_tofullkana(PyObject* self, PyObject* args, PyObject *kwds)
381+{
382+ unsigned char *s, *conv;
383+ int enc=UNKNOWN, len, convlen;
384+ int strict = check_strict;
385+ PyObject *ret;
386+
387+ static char *kwlist[] = {"s", "enc", "strict", NULL};
388+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:tofull", kwlist, &s, &len, &enc, &strict))
389+ return NULL;
390+
391+
392+ if (enc == UNKNOWN) {
393+ enc = guess(len, s, strict);
394+ if (strict && enc == ERROR) {
395+ GUESSFAILED(); return NULL;
396+ }
397+ if (enc == UNKNOWN)
398+ enc = default_enc;
399+ if (enc == UNKNOWN) {
400+ GUESSFAILED(); return NULL;
401+ }
402+ }
403+
404+ switch (enc) {
405+ case SJIS:
406+ if (sjistofullkana(len, s, &conv, &convlen)) {
407+ if (convlen) {
408+ ret = PyString_FromStringAndSize(conv, convlen);
409+ free(conv);
410+ }
411+ else {
412+ ret = PyString_FromStringAndSize("", 0);
413+ }
414+ return ret;
415+ }
416+ break;
417+ case EUC:
418+ if (euctofullkana(len, s, &conv, &convlen)) {
419+ if (convlen) {
420+ ret = PyString_FromStringAndSize(conv, convlen);
421+ free(conv);
422+ }
423+ else {
424+ ret = PyString_FromStringAndSize("", 0);
425+ }
426+ return ret;
427+ }
428+ break;
429+ default:
430+ BADENCODING(enc); return NULL;
431+ }
432+
433+ return PyErr_NoMemory();
434+}
435+
436+
437+#define SPLIT_DOC "tosjis(s[, enc]) -> list of chars\n\
438+\tConvet string to list of chars"
439+
440+static PyObject*
441+pykf_split(PyObject* self, PyObject* args, PyObject *kwds)
442+{
443+ unsigned char *s;
444+ int enc=UNKNOWN, len;
445+ int pos;
446+ PyObject *ret, *o;
447+ int strict = check_strict;
448+ enum {NORMAL, KANJI, HANKANA} mode = NORMAL;
449+
450+ static char *kwlist[] = {"s", "enc", "strict", NULL};
451+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:split", kwlist, &s, &len, &enc, &strict))
452+ return NULL;
453+
454+ if (enc == UNKNOWN) {
455+ enc = guess(len, s, strict);
456+ if (strict && enc == ERROR) {
457+ GUESSFAILED(); return NULL;
458+ }
459+ if (enc == UNKNOWN)
460+ enc = default_enc;
461+ if (enc == UNKNOWN) {
462+ GUESSFAILED(); return NULL;
463+ }
464+ }
465+
466+ ret = PyList_New(0);
467+ if (!ret) {
468+ return NULL;
469+ }
470+ switch (enc) {
471+ case SJIS:
472+ for (pos = 0; pos < len; pos++) {
473+ if (issjis1(s[pos]) && (pos + 1 < len) && issjis2(s[pos+1])) {
474+ o = PyString_FromStringAndSize(s+pos, 2);
475+ pos++;
476+ }
477+ else {
478+ o = PyString_FromStringAndSize(s+pos, 1);
479+ }
480+ if (!o) {
481+ Py_DECREF(ret);
482+ return NULL;
483+ }
484+ if (-1 == PyList_Append(ret, o)) {
485+ Py_DECREF(ret);
486+ return NULL;
487+ }
488+ Py_DECREF(o);
489+ }
490+ return ret;
491+ case ASCII:
492+ for (pos = 0; pos < len; pos++) {
493+ o = PyString_FromStringAndSize(s+pos, 1);
494+ if (!o) {
495+ Py_DECREF(ret);
496+ return NULL;
497+ }
498+ if (-1 == PyList_Append(ret, o)) {
499+ Py_DECREF(ret);
500+ return NULL;
501+ }
502+ Py_DECREF(o);
503+ }
504+ return ret;
505+ case JIS:
506+ for (pos = 0; pos < len; pos++) {
507+
508+ if ((pos + 2 < len) &&
509+ (!memcmp(s+pos, "\x1b$@", 3) ||
510+ !memcmp(s+pos, "\x1b$B", 3))) {
511+
512+ mode = KANJI;
513+ o = PyString_FromStringAndSize(s+pos, 3);
514+ pos += 2;
515+ }
516+ else if ((pos + 3 < len) && !memcmp(s+pos, "\x1b$(O", 4)) {
517+ mode = KANJI;
518+ o = PyString_FromStringAndSize(s+pos, 3);
519+ pos += 3;
520+ }
521+ else if ((pos + 2 < len) &&
522+ (!memcmp(s+pos, "\x1b(B", 3) ||
523+ !memcmp(s+pos, "\x1b(J", 3))) {
524+
525+ mode = NORMAL;
526+ o = PyString_FromStringAndSize(s+pos, 3);
527+ pos += 2;
528+ }
529+ else if ((pos + 2 < len) && !memcmp(s+pos, "\x1b(I", 3)) {
530+ mode = HANKANA;
531+ o = PyString_FromStringAndSize(s+pos, 3);
532+ pos += 2;
533+ }
534+ else if (s[pos] == '\x0e') {
535+ mode = HANKANA;
536+ o = PyString_FromStringAndSize(s+pos, 1);
537+ }
538+ else if (s[pos] == '\x0f') {
539+ mode = NORMAL;
540+ o = PyString_FromStringAndSize(s+pos, 1);
541+ }
542+ else if (mode == KANJI && isjis(s[pos]) && (pos+1 < len) && isjis(s[pos+1])) {
543+ o = PyString_FromStringAndSize(s+pos, 2);
544+ pos++;
545+ } else if (mode == HANKANA && s[pos] >= 0x20 && s[pos] <= 0x5f) {
546+ o = PyString_FromStringAndSize(s+pos, 1);
547+ } else {
548+ o = PyString_FromStringAndSize(s+pos, 1);
549+ }
550+ if (!o) {
551+ Py_DECREF(ret);
552+ return NULL;
553+ }
554+ if (-1 == PyList_Append(ret, o)) {
555+ Py_DECREF(ret);
556+ return NULL;
557+ }
558+ Py_DECREF(o);
559+ }
560+ return ret;
561+ case EUC:
562+ for (pos = 0; pos < len; pos++) {
563+ if (iseuc(s[pos]) && (pos + 1 < len) && iseuc(s[pos+1])) {
564+ o = PyString_FromStringAndSize(s+pos, 2);
565+ pos++;
566+ } else if ((s[pos] == 0x8e) && (pos + 1 < len) && ishankana(s[pos+1])) {
567+ o = PyString_FromStringAndSize(s+pos, 2);
568+ pos++;
569+ }
570+ else {
571+ o = PyString_FromStringAndSize(s+pos, 1);
572+ }
573+ if (!o) {
574+ Py_DECREF(ret);
575+ return NULL;
576+ }
577+ if (-1 == PyList_Append(ret, o)) {
578+ Py_DECREF(ret);
579+ return NULL;
580+ }
581+ Py_DECREF(o);
582+ }
583+ return ret;
584+ default:
585+ BADENCODING(enc); return NULL;
586+ }
587+
588+ return PyErr_NoMemory();
589+}
590+
591+
592+
593+
594+static PyMethodDef pykf_methods[] = {
595+ {"setdefault", (PyCFunction)pykf_setdefault, METH_VARARGS, SETDEFAULT_DOC},
596+ {"getdefault", (PyCFunction)pykf_getdefault, METH_VARARGS, GETDEFAULT_DOC},
597+ {"guess", (PyCFunction)pykf_guess, METH_VARARGS, GUESS_DOC},
598+ {"tojis", (PyCFunction)pykf_tojis, METH_VARARGS|METH_KEYWORDS, TOJIS_DOC},
599+ {"tosjis", (PyCFunction)pykf_tosjis, METH_VARARGS|METH_KEYWORDS, TOSJIS_DOC},
600+ {"toeuc", (PyCFunction)pykf_toeuc, METH_VARARGS|METH_KEYWORDS, TOEUC_DOC},
601+ {"tohalf_kana", (PyCFunction)pykf_tohalfkana, METH_VARARGS|METH_KEYWORDS, TOHALF_DOC},
602+ {"tofull_kana", (PyCFunction)pykf_tofullkana, METH_VARARGS|METH_KEYWORDS, TOFULL_DOC},
603+ {"split", (PyCFunction)pykf_split, METH_VARARGS|METH_KEYWORDS, SPLIT_DOC},
604+ {"setstrict", (PyCFunction)pykf_setstrict, METH_VARARGS|METH_KEYWORDS, SETSTRICT_DOC},
605+ {"getstrict", (PyCFunction)pykf_getstrict, METH_VARARGS|METH_KEYWORDS, GETSTRICT_DOC},
606+ {NULL, NULL} /* sentinel */
607+};
608+
609+
610+static void _setint(PyObject* dict, char *name, int value)
611+{
612+ PyObject* v;
613+ v = PyInt_FromLong((long) value);
614+ PyDict_SetItemString(dict, name, v);
615+ Py_XDECREF(v);
616+}
617+
618+
619+DL_EXPORT(void) initpykf(void)
620+{
621+ PyObject *m, *d;
622+ int one = 1;
623+ int is_little_endian = (int)*(char*)&one;
624+
625+ m = Py_InitModule("pykf", pykf_methods);
626+ d = PyModule_GetDict(m);
627+
628+ EncodingError = PyErr_NewException("pykf.IllegalEncoding", NULL, NULL);
629+ PyDict_SetItemString(d, "IllegalEncoding", EncodingError);
630+
631+ _setint(d, "ERROR", ERROR);
632+ _setint(d, "UNKNOWN", UNKNOWN);
633+ _setint(d, "ASCII", ASCII);
634+ _setint(d, "SJIS", SJIS);
635+ _setint(d, "EUC", EUC);
636+ _setint(d, "JIS", JIS);
637+ _setint(d, "UTF8", UTF8);
638+ _setint(d, "UTF16_LE", UTF16_LE);
639+ _setint(d, "UTF16_BE", UTF16_BE);
640+ if (is_little_endian) {
641+ _setint(d, "UTF16", UTF16_LE);
642+ }
643+ else {
644+ _setint(d, "UTF16", UTF16_BE);
645+ }
646+
647+}
648+
649+
650+
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/src/jis0213.c (nonexistent)
+++ tags/0.3.5/src/jis0213.c (revision 10)
@@ -0,0 +1,49 @@
1+
2+/* JIS X 0213 char table */
3+
4+unsigned int tbl_jis0213[] = {
5+
6+ 0x222f, 11,
7+ 0x2242, 8,
8+ 0x2251, 11,
9+ 0x226b, 7,
10+ 0x227a, 4,
11+ 0x2321, 15,
12+ 0x233a, 7,
13+ 0x235b, 6,
14+ 0x237b, 4,
15+ 0x2474, 8,
16+ 0x2577, 8,
17+ 0x2639, 8,
18+ 0x2659, 38,
19+ 0x2742, 15,
20+ 0x2772, 13,
21+ 0x2841, 30,
22+ 0x2867, 22,
23+ 0x2921, 94,
24+ 0x2a21, 94,
25+ 0x2b21, 94,
26+ 0x2c21, 83,
27+ 0x2c7d, 2,
28+ 0x2d21, 55,
29+ 0x2d5f, 17,
30+ 0x2d73, 1,
31+ 0x2d78, 2,
32+ 0x2d7d, 2,
33+ 0x2e22, 93,
34+ 0x2f21, 93,
35+ 0x4f55, 41,
36+ 0x7428, 87,
37+ 0x7521, 94,
38+ 0x7621, 94,
39+ 0x7721, 94,
40+ 0x7821, 94,
41+ 0x7921, 94,
42+ 0x7a21, 94,
43+ 0x7b21, 94,
44+ 0x7c21, 94,
45+ 0x7d21, 94,
46+
47+ 0xffff, 0xffff
48+};
49+
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/src/mskanji.c (nonexistent)
+++ tags/0.3.5/src/mskanji.c (revision 10)
@@ -0,0 +1,808 @@
1+
2+/* SJIS - JIS conversion table */
3+
4+unsigned int tbl_sjis2jis[] = {
5+
6+ 0x8790, 0x2262,
7+ 0x8791, 0x2261,
8+ 0x8792, 0x2269,
9+ 0x8795, 0x2265,
10+ 0x8796, 0x225d,
11+ 0x8797, 0x225c,
12+ 0x879a, 0x2268,
13+ 0x879b, 0x2241,
14+ 0x879c, 0x2240,
15+ 0xeef9, 0x224c,
16+ 0xfa40, 0x7c71,
17+ 0xfa41, 0x7c72,
18+ 0xfa42, 0x7c73,
19+ 0xfa43, 0x7c74,
20+ 0xfa44, 0x7c75,
21+ 0xfa45, 0x7c76,
22+ 0xfa46, 0x7c77,
23+ 0xfa47, 0x7c78,
24+ 0xfa48, 0x7c79,
25+ 0xfa49, 0x7c7a,
26+ 0xfa4a, 0x2d35,
27+ 0xfa4b, 0x2d36,
28+ 0xfa4c, 0x2d37,
29+ 0xfa4d, 0x2d38,
30+ 0xfa4e, 0x2d39,
31+ 0xfa4f, 0x2d3a,
32+ 0xfa50, 0x2d3b,
33+ 0xfa51, 0x2d3c,
34+ 0xfa52, 0x2d3d,
35+ 0xfa53, 0x2d3e,
36+ 0xfa54, 0x224c,
37+ 0xfa55, 0x7c7c,
38+ 0xfa56, 0x7c7d,
39+ 0xfa57, 0x7c7e,
40+ 0xfa58, 0x2d6a,
41+ 0xfa59, 0x2d62,
42+ 0xfa5a, 0x2d64,
43+ 0xfa5b, 0x2268,
44+ 0xfa5c, 0x7921,
45+ 0xfa5d, 0x7922,
46+ 0xfa5e, 0x7923,
47+ 0xfa5f, 0x7924,
48+ 0xfa60, 0x7925,
49+ 0xfa61, 0x7926,
50+ 0xfa62, 0x7927,
51+ 0xfa63, 0x7928,
52+ 0xfa64, 0x7929,
53+ 0xfa65, 0x792a,
54+ 0xfa66, 0x792b,
55+ 0xfa67, 0x792c,
56+ 0xfa68, 0x792d,
57+ 0xfa69, 0x792e,
58+ 0xfa6a, 0x792f,
59+ 0xfa6b, 0x7930,
60+ 0xfa6c, 0x7931,
61+ 0xfa6d, 0x7932,
62+ 0xfa6e, 0x7933,
63+ 0xfa6f, 0x7934,
64+ 0xfa70, 0x7935,
65+ 0xfa71, 0x7936,
66+ 0xfa72, 0x7937,
67+ 0xfa73, 0x7938,
68+ 0xfa74, 0x7939,
69+ 0xfa75, 0x793a,
70+ 0xfa76, 0x793b,
71+ 0xfa77, 0x793c,
72+ 0xfa78, 0x793d,
73+ 0xfa79, 0x793e,
74+ 0xfa7a, 0x793f,
75+ 0xfa7b, 0x7940,
76+ 0xfa7c, 0x7941,
77+ 0xfa7d, 0x7942,
78+ 0xfa7e, 0x7943,
79+ 0xfa80, 0x7944,
80+ 0xfa81, 0x7945,
81+ 0xfa82, 0x7946,
82+ 0xfa83, 0x7947,
83+ 0xfa84, 0x7948,
84+ 0xfa85, 0x7949,
85+ 0xfa86, 0x794a,
86+ 0xfa87, 0x794b,
87+ 0xfa88, 0x794c,
88+ 0xfa89, 0x794d,
89+ 0xfa8a, 0x794e,
90+ 0xfa8b, 0x794f,
91+ 0xfa8c, 0x7950,
92+ 0xfa8d, 0x7951,
93+ 0xfa8e, 0x7952,
94+ 0xfa8f, 0x7953,
95+ 0xfa90, 0x7954,
96+ 0xfa91, 0x7955,
97+ 0xfa92, 0x7956,
98+ 0xfa93, 0x7957,
99+ 0xfa94, 0x7958,
100+ 0xfa95, 0x7959,
101+ 0xfa96, 0x795a,
102+ 0xfa97, 0x795b,
103+ 0xfa98, 0x795c,
104+ 0xfa99, 0x795d,
105+ 0xfa9a, 0x795e,
106+ 0xfa9b, 0x795f,
107+ 0xfa9c, 0x7960,
108+ 0xfa9d, 0x7961,
109+ 0xfa9e, 0x7962,
110+ 0xfa9f, 0x7963,
111+ 0xfaa0, 0x7964,
112+ 0xfaa1, 0x7965,
113+ 0xfaa2, 0x7966,
114+ 0xfaa3, 0x7967,
115+ 0xfaa4, 0x7968,
116+ 0xfaa5, 0x7969,
117+ 0xfaa6, 0x796a,
118+ 0xfaa7, 0x796b,
119+ 0xfaa8, 0x796c,
120+ 0xfaa9, 0x796d,
121+ 0xfaaa, 0x796e,
122+ 0xfaab, 0x796f,
123+ 0xfaac, 0x7970,
124+ 0xfaad, 0x7971,
125+ 0xfaae, 0x7972,
126+ 0xfaaf, 0x7973,
127+ 0xfab0, 0x7974,
128+ 0xfab1, 0x7975,
129+ 0xfab2, 0x7976,
130+ 0xfab3, 0x7977,
131+ 0xfab4, 0x7978,
132+ 0xfab5, 0x7979,
133+ 0xfab6, 0x797a,
134+ 0xfab7, 0x797b,
135+ 0xfab8, 0x797c,
136+ 0xfab9, 0x797d,
137+ 0xfaba, 0x797e,
138+ 0xfabb, 0x7a21,
139+ 0xfabc, 0x7a22,
140+ 0xfabd, 0x7a23,
141+ 0xfabe, 0x7a24,
142+ 0xfabf, 0x7a25,
143+ 0xfac0, 0x7a26,
144+ 0xfac1, 0x7a27,
145+ 0xfac2, 0x7a28,
146+ 0xfac3, 0x7a29,
147+ 0xfac4, 0x7a2a,
148+ 0xfac5, 0x7a2b,
149+ 0xfac6, 0x7a2c,
150+ 0xfac7, 0x7a2d,
151+ 0xfac8, 0x7a2e,
152+ 0xfac9, 0x7a2f,
153+ 0xfaca, 0x7a30,
154+ 0xfacb, 0x7a31,
155+ 0xfacc, 0x7a32,
156+ 0xfacd, 0x7a33,
157+ 0xface, 0x7a34,
158+ 0xfacf, 0x7a35,
159+ 0xfad0, 0x7a36,
160+ 0xfad1, 0x7a37,
161+ 0xfad2, 0x7a38,
162+ 0xfad3, 0x7a39,
163+ 0xfad4, 0x7a3a,
164+ 0xfad5, 0x7a3b,
165+ 0xfad6, 0x7a3c,
166+ 0xfad7, 0x7a3d,
167+ 0xfad8, 0x7a3e,
168+ 0xfad9, 0x7a3f,
169+ 0xfada, 0x7a40,
170+ 0xfadb, 0x7a41,
171+ 0xfadc, 0x7a42,
172+ 0xfadd, 0x7a43,
173+ 0xfade, 0x7a44,
174+ 0xfadf, 0x7a45,
175+ 0xfae0, 0x7a46,
176+ 0xfae1, 0x7a47,
177+ 0xfae2, 0x7a48,
178+ 0xfae3, 0x7a49,
179+ 0xfae4, 0x7a4a,
180+ 0xfae5, 0x7a4b,
181+ 0xfae6, 0x7a4c,
182+ 0xfae7, 0x7a4d,
183+ 0xfae8, 0x7a4e,
184+ 0xfae9, 0x7a4f,
185+ 0xfaea, 0x7a50,
186+ 0xfaeb, 0x7a51,
187+ 0xfaec, 0x7a52,
188+ 0xfaed, 0x7a53,
189+ 0xfaee, 0x7a54,
190+ 0xfaef, 0x7a55,
191+ 0xfaf0, 0x7a56,
192+ 0xfaf1, 0x7a57,
193+ 0xfaf2, 0x7a58,
194+ 0xfaf3, 0x7a59,
195+ 0xfaf4, 0x7a5a,
196+ 0xfaf5, 0x7a5b,
197+ 0xfaf6, 0x7a5c,
198+ 0xfaf7, 0x7a5d,
199+ 0xfaf8, 0x7a5e,
200+ 0xfaf9, 0x7a5f,
201+ 0xfafa, 0x7a60,
202+ 0xfafb, 0x7a61,
203+ 0xfafc, 0x7a62,
204+ 0xfb40, 0x7a63,
205+ 0xfb41, 0x7a64,
206+ 0xfb42, 0x7a65,
207+ 0xfb43, 0x7a66,
208+ 0xfb44, 0x7a67,
209+ 0xfb45, 0x7a68,
210+ 0xfb46, 0x7a69,
211+ 0xfb47, 0x7a6a,
212+ 0xfb48, 0x7a6b,
213+ 0xfb49, 0x7a6c,
214+ 0xfb4a, 0x7a6d,
215+ 0xfb4b, 0x7a6e,
216+ 0xfb4c, 0x7a6f,
217+ 0xfb4d, 0x7a70,
218+ 0xfb4e, 0x7a71,
219+ 0xfb4f, 0x7a72,
220+ 0xfb50, 0x7a73,
221+ 0xfb51, 0x7a74,
222+ 0xfb52, 0x7a75,
223+ 0xfb53, 0x7a76,
224+ 0xfb54, 0x7a77,
225+ 0xfb55, 0x7a78,
226+ 0xfb56, 0x7a79,
227+ 0xfb57, 0x7a7a,
228+ 0xfb58, 0x7a7b,
229+ 0xfb59, 0x7a7c,
230+ 0xfb5a, 0x7a7d,
231+ 0xfb5b, 0x7a7e,
232+ 0xfb5c, 0x7b21,
233+ 0xfb5d, 0x7b22,
234+ 0xfb5e, 0x7b23,
235+ 0xfb5f, 0x7b24,
236+ 0xfb60, 0x7b25,
237+ 0xfb61, 0x7b26,
238+ 0xfb62, 0x7b27,
239+ 0xfb63, 0x7b28,
240+ 0xfb64, 0x7b29,
241+ 0xfb65, 0x7b2a,
242+ 0xfb66, 0x7b2b,
243+ 0xfb67, 0x7b2c,
244+ 0xfb68, 0x7b2d,
245+ 0xfb69, 0x7b2e,
246+ 0xfb6a, 0x7b2f,
247+ 0xfb6b, 0x7b30,
248+ 0xfb6c, 0x7b31,
249+ 0xfb6d, 0x7b32,
250+ 0xfb6e, 0x7b33,
251+ 0xfb6f, 0x7b34,
252+ 0xfb70, 0x7b35,
253+ 0xfb71, 0x7b36,
254+ 0xfb72, 0x7b37,
255+ 0xfb73, 0x7b38,
256+ 0xfb74, 0x7b39,
257+ 0xfb75, 0x7b3a,
258+ 0xfb76, 0x7b3b,
259+ 0xfb77, 0x7b3c,
260+ 0xfb78, 0x7b3d,
261+ 0xfb79, 0x7b3e,
262+ 0xfb7a, 0x7b3f,
263+ 0xfb7b, 0x7b40,
264+ 0xfb7c, 0x7b41,
265+ 0xfb7d, 0x7b42,
266+ 0xfb7e, 0x7b43,
267+ 0xfb80, 0x7b44,
268+ 0xfb81, 0x7b45,
269+ 0xfb82, 0x7b46,
270+ 0xfb83, 0x7b47,
271+ 0xfb84, 0x7b48,
272+ 0xfb85, 0x7b49,
273+ 0xfb86, 0x7b4a,
274+ 0xfb87, 0x7b4b,
275+ 0xfb88, 0x7b4c,
276+ 0xfb89, 0x7b4d,
277+ 0xfb8a, 0x7b4e,
278+ 0xfb8b, 0x7b4f,
279+ 0xfb8c, 0x7b50,
280+ 0xfb8d, 0x7b51,
281+ 0xfb8e, 0x7b52,
282+ 0xfb8f, 0x7b53,
283+ 0xfb90, 0x7b54,
284+ 0xfb91, 0x7b55,
285+ 0xfb92, 0x7b56,
286+ 0xfb93, 0x7b57,
287+ 0xfb94, 0x7b58,
288+ 0xfb95, 0x7b59,
289+ 0xfb96, 0x7b5a,
290+ 0xfb97, 0x7b5b,
291+ 0xfb98, 0x7b5c,
292+ 0xfb99, 0x7b5d,
293+ 0xfb9a, 0x7b5e,
294+ 0xfb9b, 0x7b5f,
295+ 0xfb9c, 0x7b60,
296+ 0xfb9d, 0x7b61,
297+ 0xfb9e, 0x7b62,
298+ 0xfb9f, 0x7b63,
299+ 0xfba0, 0x7b64,
300+ 0xfba1, 0x7b65,
301+ 0xfba2, 0x7b66,
302+ 0xfba3, 0x7b67,
303+ 0xfba4, 0x7b68,
304+ 0xfba5, 0x7b69,
305+ 0xfba6, 0x7b6a,
306+ 0xfba7, 0x7b6b,
307+ 0xfba8, 0x7b6c,
308+ 0xfba9, 0x7b6d,
309+ 0xfbaa, 0x7b6e,
310+ 0xfbab, 0x7b6f,
311+ 0xfbac, 0x7b70,
312+ 0xfbad, 0x7b71,
313+ 0xfbae, 0x7b72,
314+ 0xfbaf, 0x7b73,
315+ 0xfbb0, 0x7b74,
316+ 0xfbb1, 0x7b75,
317+ 0xfbb2, 0x7b76,
318+ 0xfbb3, 0x7b77,
319+ 0xfbb4, 0x7b78,
320+ 0xfbb5, 0x7b79,
321+ 0xfbb6, 0x7b7a,
322+ 0xfbb7, 0x7b7b,
323+ 0xfbb8, 0x7b7c,
324+ 0xfbb9, 0x7b7d,
325+ 0xfbba, 0x7b7e,
326+ 0xfbbb, 0x7c21,
327+ 0xfbbc, 0x7c22,
328+ 0xfbbd, 0x7c23,
329+ 0xfbbe, 0x7c24,
330+ 0xfbbf, 0x7c25,
331+ 0xfbc0, 0x7c26,
332+ 0xfbc1, 0x7c27,
333+ 0xfbc2, 0x7c28,
334+ 0xfbc3, 0x7c29,
335+ 0xfbc4, 0x7c2a,
336+ 0xfbc5, 0x7c2b,
337+ 0xfbc6, 0x7c2c,
338+ 0xfbc7, 0x7c2d,
339+ 0xfbc8, 0x7c2e,
340+ 0xfbc9, 0x7c2f,
341+ 0xfbca, 0x7c30,
342+ 0xfbcb, 0x7c31,
343+ 0xfbcc, 0x7c32,
344+ 0xfbcd, 0x7c33,
345+ 0xfbce, 0x7c34,
346+ 0xfbcf, 0x7c35,
347+ 0xfbd0, 0x7c36,
348+ 0xfbd1, 0x7c37,
349+ 0xfbd2, 0x7c38,
350+ 0xfbd3, 0x7c39,
351+ 0xfbd4, 0x7c3a,
352+ 0xfbd5, 0x7c3b,
353+ 0xfbd6, 0x7c3c,
354+ 0xfbd7, 0x7c3d,
355+ 0xfbd8, 0x7c3e,
356+ 0xfbd9, 0x7c3f,
357+ 0xfbda, 0x7c40,
358+ 0xfbdb, 0x7c41,
359+ 0xfbdc, 0x7c42,
360+ 0xfbdd, 0x7c43,
361+ 0xfbde, 0x7c44,
362+ 0xfbdf, 0x7c45,
363+ 0xfbe0, 0x7c46,
364+ 0xfbe1, 0x7c47,
365+ 0xfbe2, 0x7c48,
366+ 0xfbe3, 0x7c49,
367+ 0xfbe4, 0x7c4a,
368+ 0xfbe5, 0x7c4b,
369+ 0xfbe6, 0x7c4c,
370+ 0xfbe7, 0x7c4d,
371+ 0xfbe8, 0x7c4e,
372+ 0xfbe9, 0x7c4f,
373+ 0xfbea, 0x7c50,
374+ 0xfbeb, 0x7c51,
375+ 0xfbec, 0x7c52,
376+ 0xfbed, 0x7c53,
377+ 0xfbee, 0x7c54,
378+ 0xfbef, 0x7c55,
379+ 0xfbf0, 0x7c56,
380+ 0xfbf1, 0x7c57,
381+ 0xfbf2, 0x7c58,
382+ 0xfbf3, 0x7c59,
383+ 0xfbf4, 0x7c5a,
384+ 0xfbf5, 0x7c5b,
385+ 0xfbf6, 0x7c5c,
386+ 0xfbf7, 0x7c5d,
387+ 0xfbf8, 0x7c5e,
388+ 0xfbf9, 0x7c5f,
389+ 0xfbfa, 0x7c60,
390+ 0xfbfb, 0x7c61,
391+ 0xfbfc, 0x7c62,
392+ 0xfc40, 0x7c63,
393+ 0xfc41, 0x7c64,
394+ 0xfc42, 0x7c65,
395+ 0xfc43, 0x7c66,
396+ 0xfc44, 0x7c67,
397+ 0xfc45, 0x7c68,
398+ 0xfc46, 0x7c69,
399+ 0xfc47, 0x7c6a,
400+ 0xfc48, 0x7c6b,
401+ 0xfc49, 0x7c6c,
402+ 0xfc4a, 0x7c6d,
403+ 0xfc4b, 0x7c6e,
404+ 0xffff, 0xffff,
405+};
406+
407+/* JIS - SJIS conversion table */
408+
409+unsigned int tbl_jis2sjis[] = {
410+
411+ 0x2240, 0x879c,
412+ 0x2241, 0x879b,
413+ 0x224c, 0xeef9,
414+ 0x225c, 0x8797,
415+ 0x225d, 0x8796,
416+ 0x2261, 0x8791,
417+ 0x2262, 0x8790,
418+ 0x2265, 0x8795,
419+ 0x2268, 0x879a,
420+ 0x2269, 0x8792,
421+ 0x2d35, 0x8754,
422+ 0x2d36, 0x8755,
423+ 0x2d37, 0x8756,
424+ 0x2d38, 0x8757,
425+ 0x2d39, 0x8758,
426+ 0x2d3a, 0x8759,
427+ 0x2d3b, 0x875a,
428+ 0x2d3c, 0x875b,
429+ 0x2d3d, 0x875c,
430+ 0x2d3e, 0x875d,
431+ 0x2d62, 0x8782,
432+ 0x2d64, 0x8784,
433+ 0x2d6a, 0x878a,
434+ 0x7921, 0xed40,
435+ 0x7922, 0xed41,
436+ 0x7923, 0xed42,
437+ 0x7924, 0xed43,
438+ 0x7925, 0xed44,
439+ 0x7926, 0xed45,
440+ 0x7927, 0xed46,
441+ 0x7928, 0xed47,
442+ 0x7929, 0xed48,
443+ 0x792a, 0xed49,
444+ 0x792b, 0xed4a,
445+ 0x792c, 0xed4b,
446+ 0x792d, 0xed4c,
447+ 0x792e, 0xed4d,
448+ 0x792f, 0xed4e,
449+ 0x7930, 0xed4f,
450+ 0x7931, 0xed50,
451+ 0x7932, 0xed51,
452+ 0x7933, 0xed52,
453+ 0x7934, 0xed53,
454+ 0x7935, 0xed54,
455+ 0x7936, 0xed55,
456+ 0x7937, 0xed56,
457+ 0x7938, 0xed57,
458+ 0x7939, 0xed58,
459+ 0x793a, 0xed59,
460+ 0x793b, 0xed5a,
461+ 0x793c, 0xed5b,
462+ 0x793d, 0xed5c,
463+ 0x793e, 0xed5d,
464+ 0x793f, 0xed5e,
465+ 0x7940, 0xed5f,
466+ 0x7941, 0xed60,
467+ 0x7942, 0xed61,
468+ 0x7943, 0xed62,
469+ 0x7944, 0xed63,
470+ 0x7945, 0xed64,
471+ 0x7946, 0xed65,
472+ 0x7947, 0xed66,
473+ 0x7948, 0xed67,
474+ 0x7949, 0xed68,
475+ 0x794a, 0xed69,
476+ 0x794b, 0xed6a,
477+ 0x794c, 0xed6b,
478+ 0x794d, 0xed6c,
479+ 0x794e, 0xed6d,
480+ 0x794f, 0xed6e,
481+ 0x7950, 0xed6f,
482+ 0x7951, 0xed70,
483+ 0x7952, 0xed71,
484+ 0x7953, 0xed72,
485+ 0x7954, 0xed73,
486+ 0x7955, 0xed74,
487+ 0x7956, 0xed75,
488+ 0x7957, 0xed76,
489+ 0x7958, 0xed77,
490+ 0x7959, 0xed78,
491+ 0x795a, 0xed79,
492+ 0x795b, 0xed7a,
493+ 0x795c, 0xed7b,
494+ 0x795d, 0xed7c,
495+ 0x795e, 0xed7d,
496+ 0x795f, 0xed7e,
497+ 0x7960, 0xed80,
498+ 0x7961, 0xed81,
499+ 0x7962, 0xed82,
500+ 0x7963, 0xed83,
501+ 0x7964, 0xed84,
502+ 0x7965, 0xed85,
503+ 0x7966, 0xed86,
504+ 0x7967, 0xed87,
505+ 0x7968, 0xed88,
506+ 0x7969, 0xed89,
507+ 0x796a, 0xed8a,
508+ 0x796b, 0xed8b,
509+ 0x796c, 0xed8c,
510+ 0x796d, 0xed8d,
511+ 0x796e, 0xed8e,
512+ 0x796f, 0xed8f,
513+ 0x7970, 0xed90,
514+ 0x7971, 0xed91,
515+ 0x7972, 0xed92,
516+ 0x7973, 0xed93,
517+ 0x7974, 0xed94,
518+ 0x7975, 0xed95,
519+ 0x7976, 0xed96,
520+ 0x7977, 0xed97,
521+ 0x7978, 0xed98,
522+ 0x7979, 0xed99,
523+ 0x797a, 0xed9a,
524+ 0x797b, 0xed9b,
525+ 0x797c, 0xed9c,
526+ 0x797d, 0xed9d,
527+ 0x797e, 0xed9e,
528+ 0x7a21, 0xed9f,
529+ 0x7a22, 0xeda0,
530+ 0x7a23, 0xeda1,
531+ 0x7a24, 0xeda2,
532+ 0x7a25, 0xeda3,
533+ 0x7a26, 0xeda4,
534+ 0x7a27, 0xeda5,
535+ 0x7a28, 0xeda6,
536+ 0x7a29, 0xeda7,
537+ 0x7a2a, 0xeda8,
538+ 0x7a2b, 0xeda9,
539+ 0x7a2c, 0xedaa,
540+ 0x7a2d, 0xedab,
541+ 0x7a2e, 0xedac,
542+ 0x7a2f, 0xedad,
543+ 0x7a30, 0xedae,
544+ 0x7a31, 0xedaf,
545+ 0x7a32, 0xedb0,
546+ 0x7a33, 0xedb1,
547+ 0x7a34, 0xedb2,
548+ 0x7a35, 0xedb3,
549+ 0x7a36, 0xedb4,
550+ 0x7a37, 0xedb5,
551+ 0x7a38, 0xedb6,
552+ 0x7a39, 0xedb7,
553+ 0x7a3a, 0xedb8,
554+ 0x7a3b, 0xedb9,
555+ 0x7a3c, 0xedba,
556+ 0x7a3d, 0xedbb,
557+ 0x7a3e, 0xedbc,
558+ 0x7a3f, 0xedbd,
559+ 0x7a40, 0xedbe,
560+ 0x7a41, 0xedbf,
561+ 0x7a42, 0xedc0,
562+ 0x7a43, 0xedc1,
563+ 0x7a44, 0xedc2,
564+ 0x7a45, 0xedc3,
565+ 0x7a46, 0xedc4,
566+ 0x7a47, 0xedc5,
567+ 0x7a48, 0xedc6,
568+ 0x7a49, 0xedc7,
569+ 0x7a4a, 0xedc8,
570+ 0x7a4b, 0xedc9,
571+ 0x7a4c, 0xedca,
572+ 0x7a4d, 0xedcb,
573+ 0x7a4e, 0xedcc,
574+ 0x7a4f, 0xedcd,
575+ 0x7a50, 0xedce,
576+ 0x7a51, 0xedcf,
577+ 0x7a52, 0xedd0,
578+ 0x7a53, 0xedd1,
579+ 0x7a54, 0xedd2,
580+ 0x7a55, 0xedd3,
581+ 0x7a56, 0xedd4,
582+ 0x7a57, 0xedd5,
583+ 0x7a58, 0xedd6,
584+ 0x7a59, 0xedd7,
585+ 0x7a5a, 0xedd8,
586+ 0x7a5b, 0xedd9,
587+ 0x7a5c, 0xedda,
588+ 0x7a5d, 0xeddb,
589+ 0x7a5e, 0xeddc,
590+ 0x7a5f, 0xeddd,
591+ 0x7a60, 0xedde,
592+ 0x7a61, 0xeddf,
593+ 0x7a62, 0xede0,
594+ 0x7a63, 0xede1,
595+ 0x7a64, 0xede2,
596+ 0x7a65, 0xede3,
597+ 0x7a66, 0xede4,
598+ 0x7a67, 0xede5,
599+ 0x7a68, 0xede6,
600+ 0x7a69, 0xede7,
601+ 0x7a6a, 0xede8,
602+ 0x7a6b, 0xede9,
603+ 0x7a6c, 0xedea,
604+ 0x7a6d, 0xedeb,
605+ 0x7a6e, 0xedec,
606+ 0x7a6f, 0xeded,
607+ 0x7a70, 0xedee,
608+ 0x7a71, 0xedef,
609+ 0x7a72, 0xedf0,
610+ 0x7a73, 0xedf1,
611+ 0x7a74, 0xedf2,
612+ 0x7a75, 0xedf3,
613+ 0x7a76, 0xedf4,
614+ 0x7a77, 0xedf5,
615+ 0x7a78, 0xedf6,
616+ 0x7a79, 0xedf7,
617+ 0x7a7a, 0xedf8,
618+ 0x7a7b, 0xedf9,
619+ 0x7a7c, 0xedfa,
620+ 0x7a7d, 0xedfb,
621+ 0x7a7e, 0xedfc,
622+ 0x7b21, 0xee40,
623+ 0x7b22, 0xee41,
624+ 0x7b23, 0xee42,
625+ 0x7b24, 0xee43,
626+ 0x7b25, 0xee44,
627+ 0x7b26, 0xee45,
628+ 0x7b27, 0xee46,
629+ 0x7b28, 0xee47,
630+ 0x7b29, 0xee48,
631+ 0x7b2a, 0xee49,
632+ 0x7b2b, 0xee4a,
633+ 0x7b2c, 0xee4b,
634+ 0x7b2d, 0xee4c,
635+ 0x7b2e, 0xee4d,
636+ 0x7b2f, 0xee4e,
637+ 0x7b30, 0xee4f,
638+ 0x7b31, 0xee50,
639+ 0x7b32, 0xee51,
640+ 0x7b33, 0xee52,
641+ 0x7b34, 0xee53,
642+ 0x7b35, 0xee54,
643+ 0x7b36, 0xee55,
644+ 0x7b37, 0xee56,
645+ 0x7b38, 0xee57,
646+ 0x7b39, 0xee58,
647+ 0x7b3a, 0xee59,
648+ 0x7b3b, 0xee5a,
649+ 0x7b3c, 0xee5b,
650+ 0x7b3d, 0xee5c,
651+ 0x7b3e, 0xee5d,
652+ 0x7b3f, 0xee5e,
653+ 0x7b40, 0xee5f,
654+ 0x7b41, 0xee60,
655+ 0x7b42, 0xee61,
656+ 0x7b43, 0xee62,
657+ 0x7b44, 0xee63,
658+ 0x7b45, 0xee64,
659+ 0x7b46, 0xee65,
660+ 0x7b47, 0xee66,
661+ 0x7b48, 0xee67,
662+ 0x7b49, 0xee68,
663+ 0x7b4a, 0xee69,
664+ 0x7b4b, 0xee6a,
665+ 0x7b4c, 0xee6b,
666+ 0x7b4d, 0xee6c,
667+ 0x7b4e, 0xee6d,
668+ 0x7b4f, 0xee6e,
669+ 0x7b50, 0xee6f,
670+ 0x7b51, 0xee70,
671+ 0x7b52, 0xee71,
672+ 0x7b53, 0xee72,
673+ 0x7b54, 0xee73,
674+ 0x7b55, 0xee74,
675+ 0x7b56, 0xee75,
676+ 0x7b57, 0xee76,
677+ 0x7b58, 0xee77,
678+ 0x7b59, 0xee78,
679+ 0x7b5a, 0xee79,
680+ 0x7b5b, 0xee7a,
681+ 0x7b5c, 0xee7b,
682+ 0x7b5d, 0xee7c,
683+ 0x7b5e, 0xee7d,
684+ 0x7b5f, 0xee7e,
685+ 0x7b60, 0xee80,
686+ 0x7b61, 0xee81,
687+ 0x7b62, 0xee82,
688+ 0x7b63, 0xee83,
689+ 0x7b64, 0xee84,
690+ 0x7b65, 0xee85,
691+ 0x7b66, 0xee86,
692+ 0x7b67, 0xee87,
693+ 0x7b68, 0xee88,
694+ 0x7b69, 0xee89,
695+ 0x7b6a, 0xee8a,
696+ 0x7b6b, 0xee8b,
697+ 0x7b6c, 0xee8c,
698+ 0x7b6d, 0xee8d,
699+ 0x7b6e, 0xee8e,
700+ 0x7b6f, 0xee8f,
701+ 0x7b70, 0xee90,
702+ 0x7b71, 0xee91,
703+ 0x7b72, 0xee92,
704+ 0x7b73, 0xee93,
705+ 0x7b74, 0xee94,
706+ 0x7b75, 0xee95,
707+ 0x7b76, 0xee96,
708+ 0x7b77, 0xee97,
709+ 0x7b78, 0xee98,
710+ 0x7b79, 0xee99,
711+ 0x7b7a, 0xee9a,
712+ 0x7b7b, 0xee9b,
713+ 0x7b7c, 0xee9c,
714+ 0x7b7d, 0xee9d,
715+ 0x7b7e, 0xee9e,
716+ 0x7c21, 0xee9f,
717+ 0x7c22, 0xeea0,
718+ 0x7c23, 0xeea1,
719+ 0x7c24, 0xeea2,
720+ 0x7c25, 0xeea3,
721+ 0x7c26, 0xeea4,
722+ 0x7c27, 0xeea5,
723+ 0x7c28, 0xeea6,
724+ 0x7c29, 0xeea7,
725+ 0x7c2a, 0xeea8,
726+ 0x7c2b, 0xeea9,
727+ 0x7c2c, 0xeeaa,
728+ 0x7c2d, 0xeeab,
729+ 0x7c2e, 0xeeac,
730+ 0x7c2f, 0xeead,
731+ 0x7c30, 0xeeae,
732+ 0x7c31, 0xeeaf,
733+ 0x7c32, 0xeeb0,
734+ 0x7c33, 0xeeb1,
735+ 0x7c34, 0xeeb2,
736+ 0x7c35, 0xeeb3,
737+ 0x7c36, 0xeeb4,
738+ 0x7c37, 0xeeb5,
739+ 0x7c38, 0xeeb6,
740+ 0x7c39, 0xeeb7,
741+ 0x7c3a, 0xeeb8,
742+ 0x7c3b, 0xeeb9,
743+ 0x7c3c, 0xeeba,
744+ 0x7c3d, 0xeebb,
745+ 0x7c3e, 0xeebc,
746+ 0x7c3f, 0xeebd,
747+ 0x7c40, 0xeebe,
748+ 0x7c41, 0xeebf,
749+ 0x7c42, 0xeec0,
750+ 0x7c43, 0xeec1,
751+ 0x7c44, 0xeec2,
752+ 0x7c45, 0xeec3,
753+ 0x7c46, 0xeec4,
754+ 0x7c47, 0xeec5,
755+ 0x7c48, 0xeec6,
756+ 0x7c49, 0xeec7,
757+ 0x7c4a, 0xeec8,
758+ 0x7c4b, 0xeec9,
759+ 0x7c4c, 0xeeca,
760+ 0x7c4d, 0xeecb,
761+ 0x7c4e, 0xeecc,
762+ 0x7c4f, 0xeecd,
763+ 0x7c50, 0xeece,
764+ 0x7c51, 0xeecf,
765+ 0x7c52, 0xeed0,
766+ 0x7c53, 0xeed1,
767+ 0x7c54, 0xeed2,
768+ 0x7c55, 0xeed3,
769+ 0x7c56, 0xeed4,
770+ 0x7c57, 0xeed5,
771+ 0x7c58, 0xeed6,
772+ 0x7c59, 0xeed7,
773+ 0x7c5a, 0xeed8,
774+ 0x7c5b, 0xeed9,
775+ 0x7c5c, 0xeeda,
776+ 0x7c5d, 0xeedb,
777+ 0x7c5e, 0xeedc,
778+ 0x7c5f, 0xeedd,
779+ 0x7c60, 0xeede,
780+ 0x7c61, 0xeedf,
781+ 0x7c62, 0xeee0,
782+ 0x7c63, 0xeee1,
783+ 0x7c64, 0xeee2,
784+ 0x7c65, 0xeee3,
785+ 0x7c66, 0xeee4,
786+ 0x7c67, 0xeee5,
787+ 0x7c68, 0xeee6,
788+ 0x7c69, 0xeee7,
789+ 0x7c6a, 0xeee8,
790+ 0x7c6b, 0xeee9,
791+ 0x7c6c, 0xeeea,
792+ 0x7c6d, 0xeeeb,
793+ 0x7c6e, 0xeeec,
794+ 0x7c71, 0xeeef,
795+ 0x7c72, 0xeef0,
796+ 0x7c73, 0xeef1,
797+ 0x7c74, 0xeef2,
798+ 0x7c75, 0xeef3,
799+ 0x7c76, 0xeef4,
800+ 0x7c77, 0xeef5,
801+ 0x7c78, 0xeef6,
802+ 0x7c79, 0xeef7,
803+ 0x7c7a, 0xeef8,
804+ 0x7c7c, 0xeefa,
805+ 0x7c7d, 0xeefb,
806+ 0x7c7e, 0xeefc,
807+ 0xffff, 0xffff,
808+};
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/MANIFEST.in (nonexistent)
+++ tags/0.3.5/MANIFEST.in (revision 10)
@@ -0,0 +1,3 @@
1+include readme.sjis
2+include src/convert.h
3+include src/pykf.h
--- tags/0.3.5/test/hankana.txt (nonexistent)
+++ tags/0.3.5/test/hankana.txt (revision 10)
@@ -0,0 +1,94 @@
1+= pykf: Kanji code filter =
2+ 2002/3/3 Atsuo Ishimoto<ishimoto@gembook.org>
3+------------------------------------------------------------------------
4+
5+pykfハ、ShiftJIS, EUC-JP, JISコードヲ相互ニ変換スルタメノモジュールデス。
6+
7+== インストール方法 ==
8+
9+python setup.py install
10+
11+== リファレンス ==
12+
13+SJIS, EUC, JIS, UNKNOWN:
14+ 変換元ノエンコーディングヲ指定スル際ニ使用シマス。UNKNOWNヲ指定スルト、
15+ 入力文字列カラエンコーディングヲ推定シテ変換シマス。
16+
17+tojis(encoding, s):
18+ 入力文字列sヲJISニ変換シマス。
19+
20+toeuc(encoding, s):
21+ 入力文字列sヲEUC-JPニ変換シマス。
22+
23+tosjis(encoding, s):
24+ 入力文字列sヲShiftJISニ変換シマス。
25+
26+== sample ==
27+
28+import pykf
29+s = "アイウエオ"
30+s1 = pykf.tojis(pykf.UNKNOWN, s)
31+s2 = pykf.tosjis(pykf.JIS, s1)
32+assert(s2 == s)
33+
34+
35+== 実装メモ ==
36+
37+pykfハ、マイクロソフト漢字コードノ拡張文字ヲ変換スルタメ、JIS X 0213ヲ
38+中途半端ニサポートシテイマス。規格票ヲ見ナイデ書イテマスノデ、問題点モ
39+多イカト思イマスガ...
40+尚、JIS X 0213ノコード表ハ、JISX0213 InfoCenter
41+ http://www.jca.apc.org/~earthian/aozora/0213.html
42+ノデータヲ使ワセテイタダキマシタ。
43+
44+
45+・SJISノIBM拡張漢字等ハ、主要WebブラウザデアルInternet Explorer/Mozillaニ
46+ 準拠シ、JIS X 0213 1面(第三水準)ニ変換シマス。コノタメ、OSF 日本ベンダ
47+ 協議会ノ「日本語 EUC ・シフト JIS 間コード変換仕様トコード系 実態調査」
48+ (*1) ニ準拠シタライブラリ・アプリケーションナドトハ変換結果ガ異ナリマス。
49+
50+・EUC/JIS->SJIS変換時、NEC選定IBM拡張文字ハ使用セズ、スベテIBM拡張文字ト
51+ シテ変換サレマス。
52+
53+・SJIS/EUC->JIS変換時、JIS X 0208デ定義サレタ文字ハ、JIS X 0213 附属書2
54+ 4.1(e)デ禁止サレテイル文字デアッテモ、ESC $ B デ呼ビ出シマス。
55+
56+・第四水準ノ変換ハサポートシテイマセン。
57+
58+・ユーザ定義外字(SJIS 0xF040-0xF9FC)ハ、全テ'〓'ニ変換シマス。
59+
60+(*1) http://www.opengroup.or.jp/jvc/cde/sjis-euc.html
61+
62+
63+== バージョン情報 ==
64+2002/03/02 0.1.0 初期公開
65+
66+
67+== ライセンス ==
68+Japanese Kanji filter module
69+ Copyright (c) 2002, Atsuo Ishimoto. All rights reserved.
70+
71+Permission to use, copy, modify, and distribute this software and its
72+documentation for any purpose and without fee is hereby granted, provided that
73+the above copyright notice appear in all copies and that both that copyright
74+notice and this permission notice appear in supporting documentation, and
75+that the name of Atsuo Ishimoto not be used in advertising or publicity
76+pertaining to distribution of the software without specific, written prior
77+permission.
78+
79+ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
80+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
81+EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR
82+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
83+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
84+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
85+PERFORMANCE OF THIS SOFTWARE.
86+
87+---------------------------------------------------------------------
88+This module is based on kf.c written by Haruhiko Okumura.
89+ Copyright (c) 1995-2000 Haruhiko Okumura
90+ This file may be freely modified/redistributed.
91+
92+Original kf.c:
93+ http://www.matsusaka-u.ac.jp/~okumura/kf.html
94+
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/test/test_kf.py (nonexistent)
+++ tags/0.3.5/test/test_kf.py (revision 10)
@@ -0,0 +1,221 @@
1+from __future__ import nested_scopes
2+
3+import unittest
4+from pykf import *
5+
6+def readfile(fname):
7+ lines = [l for l in open(fname).readlines() if l[0:1] != '#']
8+ sjis = [int(l.split(",")[0], 16) for l in lines]
9+ sjis = "".join([chr(s >> 8)+chr(s & 0xff) for s in sjis])
10+ euc = [int(l.split(",")[1], 16) for l in lines]
11+ euc = "".join([chr(s >> 8)+chr(s & 0xff) for s in euc])
12+
13+ return sjis, euc
14+
15+class test_kf(unittest.TestCase):
16+
17+ def readtbl(self, fname):
18+ f = open(fname)
19+ lines = [l for l in f.readlines() if l[0:1] != '#']
20+ sjis = [int(l.split(",")[0], 16) for l in lines]
21+ sjis = "".join([chr(s >> 8)+chr(s & 0xff) for s in sjis])
22+ euc = [int(l.split(",")[1], 16) for l in lines]
23+ euc = "".join([chr(s >> 8)+chr(s & 0xff) for s in euc])
24+ return sjis, euc
25+
26+ def conv(self, sjis):
27+ euc1 = toeuc(sjis)
28+ jis1 = tojis(sjis)
29+ euc2 = toeuc(jis1)
30+ jis2 = tojis(euc1)
31+ sjis1 = tosjis(jis1)
32+ sjis2 = tosjis(euc1)
33+
34+ for i in range(0, len(sjis), 2):
35+ s = sjis[i:i+2]
36+ e1 = sjis1[i:i+2]
37+ if s != e1:
38+ print "%s(%x%x) %s(%x%x)" % (s, ord(s[0]), ord(s[1]), e1, ord(e1[0]), ord(e1[1]))
39+
40+
41+ assert sjis==sjis1
42+ assert sjis1==sjis2
43+ assert (max(sjis) < '\x80') or sjis2 != euc1
44+ assert (max(sjis) < '\x80') or sjis2 != jis1
45+ assert euc1==euc2
46+ assert (max(sjis) < '\x80') or euc1 !=jis1
47+ assert jis1==jis2
48+
49+ assert (max(sjis) < '\x80') or guess(sjis1) == SJIS
50+ assert (max(sjis) < '\x80') or guess(euc1) == EUC
51+ assert (max(sjis) < '\x80') or guess(jis1) == JIS
52+
53+
54+ def testBasic(self):
55+ sjis = open("../readme.sjis").read()
56+ self.conv(sjis)
57+
58+ def testHankana(self):
59+ sjis = open("hankana.txt").read()
60+ self.conv(sjis)
61+
62+ def testNEC(self):
63+ sjis, euc = self.readtbl("../misc/nectoeuc.txt")
64+ assert toeuc(sjis) == euc
65+ assert toeuc(tojis(sjis)) == euc
66+ assert tosjis(euc) == sjis
67+
68+ def testNECIBM(self):
69+ sjis, euc = self.readtbl("../misc/necibmtoeuc.txt")
70+ assert toeuc(sjis) == euc
71+ assert toeuc(tojis(sjis)) == euc
72+ assert tosjis(euc) == sjis
73+
74+ def testIBM(self):
75+ sjis, euc = self.readtbl("../misc/ibmtoeuc.txt")
76+ assert toeuc(sjis) == euc
77+ assert toeuc(tojis(sjis)) == euc
78+ assert tosjis(euc) != sjis
79+ assert unicode(tosjis(euc), "japanese.ms932") == unicode(sjis, "japanese.ms932")
80+
81+ def testGaiji(self):
82+ sjis = "".join([chr(x)+chr(y) for x in range(0xf0, 0xfa) for y in range(0x40, 0x7e)])
83+ assert tosjis(toeuc(sjis)) == "\x81\xac" * (len(sjis)/2)
84+ assert tosjis(tojis(sjis)) == "\x81\xac" * (len(sjis)/2)
85+
86+ sjis = "".join([chr(x)+chr(y) for x in range(0xf0, 0xfa) for y in range(0x80, 0xfd)])
87+ assert tosjis(toeuc(sjis)) == "\x81\xac" * (len(sjis)/2)
88+ assert tosjis(tojis(sjis)) == "\x81\xac" * (len(sjis)/2)
89+
90+ def testUtf8(self):
91+ utf8 = "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a"
92+ assert guess(utf8) == UTF8
93+ assert guess("\xef\xbb\xbf") == UTF8
94+
95+ def testJisNormalize(self):
96+ sjis = "\x82\xa0"
97+ jis = tojis(sjis, SJIS)
98+ assert jis[-3:] == '\x1b(B'
99+ assert tosjis(jis, JIS) == sjis
100+
101+ euc = toeuc("\x82\xa0", SJIS)
102+ jis = tojis(euc, EUC)
103+ assert jis[-3:] == '\x1b(B'
104+ assert toeuc(jis, JIS) == euc
105+
106+class test_zerolen(unittest.TestCase):
107+ def test_zerolen(self):
108+ src = ""
109+ assert tosjis(src) == ""
110+ assert toeuc(src) == ""
111+ assert tojis(src) == ""
112+
113+ assert tosjis(src, EUC) == ""
114+ assert tosjis(src, JIS) == ""
115+ assert tosjis("\x1b(I", JIS) == ""
116+ assert toeuc(src, SJIS) == ""
117+ assert toeuc(src, JIS) == ""
118+ assert toeuc("\x1b(I", JIS) == ""
119+ assert tojis(src, SJIS) == ""
120+ assert tojis(src, EUC) == ""
121+
122+class test_split(unittest.TestCase):
123+ def test_split(self):
124+ ascii = "abcdefg"
125+ sjis = "abc\x82\xa0\x82\xa1\x82\xa2\xb1\xb2\xb3abc\x82\xa0"
126+
127+ assert "".join(split(ascii)) == ascii
128+ assert "".join(split(sjis)) == sjis
129+ assert "".join(split(toeuc(sjis))) == toeuc(sjis)
130+ assert "".join(split(tojis(sjis))) == tojis(sjis)
131+
132+
133+class test_tohalf(unittest.TestCase):
134+ sjis = 'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
135+ sjis_half = 'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
136+ all_half = '\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
137+ all_full = "\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["
138+
139+ def test_sjis(self):
140+ assert tohalf_kana(self.sjis, SJIS) == self.sjis_half
141+ assert tohalf_kana(self.all_full, SJIS) == self.all_half
142+
143+ def test_euc(self):
144+ e = toeuc(self.sjis, SJIS)
145+ assert tohalf_kana(e, EUC) == toeuc(self.sjis_half, SJIS)
146+
147+ e = toeuc(self.all_full, SJIS)
148+ assert tohalf_kana(e, EUC) == toeuc(self.all_half, SJIS)
149+
150+
151+class test_tofull(unittest.TestCase):
152+ sjis = 'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
153+ sjis_half = 'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
154+ all_half = '\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
155+ all_full = "\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["
156+
157+ def test_sjis(self):
158+# print tofull_kana(self.sjis_half, SJIS)
159+ assert tofull_kana(self.sjis_half, SJIS) == self.sjis
160+ assert tofull_kana(self.all_half, SJIS) == self.all_full
161+
162+ def test_euc(self):
163+ e = toeuc(self.sjis_half, SJIS)
164+ assert tofull_kana(e, EUC) == toeuc(self.sjis, SJIS)
165+
166+ e = toeuc(self.all_half, SJIS)
167+ assert tofull_kana(e, EUC) == toeuc(self.all_full, SJIS)
168+
169+class test_strict(unittest.TestCase):
170+ def test_sjis(self):
171+ s1 = "あいうえお"
172+ assert guess(s1, True) == SJIS
173+ assert guess(s1, False) == SJIS
174+ s2 = "あいうえおかきくけこ"*1000 + "\xf0\x01"
175+ assert guess(s2, False) == SJIS
176+ assert guess(s2, True) == ERROR
177+
178+ def test_euc(self):
179+ s1 = toeuc("あいうえお", SJIS)
180+ assert guess(s1, True) == EUC
181+ assert guess(s1, False) == EUC
182+ s2 = toeuc("あいうえおかきくけこ"*1000 + "\xf0\x01", SJIS)
183+ assert guess(s2, False) == EUC
184+ assert guess(s2, True) == ERROR
185+
186+ def test_jis(self):
187+ s1 = tojis("あいうえお", SJIS)
188+ assert guess(s1, True) == JIS
189+ assert guess(s1, False) == JIS
190+ s2 = tojis("あいうえおかきくけこ" + "\xf0\x01", SJIS)
191+ assert guess(s2, False) == UNKNOWN
192+ assert guess(s2, True) == ERROR
193+
194+ def test_flag(self):
195+ setstrict(True)
196+ assert getstrict()
197+
198+ setstrict(False)
199+ assert not getstrict()
200+
201+ s2 = "あいうえおかきくけこ"*1000 + "\xf0\x01"
202+ assert guess(s2) == SJIS
203+ setstrict(True)
204+ assert guess(s2) == ERROR
205+ setstrict(False)
206+
207+
208+class test_j0208(unittest.TestCase):
209+ def test_sjis(self):
210+ s1 = "㈱"
211+
212+ assert tojis(s1, SJIS, j0208=False) == '\x1b$(O-j\x1b(B'
213+ assert tojis(s1, SJIS, j0208=True) == '\x1b$B-j\x1b(B'
214+
215+ assert tosjis(tojis(s1, SJIS, j0208=False)) == s1
216+ assert tosjis(tojis(s1, SJIS, j0208=True)) == s1
217+
218+if __name__ == '__main__':
219+ unittest.main()
220+
221+
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
--- tags/0.3.5/test/test_ibm.py (nonexistent)
+++ tags/0.3.5/test/test_ibm.py (revision 10)
@@ -0,0 +1,397 @@
def _ibm_extension_chars():
    """Return the two-byte codes 0xfa40 .. 0xfc4b as a list of strings.

    Lead bytes are 0xfa, 0xfb and 0xfc; trail bytes run from 0x40 to 0xfc
    with 0x7f skipped, and the last row stops at 0xfc4b.  That yields 388
    entries in ascending code order.

    The codes are spelled numerically instead of as literal characters so the
    table survives transcoding of this file.  Under Python 2 (which the
    script using this table targets) ``chr`` returns one byte, so every entry
    is a two-byte string.
    NOTE(review): confirm the entries match the literals in the repository
    copy; this rendering shows every literal as "?".
    """
    chars = []
    for lead in (0xfa, 0xfb, 0xfc):
        for trail in range(0x40, 0xfd):
            if trail == 0x7f:
                # 0x7f is not used as a trail byte in this table.
                continue
            if lead == 0xfc and trail > 0x4b:
                # The table ends at 0xfc4b.
                break
            chars.append(chr(lead) + chr(trail))
    return chars


# One two-byte string per code point, 0xfa40 first and 0xfc4b last.
IBM = _ibm_extension_chars()
391+
392+import pykf
393+for c in IBM:
394+ e = pykf.toeuc(c, pykf.EUC)
395+ s = pykf.tosjis(e, pykf.SJIS)
396+ print c, s, hex(ord(c[0])<< 8 | ord(c[1])), hex(ord(s[0])<< 8 | ord(s[1]))
397+ assert c != s
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Afficher sur ancien navigateur de dépôt.