• R/O
  • SSH
  • HTTPS

pykf: Commit


Commit MetaInfo

Révision : 8 (tree)
Heure : 2008-01-18 17:36:50
Auteur : ishimoto

Message de Log

Better UTF-8 detection.

Change Summary

Modification

--- trunk/setup.py (revision 7)
+++ trunk/setup.py (revision 8)
@@ -3,7 +3,7 @@
3 3 from distutils.core import setup, Extension
4 4
5 5 setup (name = "pykf",
6- version = "0.3.4",
6+ version = "0.3.5",
7 7 description = "Japanese Kanji code filter",
8 8 author = "Atsuo Ishimoto",
9 9 author_email = "ishimoto@gembook.org",
--- trunk/src/convert.h (revision 7)
+++ trunk/src/convert.h (revision 8)
@@ -10,5 +10,10 @@
10 10
11 11 #define isutf8_2byte(c) (0xc0<=c && c <= 0xdf)
12 12 #define isutf8_3byte(c) (0xe0<=c && c <= 0xef)
13+#define isutf8_4byte(c) (0xf0<=c && c <= 0xf7)
14+#define isutf8_5byte(c) (0xf8<=c && c <= 0xfb)
15+#define isutf8_6byte(c) (0xfc<=c && c <= 0xfd)
13 16 #define isutf8_trail(c) (0x80<=c && c <= 0xbf)
17+
18+#define utf8_len(c) (isutf8_2byte(c)?2:isutf8_3byte(c)?3:isutf8_4byte(c)?4:isutf8_5byte(c)?5:isutf8_6byte(c)?6:0)
14 19 #define CONV_FAILED 0x222e
--- trunk/src/converter.c (revision 7)
+++ trunk/src/converter.c (revision 8)
@@ -1,1447 +1,1445 @@
1-/*********************************************************************
2-
3-Japanese Kanji filter module
4- Copyright (c) 2002, Atsuo Ishimoto. All rights reserved.
5-
6-Permission to use, copy, modify, and distribute this software and its
7-documentation for any purpose and without fee is hereby granted, provided that
8-the above copyright notice appear in all copies and that both that copyright
9-notice and this permission notice appear in supporting documentation, and that
10-the name of Atsuo Ishimoto not be used in advertising or publicity pertaining
11-to distribution of the software without specific, written prior permission.
12-
13-ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
14-INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
15-EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR
16-CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
17-USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
18-OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
19-PERFORMANCE OF THIS SOFTWARE.
20-
21----------------------------------------------------------------------
22-This module is besed on kf.c written by Haruhiko Okumura.
23- Copyright (c) 1995-2000 Haruhiko Okumura
24- This file may be freely modified/redistributed.
25-
26-Original kf.c:
27- http://www.matsusaka-u.ac.jp/~okumura/kf.html
28-
29-*********************************************************************/
30-
31-#include <stdio.h>
32-#include <stdlib.h>
33-#include <memory.h>
34-#include <string.h>
35-#include <assert.h>
36-
37-#include "pykf.h"
38-
39-#if defined(_MSC_VER)
40-#define LOCAL_INLINE __inline static
41-#endif
42-
43-#if !defined(__cplusplus) && !defined(inline)
44-#ifdef __GNUC__
45-#define LOCAL_INLINE __inline static
46-#endif
47-#endif
48-
49-#if !defined(LOCAL_INLINE)
50-#define LOCAL_INLINE static
51-#endif
52-
53-#include "convert.h"
54-
55-
56-
57-
58-int guess(int imax, unsigned char buf[], int strict)
59-{
60- int i;
61- int ascii, euc, sjis, utf8, bad_euc, bad_sjis, bad_utf8;
62- int jis, hankana;
63- int sjis_error, euc_error, utf8_error;
64-
65- ascii = 1;
66- bad_euc=euc=0;
67- bad_sjis=sjis=0;
68- bad_utf8 = utf8=0;
69- jis = 0;
70- sjis_error = euc_error = utf8_error = 0;
71-
72- /* check BOM */
73- if (imax >= 2) {
74- if (buf[0] == 0xff && buf[1] == 0xfe) {
75- return UTF16_LE;
76- }
77- else if (buf[0] == 0xfe && buf[1] == 0xff) {
78- return UTF16_BE;
79- }
80- }
81- if (imax >= 3 && !memcmp(buf, "\xef\xbb\xbf", 3)) {
82- return UTF8;
83- }
84-
85- // check SJIS
86- hankana = 0;
87- for (i = 0; i < imax; i++) {
88-
89- if (buf[i] >= 0x80) {
90- ascii = 0;
91- }
92-
93- if (buf[i] == 0x1b) {
94- jis= 1;
95- }
96-
97- if (buf[i] == 0x8e ) {
98- // looks like euc.
99- if (i + 2 < imax) {
100- if (buf[i+2]==0x8e && ishankana(buf[i+1])) {
101- bad_sjis += 1;
102- }
103- }
104- }
105-
106- if (ishankana(buf[i])) {
107- sjis += 0x10/2-1;
108- hankana++;
109- }
110- else {
111- if (hankana == 1) {
112- // single halfwidth-kana is bad sign.
113- bad_sjis++;
114- }
115- hankana = 0;
116-
117- if (issjis1(buf[i])) {
118- if (i+1 >= imax) {
119- if (strict) {
120- sjis_error = 1;
121- break;
122- }
123- bad_sjis+=0x100;
124- }
125- else if (issjis2(buf[i+1])) {
126- sjis += 0x10;
127- i++;
128- }
129- else {
130- if (strict) {
131- sjis_error = 1;
132- break;
133- }
134- bad_sjis += 0x100;
135- }
136- }
137- else if (buf[i] >= 0x80) {
138- if (strict) {
139- sjis_error = 1;
140- break;
141- }
142- bad_sjis += 0x100;
143- }
144- }
145- }
146-
147- if (ascii && jis) {
148- return JIS;
149- }
150-
151- if (ascii) {
152- return ASCII;
153- }
154-
155- // check EUC-JP
156- hankana=0;
157- for (i = 0; i < imax; i++) {
158- if (buf[i] == 0x8e) {
159- if (i+1 >= imax) {
160- if (strict) {
161- euc_error = 1;
162- break;
163- }
164- bad_euc += 0x100;
165- }
166- else if (ishankana(buf[i+1])) {
167- euc+=10;
168- i++;
169- hankana++;
170- }
171- else {
172- if (strict) {
173- euc_error = 1;
174- break;
175- }
176- bad_euc += 0x100;
177- }
178- }
179- else {
180- if (hankana == 1) {
181- bad_euc++;
182- }
183- hankana = 0;
184- if (iseuc(buf[i])) {
185- if (i+1 >= imax) {
186- if (strict) {
187- euc_error = 1;
188- break;
189- }
190- bad_euc+=0x100;
191- }
192- else if (iseuc(buf[i+1])) {
193- i++;
194- euc+=0x10;
195- }
196- else {
197- if (strict) {
198- euc_error = 1;
199- break;
200- }
201- bad_euc+=0x100;
202- }
203- }
204- else if (buf[i] == 0x8f) {
205- if (i+2 >= imax) {
206- if (strict) {
207- euc_error = 1;
208- break;
209- }
210- bad_euc+=0x100;
211- }
212- else if (iseuc(buf[i+1]) && iseuc(buf[i+2])) {
213- i+=2;
214- euc+=0x10;
215- }
216- else {
217- if (strict) {
218- euc_error = 1;
219- break;
220- }
221- bad_euc+=100;
222- }
223- }
224- else if (buf[i] >= 0x80) {
225- if (strict) {
226- euc_error = 1;
227- break;
228- }
229- bad_euc+=0x100;
230- }
231- }
232- }
233-
234- // check UTF-8
235- for (i = 0; i < imax; i++) {
236- if (isutf8_2byte(buf[i])) {
237- if (i+1 >= imax) {
238- if (strict) {
239- utf8_error = 1;
240- break;
241- }
242- bad_utf8 += 1000;
243- }
244- else if (isutf8_trail(buf[i+1])) {
245- utf8+=10;
246- i++;
247- }
248- else {
249- if (strict) {
250- utf8_error = 1;
251- break;
252- }
253- bad_utf8+=100;
254- }
255- }
256- else if (isutf8_3byte(buf[i])) {
257- if (i+2 >= imax) {
258- if (strict) {
259- utf8_error = 1;
260- break;
261- }
262- bad_utf8 += 1000;
263- }
264- else if (isutf8_trail(buf[i+1]) && isutf8_trail(buf[i+2])) {
265- utf8+=15;
266- i+=2;
267- }
268- else {
269- if (strict) {
270- utf8_error = 1;
271- break;
272- }
273- bad_utf8+=1000;
274- }
275- } else if (buf[i] >= 0x80) {
276- if (strict) {
277- utf8_error = 1;
278- break;
279- }
280- bad_utf8 += 1000;
281- }
282- }
283-
284- if (sjis_error && euc_error && utf8_error) {
285- return ERROR;
286- }
287-
288- if (sjis_error) {
289- if (euc_error) {
290- return UTF8;
291- }
292- if (utf8_error) {
293- return EUC;
294- }
295- if (euc-bad_euc > utf8-bad_utf8)
296- return EUC;
297- else if (euc-bad_euc < utf8-bad_utf8)
298- return UTF8;
299- }
300-
301- if (euc_error) {
302- if (sjis_error) {
303- return UTF8;
304- }
305- if (utf8_error) {
306- return SJIS;
307- }
308- if (sjis-bad_sjis > utf8-bad_utf8)
309- return SJIS;
310- else if (sjis-bad_sjis < utf8-bad_utf8)
311- return UTF8;
312- }
313-
314- if (utf8_error) {
315- if (sjis_error) {
316- return EUC;
317- }
318- if (euc_error) {
319- return SJIS;
320- }
321- if (sjis-bad_sjis > euc-bad_euc)
322- return SJIS;
323- else
324- return EUC;
325- }
326-
327- if (sjis-bad_sjis > euc-bad_euc) {
328- if (sjis-bad_sjis > utf8-bad_utf8)
329- return SJIS;
330- else if (sjis-bad_sjis < utf8-bad_utf8)
331- return UTF8;
332- }
333-
334- if (sjis-bad_sjis < euc-bad_euc) {
335- if (euc-bad_euc > utf8-bad_utf8)
336- return EUC;
337- else if (euc-bad_euc < utf8-bad_utf8)
338- return UTF8;
339- }
340- return UNKNOWN;
341-}
342-
343-LOCAL_INLINE
344-void jis_to_sjis2(unsigned char *ph, unsigned char *pl);
345-
346-LOCAL_INLINE
347-int isjis0213(unsigned char h, unsigned char l) {
348- int *p;
349- int jis = (h << 8 | l) & 0xffff;
350-
351- for (p=tbl_jis0213; *(p+2) < jis; p+=2);
352-
353- if (*p <= jis && (jis < (p[0] + p[1]))) {
354- return 1;
355- }
356- else {
357- return 0;
358- }
359-}
360-
361-
362-LOCAL_INLINE
363-int mskanji_to_jis(unsigned char *ph, unsigned char *pl) {
364- int *p;
365- int sjis = (*ph << 8 | *pl) & 0xffff;
366-
367- if (isgaiji1(*ph)) {
368- *ph = (CONV_FAILED >> 8) & 0xff;
369- *pl = CONV_FAILED & 0xff;
370- return 1;
371- }
372-
373- for (p=tbl_sjis2jis; *p < sjis; p+=2);
374-
375- if (*p == sjis) {
376- *ph = (*(p+1)) >> 8;
377- *pl = (*(p+1)) & 0xff;
378- return 1;
379- }
380- return 0;
381-}
382-
383-LOCAL_INLINE
384-void sjis_to_jis(unsigned char *ph, unsigned char *pl)
385-{
386- if (*ph <= 0x9f) {
387- if (*pl < 0x9f)
388- *ph = (*ph << 1) - 0xe1;
389- else
390- *ph = (*ph << 1) - 0xe0;
391- } else {
392- if (*pl < 0x9f)
393- *ph = (*ph << 1) - 0x161;
394- else
395- *ph = (*ph << 1) - 0x160;
396- }
397- if (*pl < 0x7f)
398- *pl -= 0x1f;
399- else if (*pl < 0x9f)
400- *pl -= 0x20;
401- else
402- *pl -= 0x7e;
403-}
404-
405-LOCAL_INLINE
406-void sjis_to_jis2(unsigned char *ph, unsigned char *pl)
407-{
408- if (mskanji_to_jis(ph, pl))
409- return;
410- else
411- sjis_to_jis(ph, pl);
412-}
413-
414-
415-LOCAL_INLINE
416-void jis_to_sjis(unsigned char *ph, unsigned char *pl)
417-{
418- if (*ph & 1) {
419- if (*pl < 0x60)
420- *pl += 0x1f;
421- else
422- *pl += 0x20;
423- } else
424- *pl += 0x7e;
425-
426- if (*ph < 0x5f)
427- *ph = (*ph + 0xe1) >> 1;
428- else
429- *ph = (*ph + 0x161) >> 1;
430-}
431-
432-
433-LOCAL_INLINE
434-int jis_to_mskanji(unsigned char *ph, unsigned char *pl) {
435- int *p;
436- int jis = (*ph << 8 | *pl) & 0xffff;
437-
438- for (p=tbl_jis2sjis; *p < jis; p+=2);
439-
440- if (*p == jis) {
441- *ph = (*(p+1)) >> 8;
442- *pl = (*(p+1)) & 0xff;
443- return 1;
444- }
445- return 0;
446-}
447-
448-
449-
450-LOCAL_INLINE
451-void jis_to_sjis2(unsigned char *ph, unsigned char *pl)
452-{
453- if (jis_to_mskanji(ph, pl))
454- return;
455- else
456- jis_to_sjis(ph, pl);
457-}
458-
459-
460-
461-
462-
463-int sjistojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
464-{
465- unsigned char c, d;
466- int pos, tmplen, retpos=0;
467- char tmp[10];
468- char *newbuf;
469- enum {NORMAL, KANJI, HANKANA, JIS0213} mode = NORMAL;
470-
471- if (!len) {
472- *retlen = 0;
473- return 1;
474- }
475-
476- *retlen = len;
477- *ret = malloc(*retlen);
478- if (!*ret) {
479- return 0;
480- }
481-
482-
483- for (pos = 0; pos < len; pos++) {
484- tmplen = 0;
485- if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
486- c = buf[pos];
487- d = buf[pos+1];
488- pos += 1;
489- sjis_to_jis2(&c, &d);
490-
491- if (j0208 || !isjis0213(c, d)) {
492- if (mode != KANJI) {
493- mode = KANJI;
494- tmp[tmplen++] = 0x1b;
495- tmp[tmplen++] = '$';
496- tmp[tmplen++] = 'B';
497- }
498- }
499- else {
500- if (mode != JIS0213) {
501- mode = JIS0213;
502- tmp[tmplen++] = 0x1b;
503- tmp[tmplen++] = '$';
504- tmp[tmplen++] = '(';
505- tmp[tmplen++] = 'O';
506- }
507- }
508- tmp[tmplen++] = c;
509- tmp[tmplen++] = d;
510- } else if (ishankana(buf[pos])) {
511- if (mode != HANKANA) {
512- mode = HANKANA;
513- tmp[tmplen++] = 0x1b;
514- tmp[tmplen++] = '(';
515- tmp[tmplen++] = 'I';
516- }
517- tmp[tmplen++] = buf[pos] & 0x7f;
518- } else {
519- if (mode != NORMAL) {
520- mode = NORMAL;
521- tmp[tmplen++] = 0x1b;
522- tmp[tmplen++] = '(';
523- tmp[tmplen++] = 'B';
524- }
525- tmp[tmplen++] = buf[pos];
526- }
527-
528- if (tmplen) {
529- if (retpos + tmplen > *retlen) {
530- *retlen = *retlen + len / 2 + 16;
531- newbuf = realloc(*ret, *retlen);
532- if (!newbuf) {
533- free(*ret);
534- return 0;
535- }
536- *ret = newbuf;
537- }
538- memcpy(*ret+retpos, tmp, tmplen);
539- retpos += tmplen;
540- }
541- }
542-
543- if (!retpos) {
544- *retlen = 0;
545- free(*ret);
546- return 1;
547- }
548-
549- if (mode != NORMAL) {
550- if (retpos + 3 > *retlen) {
551- *retlen = retpos + 3;
552- newbuf = realloc(*ret, *retlen);
553- if (!newbuf) {
554- free(*ret);
555- return 0;
556- }
557- *ret = newbuf;
558- }
559- *(*ret + retpos) = 0x1b;
560- *(*ret + retpos+1) = '(';
561- *(*ret + retpos+2) = 'B';
562- retpos += 3;
563- }
564-
565- newbuf = realloc(*ret, retpos);
566- if (!newbuf) {
567- free(*ret);
568- return 0;
569- }
570- *ret = newbuf;
571- *retlen = retpos;
572- return 1;
573-}
574-
575-int euctojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
576-{
577- unsigned char c, d;
578- int pos, tmplen, retpos=0;
579- char tmp[10];
580- char *newbuf;
581- enum {NORMAL, KANJI, HANKANA, JIS0213} mode = NORMAL;
582-
583- if (!len) {
584- *retlen = 0;
585- return 1;
586- }
587-
588- *retlen = len;
589- *ret = malloc(*retlen);
590- if (!*ret) {
591- return 0;
592- }
593-
594- for (pos = 0; pos < len; pos++) {
595- tmplen = 0;
596- if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
597- c = buf[pos] & 0x7f;
598- d = buf[pos+1] & 0x7f;
599- pos += 1;
600-
601- if (j0208 || !isjis0213(c, d)) {
602- if (mode != KANJI) {
603- mode = KANJI;
604- tmp[tmplen++] = 0x1b;
605- tmp[tmplen++] = '$';
606- tmp[tmplen++] = 'B';
607- }
608- }
609- else {
610- if (mode != JIS0213) {
611- mode = JIS0213;
612- tmp[tmplen++] = 0x1b;
613- tmp[tmplen++] = '$';
614- tmp[tmplen++] = '(';
615- tmp[tmplen++] = 'O';
616- }
617- }
618- tmp[tmplen++] = c;
619- tmp[tmplen++] = d;
620- } else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
621-
622-
623- if (mode != HANKANA) {
624- mode = HANKANA;
625- tmp[tmplen++] = 0x1b;
626- tmp[tmplen++] = '(';
627- tmp[tmplen++] = 'I';
628- }
629- tmp[tmplen++] = buf[pos+1] & 0x7f;
630- pos += 1;
631-
632- } else {
633- if (mode != NORMAL) {
634- mode = NORMAL;
635- tmp[tmplen++] = 0x1b;
636- tmp[tmplen++] = '(';
637- tmp[tmplen++] = 'B';
638- }
639- tmp[tmplen++] = buf[pos];
640- }
641-
642- if (tmplen) {
643- if (retpos + tmplen > *retlen) {
644- *retlen = *retlen + len / 2 + 16;
645- newbuf = realloc(*ret, *retlen);
646- if (!newbuf) {
647- free(*ret);
648- return 0;
649- }
650- *ret = newbuf;
651- }
652- memcpy(*ret+retpos, tmp, tmplen);
653- retpos += tmplen;
654- }
655- }
656-
657- if (!retpos) {
658- *retlen = 0;
659- free(*ret);
660- return 1;
661- }
662-
663- if (mode != NORMAL) {
664- if (retpos + 3 > *retlen) {
665- *retlen = retpos + 3;
666- newbuf = realloc(*ret, *retlen);
667- if (!newbuf) {
668- free(*ret);
669- return 0;
670- }
671- *ret = newbuf;
672- }
673- *(*ret + retpos) = 0x1b;
674- *(*ret + retpos+1) = '(';
675- *(*ret + retpos+2) = 'B';
676- retpos += 3;
677- }
678-
679- newbuf = realloc(*ret, retpos);
680- if (!newbuf) {
681- free(*ret);
682- return 0;
683- }
684- *ret = newbuf;
685- *retlen = retpos;
686- return 1;
687-}
688-
689-
690-int sjistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
691-{
692- unsigned char c, d;
693- int pos, tmplen, retpos=0;
694- char tmp[10];
695- char *newbuf;
696-
697- if (!len) {
698- *retlen = 0;
699- return 1;
700- }
701-
702- *retlen = len;
703- *ret = malloc(*retlen);
704- if (!*ret) {
705- return 0;
706- }
707-
708- for (pos = 0; pos < len; pos++) {
709- tmplen=0;
710-
711- if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
712- c = buf[pos];
713- d = buf[pos+1];
714- pos += 1;
715-
716- sjis_to_jis2(&c, &d);
717- tmp[tmplen++] = c | 0x80;
718- tmp[tmplen++] = d | 0x80;
719- } else if (ishankana(buf[pos])) {
720- tmp[tmplen++] = '\x8e';
721- tmp[tmplen++] = buf[pos];
722- } else {
723- tmp[tmplen++] = buf[pos];
724- }
725-
726- if (tmplen) {
727- if (retpos + tmplen > *retlen) {
728- *retlen = *retlen + len / 2 + 16;
729- newbuf = realloc(*ret, *retlen);
730- if (!newbuf) {
731- free(*ret);
732- return 0;
733- }
734- *ret = newbuf;
735- }
736- memcpy(*ret+retpos, tmp, tmplen);
737- retpos += tmplen;
738- }
739- }
740-
741- if (!retpos) {
742- *retlen = 0;
743- free(*ret);
744- return 1;
745- }
746-
747- newbuf = realloc(*ret, retpos);
748- if (!newbuf) {
749- free(*ret);
750- return 0;
751- }
752- *ret = newbuf;
753- *retlen = retpos;
754-
755- return 1;
756-}
757-
758-int jistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
759-{
760- int pos, tmplen, retpos=0;
761- char tmp[10];
762- char *newbuf;
763-
764- enum {NORMAL, KANJI, HANKANA} mode = NORMAL;
765-
766- if (!len) {
767- *retlen = 0;
768- return 1;
769- }
770-
771- *retlen = len;
772- *ret = malloc(*retlen);
773- if (!*ret) {
774- return 0;
775- }
776-
777- for (pos = 0; pos < len; pos++) {
778- tmplen=0;
779-
780- if ((pos + 2 < len) &&
781- (!memcmp(buf+pos, "\x1b$@", 3) ||
782- !memcmp(buf+pos, "\x1b$B", 3))) {
783-
784- mode = KANJI;
785- pos += 2;
786- }
787- else if ((pos + 3 < len) && !memcmp(buf+pos, "\x1b$(O", 4)) {
788- mode = KANJI;
789- pos += 3;
790- }
791- else if ((pos + 2 < len) &&
792- (!memcmp(buf+pos, "\x1b(B", 3) ||
793- !memcmp(buf+pos, "\x1b(J", 3))) {
794-
795- mode = NORMAL;
796- pos += 2;
797- }
798- else if ((pos + 2 < len) && !memcmp(buf+pos, "\x1b(I", 3)) {
799- mode = HANKANA;
800- pos += 2;
801- }
802- else if (buf[pos] == '\x0e') {
803- mode = HANKANA;
804- }
805- else if (buf[pos] == '\x0f') {
806- mode = NORMAL;
807- }
808- else if (mode == KANJI && isjis(buf[pos]) && (pos+1 < len) && isjis(buf[pos+1])) {
809- tmp[tmplen++] = buf[pos] | 0x80;
810- tmp[tmplen++] = buf[pos+1] | 0x80;
811- pos++;
812- } else if (mode == HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
813- tmp[tmplen++] = '\x8e';
814- tmp[tmplen++] = buf[pos] | 0x80;
815- } else {
816- tmp[tmplen++] = buf[pos];
817- }
818-
819- if (tmplen) {
820- if (retpos + tmplen > *retlen) {
821- *retlen = *retlen + len / 2 + 16;
822- newbuf = realloc(*ret, *retlen);
823- if (!newbuf) {
824- free(*ret);
825- return 0;
826- }
827- *ret = newbuf;
828- }
829- memcpy(*ret+retpos, tmp, tmplen);
830- retpos += tmplen;
831- }
832- }
833-
834- if (!retpos) {
835- *retlen = 0;
836- free(*ret);
837- return 1;
838- }
839-
840- newbuf = realloc(*ret, retpos);
841- if (!newbuf) {
842- free(*ret);
843- return 0;
844- }
845- *ret = newbuf;
846- *retlen = retpos;
847- return 1;
848-}
849-
850-
851-int jistosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
852-{
853- unsigned char c, d;
854- int pos, tmplen, retpos=0;
855- char tmp[10];
856- char *newbuf;
857-
858- enum {NORMAL, KANJI, HANKANA} mode = NORMAL;
859-
860- if (!len) {
861- *retlen = 0;
862- return 1;
863- }
864-
865- *retlen = len;
866- *ret = malloc(*retlen);
867- if (!*ret) {
868- return 0;
869- }
870-
871- for (pos = 0; pos < len; pos++) {
872- tmplen=0;
873-
874- if ((pos + 2 < len) &&
875- (!memcmp(buf+pos, "\x1b$@", 3) ||
876- !memcmp(buf+pos, "\x1b$B", 3))) {
877-
878- mode = KANJI;
879- pos += 2;
880- }
881- else if ((pos + 3 < len) && !memcmp(buf+pos, "\x1b$(O", 4)) {
882- mode = KANJI;
883- pos += 3;
884- }
885- else if ((pos + 2 < len) &&
886- (!memcmp(buf+pos, "\x1b(B", 3) ||
887- !memcmp(buf+pos, "\x1b(J", 3))) {
888-
889- mode = NORMAL;
890- pos += 2;
891- }
892- else if ((pos + 2 < len) && !memcmp(buf+pos, "\x1b(I", 3)) {
893- mode = HANKANA;
894- pos += 2;
895- }
896- else if (buf[pos] == '\x0e') {
897- mode = HANKANA;
898- }
899- else if (buf[pos] == '\x0f') {
900- mode = NORMAL;
901- }
902- else if (mode == KANJI && isjis(buf[pos]) && (pos+1 < len) && isjis(buf[pos+1])) {
903- c = buf[pos];
904- d = buf[pos+1];
905- pos++;
906-
907- jis_to_sjis2(&c, &d);
908- tmp[tmplen++] = c;
909- tmp[tmplen++] = d;
910- } else if (mode == HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
911- tmp[tmplen++] = buf[pos] | 0x80;
912- } else {
913- tmp[tmplen++] = buf[pos];
914- }
915-
916- if (tmplen) {
917- if (retpos + tmplen > *retlen) {
918- *retlen = *retlen + len / 2 + 16;
919- newbuf = realloc(*ret, *retlen);
920- if (!newbuf) {
921- free(*ret);
922- return 0;
923- }
924- *ret = newbuf;
925- }
926- memcpy(*ret+retpos, tmp, tmplen);
927- retpos += tmplen;
928- }
929- }
930-
931- if (!retpos) {
932- *retlen = 0;
933- free(*ret);
934- return 1;
935- }
936-
937- newbuf = realloc(*ret, retpos);
938- if (!newbuf) {
939- free(*ret);
940- return 0;
941- }
942- *ret = newbuf;
943- *retlen = retpos;
944- return 1;
945-}
946-
947-int euctosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
948-{
949- unsigned char c, d;
950- int pos, tmplen, retpos=0;
951- char tmp[10];
952- char *newbuf;
953-
954- if (!len) {
955- *retlen = 0;
956- return 1;
957- }
958-
959- *retlen = len;
960- *ret = malloc(*retlen);
961- if (!*ret) {
962- return 0;
963- }
964-
965- for (pos = 0; pos < len; pos++) {
966- tmplen=0;
967-
968- if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
969- c = buf[pos] & 0x7f;
970- d = buf[pos+1] & 0x7f;
971- pos += 1;
972-
973- jis_to_sjis2(&c, &d);
974- tmp[tmplen++] = c;
975- tmp[tmplen++] = d;
976- } else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
977- tmp[tmplen++] = buf[pos+1];
978- pos++;
979- } else {
980- tmp[tmplen++] = buf[pos];
981- }
982-
983- if (tmplen) {
984- if (retpos + tmplen > *retlen) {
985- *retlen = *retlen + len / 2 + 16;
986- newbuf = realloc(*ret, *retlen);
987- if (!newbuf) {
988- free(*ret);
989- return 0;
990- }
991- *ret = newbuf;
992- }
993- memcpy(*ret+retpos, tmp, tmplen);
994- retpos += tmplen;
995- }
996- }
997-
998- if (!retpos) {
999- *retlen = 0;
1000- free(*ret);
1001- return 1;
1002- }
1003-
1004- newbuf = realloc(*ret, retpos);
1005- if (!newbuf) {
1006- free(*ret);
1007- return 0;
1008- }
1009- *ret = newbuf;
1010- *retlen = retpos;
1011- return 1;
1012-}
1013-
1014-static const unsigned char *h_kana[] = {
1015-"\xdd", "\xdc", "\xdb", "\xda", "\xd9", "\xd8", "\xd7", "\xd6", "\xd5", "\xd4",
1016-"\xd3", "\xd2", "\xd1", "\xd0", "\xcf", "\xce\xdf", "\xce\xde", "\xce", "\xcd\xdf",
1017-"\xcd\xde", "\xcd", "\xcc\xdf", "\xcc\xde", "\xcc", "\xcb\xdf", "\xcb\xde",
1018-"\xcb", "\xca\xdf", "\xca\xde", "\xca", "\xc9", "\xc8", "\xc7", "\xc6", "\xc5",
1019-"\xc4\xde", "\xc4", "\xc3\xde", "\xc3", "\xc2\xde", "\xc2", "\xc1\xde", "\xc1",
1020-"\xc0\xde", "\xc0", "\xbf\xde", "\xbf", "\xbe\xde", "\xbe", "\xbd\xde", "\xbd",
1021-"\xbc\xde", "\xbc", "\xbb\xde", "\xbb", "\xba\xde", "\xba", "\xb9\xde", "\xb9",
1022-"\xb8\xde", "\xb8", "\xb7\xde", "\xb7", "\xb6\xde", "\xb6", "\xb5", "\xb4", "\xb3\xde",
1023-"\xb3", "\xb2", "\xb1", "\xb0", "\xaf", "\xae", "\xad", "\xac", "\xab",
1024-"\xaa", "\xa9", "\xa8", "\xa7", "\xa6", "\xa5", "\xa4", "\xa3", "\xa2", "\xa1", NULL};
1025-
1026-static const unsigned char *euc_h_kana[] = {
1027-"\x8e\xdd", "\x8e\xdc", "\x8e\xdb", "\x8e\xda", "\x8e\xd9", "\x8e\xd8", "\x8e\xd7", "\x8e\xd6", "\x8e\xd5", "\x8e\xd4",
1028-"\x8e\xd3", "\x8e\xd2", "\x8e\xd1", "\x8e\xd0", "\x8e\xcf", "\x8e\xce\x8e\xdf", "\x8e\xce\x8e\xde", "\x8e\xce", "\x8e\xcd\x8e\xdf",
1029-"\x8e\xcd\x8e\xde", "\x8e\xcd", "\x8e\xcc\x8e\xdf", "\x8e\xcc\x8e\xde", "\x8e\xcc", "\x8e\xcb\x8e\xdf", "\x8e\xcb\x8e\xde",
1030-"\x8e\xcb", "\x8e\xca\x8e\xdf", "\x8e\xca\x8e\xde", "\x8e\xca", "\x8e\xc9", "\x8e\xc8", "\x8e\xc7", "\x8e\xc6", "\x8e\xc5",
1031-"\x8e\xc4\x8e\xde", "\x8e\xc4", "\x8e\xc3\x8e\xde", "\x8e\xc3", "\x8e\xc2\x8e\xde", "\x8e\xc2", "\x8e\xc1\x8e\xde", "\x8e\xc1",
1032-"\x8e\xc0\x8e\xde", "\x8e\xc0", "\x8e\xbf\x8e\xde", "\x8e\xbf", "\x8e\xbe\x8e\xde", "\x8e\xbe", "\x8e\xbd\x8e\xde", "\x8e\xbd",
1033-"\x8e\xbc\x8e\xde", "\x8e\xbc", "\x8e\xbb\x8e\xde", "\x8e\xbb", "\x8e\xba\x8e\xde", "\x8e\xba", "\x8e\xb9\x8e\xde", "\x8e\xb9",
1034-"\x8e\xb8\x8e\xde", "\x8e\xb8", "\x8e\xb7\x8e\xde", "\x8e\xb7", "\x8e\xb6\x8e\xde", "\x8e\xb6", "\x8e\xb5", "\x8e\xb4", "\x8e\xb3\x8e\xde",
1035-"\x8e\xb3", "\x8e\xb2", "\x8e\xb1", "\x8e\xb0", "\x8e\xaf", "\x8e\xae", "\x8e\xad", "\x8e\xac", "\x8e\xab",
1036-"\x8e\xaa", "\x8e\xa9", "\x8e\xa8", "\x8e\xa7", "\x8e\xa6", "\x8e\xa5", "\x8e\xa4", "\x8e\xa3", "\x8e\xa2", "\x8e\xa1", NULL};
1037-
1038-static const unsigned char *sjis_f_kana[] = {
1039- "\x83\x93", "\x83\x8f", "\x83\x8d", "\x83\x8c", "\x83\x8b", "\x83\x8a",
1040- "\x83\x89", "\x83\x88", "\x83\x86", "\x83\x84", "\x83\x82", "\x83\x81",
1041- "\x83\x80", "\x83\x7e", "\x83\x7d", "\x83\x7c", "\x83\x7b", "\x83\x7a",
1042- "\x83\x79", "\x83\x78", "\x83\x77", "\x83\x76", "\x83\x75", "\x83\x74",
1043- "\x83\x73", "\x83\x72", "\x83\x71", "\x83\x70", "\x83\x6f", "\x83\x6e",
1044- "\x83\x6d", "\x83\x6c", "\x83\x6b", "\x83\x6a", "\x83\x69", "\x83\x68",
1045- "\x83\x67", "\x83\x66", "\x83\x65", "\x83\x64", "\x83\x63", "\x83\x61",
1046- "\x83\x60", "\x83\x5f", "\x83\x5e", "\x83\x5d", "\x83\x5c", "\x83\x5b",
1047- "\x83\x5a", "\x83\x59", "\x83\x58", "\x83\x57", "\x83\x56", "\x83\x55",
1048- "\x83\x54", "\x83\x53", "\x83\x52", "\x83\x51", "\x83\x50", "\x83\x4f",
1049- "\x83\x4e", "\x83\x4d", "\x83\x4c", "\x83\x4b", "\x83\x4a", "\x83\x49",
1050- "\x83\x47", "\x83\x94", "\x83\x45", "\x83\x43", "\x83\x41", "\x81\x5b",
1051- "\x83\x62", "\x83\x87", "\x83\x85", "\x83\x83", "\x83\x48", "\x83\x46",
1052- "\x83\x44", "\x83\x42", "\x83\x40", "\x83\x92", "\x81\x45", "\x81\x41",
1053- "\x81\x76", "\x81\x75", "\x81\x42", NULL};
1054-
1055-
1056-static const unsigned char *euc_f_kana[] = {
1057- "\xa5\xf3", "\xa5\xef", "\xa5\xed", "\xa5\xec", "\xa5\xeb", "\xa5\xea",
1058- "\xa5\xe9", "\xa5\xe8", "\xa5\xe6", "\xa5\xe4", "\xa5\xe2", "\xa5\xe1",
1059- "\xa5\xe0", "\xa5\xdf", "\xa5\xde", "\xa5\xdd", "\xa5\xdc", "\xa5\xdb",
1060- "\xa5\xda", "\xa5\xd9", "\xa5\xd8", "\xa5\xd7", "\xa5\xd6", "\xa5\xd5",
1061- "\xa5\xd4", "\xa5\xd3", "\xa5\xd2", "\xa5\xd1", "\xa5\xd0", "\xa5\xcf",
1062- "\xa5\xce", "\xa5\xcd", "\xa5\xcc", "\xa5\xcb", "\xa5\xca", "\xa5\xc9",
1063- "\xa5\xc8", "\xa5\xc7", "\xa5\xc6", "\xa5\xc5", "\xa5\xc4", "\xa5\xc2",
1064- "\xa5\xc1", "\xa5\xc0", "\xa5\xbf", "\xa5\xbe", "\xa5\xbd", "\xa5\xbc",
1065- "\xa5\xbb", "\xa5\xba", "\xa5\xb9", "\xa5\xb8", "\xa5\xb7", "\xa5\xb6",
1066- "\xa5\xb5", "\xa5\xb4", "\xa5\xb3", "\xa5\xb2", "\xa5\xb1", "\xa5\xb0",
1067- "\xa5\xaf", "\xa5\xae", "\xa5\xad", "\xa5\xac", "\xa5\xab", "\xa5\xaa",
1068- "\xa5\xa8", "\xa5\xf4", "\xa5\xa6", "\xa5\xa4", "\xa5\xa2", "\xa1\xbc",
1069- "\xa5\xc3", "\xa5\xe7", "\xa5\xe5", "\xa5\xe3", "\xa5\xa9", "\xa5\xa7",
1070- "\xa5\xa5", "\xa5\xa3", "\xa5\xa1", "\xa5\xf2", "\xa1\xa6", "\xa1\xa2",
1071- "\xa1\xd7", "\xa1\xd6", "\xa1\xa3", NULL};
1072-
1073-
1074-int sjistohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1075- int pos, tmplen, retpos=0;
1076- char tmp[10];
1077- char *newbuf;
1078- int i;
1079-
1080- if (!len) {
1081- *retlen = 0;
1082- return 1;
1083- }
1084-
1085- *retlen = len;
1086- *ret = malloc(*retlen);
1087- if (!*ret) {
1088- return 0;
1089- }
1090-
1091- for (pos = 0; pos < len; pos++) {
1092- tmplen=0;
1093-
1094- if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
1095- for (i = 0; sjis_f_kana[i]; i++) {
1096- if (buf[pos] == sjis_f_kana[i][0] && buf[pos+1] == sjis_f_kana[i][1]) {
1097- tmp[tmplen++] = h_kana[i][0];
1098- if (h_kana[i][1]) {
1099- tmp[tmplen++] = h_kana[i][1];
1100- }
1101- break;
1102- }
1103- }
1104- if (!sjis_f_kana[i]) {
1105- tmp[tmplen++] = buf[pos];
1106- tmp[tmplen++] = buf[pos+1];
1107- }
1108-
1109- pos++;
1110- } else {
1111- tmp[tmplen++] = buf[pos];
1112- }
1113-
1114- if (tmplen) {
1115- if (retpos + tmplen > *retlen) {
1116- *retlen = *retlen + len / 2 + 16;
1117- newbuf = realloc(*ret, *retlen);
1118- if (!newbuf) {
1119- free(*ret);
1120- return 0;
1121- }
1122- *ret = newbuf;
1123- }
1124- memcpy(*ret+retpos, tmp, tmplen);
1125- retpos += tmplen;
1126- }
1127- }
1128-
1129- if (!retpos) {
1130- *retlen = 0;
1131- free(*ret);
1132- return 1;
1133- }
1134-
1135- newbuf = realloc(*ret, retpos);
1136- if (!newbuf) {
1137- free(*ret);
1138- return 0;
1139- }
1140- *ret = newbuf;
1141- *retlen = retpos;
1142-
1143- return 1;
1144-}
1145-
1146-
1147-int sjistofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1148- int pos, tmplen, retpos=0;
1149- char tmp[10];
1150- char *newbuf;
1151- int i, j;
1152-
1153- if (!len) {
1154- *retlen = 0;
1155- return 1;
1156- }
1157-
1158- *retlen = len;
1159- *ret = malloc(*retlen);
1160- if (!*ret) {
1161- return 0;
1162- }
1163-
1164- for (pos = 0; pos < len; pos++) {
1165- tmplen=0;
1166-
1167- if (ishankana(buf[pos])) {
1168- for (i = 0; h_kana[i]; i++) {
1169- for (j = 0; h_kana[i][j] && buf[pos+j]; j++) {
1170- if (h_kana[i][j] != buf[pos+j]) {
1171- break;
1172- }
1173- }
1174- if (!h_kana[i][j]) {
1175- const char *p;
1176- for (p = sjis_f_kana[i]; *p; p++) {
1177- tmp[tmplen++] = *p;
1178- }
1179- pos += j-1;
1180- break;
1181- }
1182- }
1183-
1184- if (!h_kana[i]) {
1185- tmp[tmplen++] = buf[pos];
1186- }
1187- }
1188- else if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
1189- tmp[tmplen++] = buf[pos];
1190- tmp[tmplen++] = buf[pos+1];
1191- pos += 1;
1192- } else {
1193- tmp[tmplen++] = buf[pos];
1194- }
1195-
1196- if (tmplen) {
1197- if (retpos + tmplen > *retlen) {
1198- *retlen = *retlen + len / 2 + 16;
1199- newbuf = realloc(*ret, *retlen);
1200- if (!newbuf) {
1201- free(*ret);
1202- return 0;
1203- }
1204- *ret = newbuf;
1205- }
1206- memcpy(*ret+retpos, tmp, tmplen);
1207- retpos += tmplen;
1208- }
1209- }
1210-
1211- if (!retpos) {
1212- *retlen = 0;
1213- free(*ret);
1214- return 1;
1215- }
1216-
1217- newbuf = realloc(*ret, retpos);
1218- if (!newbuf) {
1219- free(*ret);
1220- return 0;
1221- }
1222- *ret = newbuf;
1223- *retlen = retpos;
1224-
1225- return 1;
1226-}
1227-
1228-int euctohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1229- int pos, tmplen, retpos=0;
1230- char tmp[10];
1231- char *newbuf;
1232- int i;
1233-
1234- if (!len) {
1235- *retlen = 0;
1236- return 1;
1237- }
1238-
1239- *retlen = len;
1240- *ret = malloc(*retlen);
1241- if (!*ret) {
1242- return 0;
1243- }
1244-
1245- for (pos = 0; pos < len; pos++) {
1246- tmplen=0;
1247-
1248- if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
1249- for (i = 0; euc_f_kana[i]; i++) {
1250- if (buf[pos] == euc_f_kana[i][0] && buf[pos+1] == euc_f_kana[i][1]) {
1251- tmp[tmplen++] = '\x8e';
1252- tmp[tmplen++] = h_kana[i][0];
1253- if (h_kana[i][1]) {
1254- tmp[tmplen++] = '\x8e';
1255- tmp[tmplen++] = h_kana[i][1];
1256- }
1257- break;
1258- }
1259- }
1260- if (!euc_f_kana[i]) {
1261- tmp[tmplen++] = buf[pos];
1262- tmp[tmplen++] = buf[pos+1];
1263- }
1264- pos++;
1265- }
1266- else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
1267- tmp[tmplen++] = buf[pos];
1268- tmp[tmplen++] = buf[pos+1];
1269- pos++;
1270- } else {
1271- tmp[tmplen++] = buf[pos];
1272- }
1273-
1274- if (tmplen) {
1275- if (retpos + tmplen > *retlen) {
1276- *retlen = *retlen + len / 2 + 16;
1277- newbuf = realloc(*ret, *retlen);
1278- if (!newbuf) {
1279- free(*ret);
1280- return 0;
1281- }
1282- *ret = newbuf;
1283- }
1284- memcpy(*ret+retpos, tmp, tmplen);
1285- retpos += tmplen;
1286- }
1287- }
1288-
1289- if (!retpos) {
1290- *retlen = 0;
1291- free(*ret);
1292- return 1;
1293- }
1294-
1295- newbuf = realloc(*ret, retpos);
1296- if (!newbuf) {
1297- free(*ret);
1298- return 0;
1299- }
1300- *ret = newbuf;
1301- *retlen = retpos;
1302-
1303- return 1;
1304-}
1305-
1306-
1307-int euctofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1308- int pos, tmplen, retpos=0;
1309- char tmp[10];
1310- char *newbuf;
1311- int i, j;
1312-
1313- if (!len) {
1314- *retlen = 0;
1315- return 1;
1316- }
1317-
1318- *retlen = len;
1319- *ret = malloc(*retlen);
1320- if (!*ret) {
1321- return 0;
1322- }
1323-
1324- for (pos = 0; pos < len; pos++) {
1325- tmplen=0;
1326-
1327- if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
1328- for (i = 0; euc_h_kana[i]; i++) {
1329- for (j = 0; euc_h_kana[i][j] && buf[pos+j]; j++) {
1330- if (euc_h_kana[i][j] != buf[pos+j]) {
1331- break;
1332- }
1333- }
1334- if (!euc_h_kana[i][j]) {
1335- const char *p;
1336- for (p = euc_f_kana[i]; *p; p++) {
1337- tmp[tmplen++] = *p;
1338- }
1339- pos += j-1;
1340- break;
1341- }
1342- }
1343-
1344- if (!h_kana[i]) {
1345- tmp[tmplen++] = buf[pos];
1346- }
1347- }
1348- else if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
1349- tmp[tmplen++] = buf[pos];
1350- tmp[tmplen++] = buf[pos+1];
1351- pos += 1;
1352- } else {
1353- tmp[tmplen++] = buf[pos];
1354- }
1355-
1356- if (tmplen) {
1357- if (retpos + tmplen > *retlen) {
1358- *retlen = *retlen + len / 2 + 16;
1359- newbuf = realloc(*ret, *retlen);
1360- if (!newbuf) {
1361- free(*ret);
1362- return 0;
1363- }
1364- *ret = newbuf;
1365- }
1366- memcpy(*ret+retpos, tmp, tmplen);
1367- retpos += tmplen;
1368- }
1369- }
1370-
1371- if (!retpos) {
1372- *retlen = 0;
1373- free(*ret);
1374- return 1;
1375- }
1376-
1377- newbuf = realloc(*ret, retpos);
1378- if (!newbuf) {
1379- free(*ret);
1380- return 0;
1381- }
1382- *ret = newbuf;
1383- *retlen = retpos;
1384-
1385- return 1;
1386-}
1387-
1388-
1389-#ifdef PYKF_MAIN
1390-
1391-
/*
 * Stand-alone smoke test (built only when PYKF_MAIN is defined):
 * converts a one-byte string from Shift_JIS to JIS.
 *
 * Returns 0 when the conversion succeeds, 1 otherwise.
 */
int main(void) {
    char *s = "?";
    unsigned char *ret = NULL;
    int retlen = 0;

    if (!sjistojis((int)strlen(s), (unsigned char *)s, &ret, &retlen, 0)) {
        return 1;
    }

    /* sjistojis() hands over a buffer only when it produced output. */
    if (retlen) {
        free(ret);
    }
    return 0;
}
1446-
1447-#endif
1+/*********************************************************************
2+
3+Japanese Kanji filter module
4+ Copyright (c) 2002, Atsuo Ishimoto. All rights reserved.
5+
6+Permission to use, copy, modify, and distribute this software and its
7+documentation for any purpose and without fee is hereby granted, provided that
8+the above copyright notice appear in all copies and that both that copyright
9+notice and this permission notice appear in supporting documentation, and that
10+the name of Atsuo Ishimoto not be used in advertising or publicity pertaining
11+to distribution of the software without specific, written prior permission.
12+
13+ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
14+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
15+EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR
16+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
17+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
18+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
19+PERFORMANCE OF THIS SOFTWARE.
20+
21+---------------------------------------------------------------------
22+This module is based on kf.c written by Haruhiko Okumura.
23+ Copyright (c) 1995-2000 Haruhiko Okumura
24+ This file may be freely modified/redistributed.
25+
26+Original kf.c:
27+ http://www.matsusaka-u.ac.jp/~okumura/kf.html
28+
29+*********************************************************************/
30+
31+#include <stdio.h>
32+#include <stdlib.h>
33+#include <memory.h>
34+#include <string.h>
35+#include <assert.h>
36+
37+#include "pykf.h"
38+
39+#if defined(_MSC_VER)
40+#define LOCAL_INLINE __inline static
41+#endif
42+
43+#if !defined(__cplusplus) && !defined(inline)
44+#ifdef __GNUC__
45+#define LOCAL_INLINE __inline static
46+#endif
47+#endif
48+
49+#if !defined(LOCAL_INLINE)
50+#define LOCAL_INLINE static
51+#endif
52+
53+#include "convert.h"
54+
55+
56+
57+
58+int guess(int imax, unsigned char buf[], int strict)
59+{
60+ int i, n;
61+ int ascii, euc, sjis, utf8, bad_euc, bad_sjis, bad_utf8;
62+ int jis, hankana;
63+ int sjis_error, euc_error, utf8_error;
64+
65+ ascii = 1;
66+ bad_euc=euc=0;
67+ bad_sjis=sjis=0;
68+ bad_utf8 = utf8=0;
69+ jis = 0;
70+ sjis_error = euc_error = utf8_error = 0;
71+
72+ /* check BOM */
73+ if (imax >= 2) {
74+ if (buf[0] == 0xff && buf[1] == 0xfe) {
75+ return UTF16_LE;
76+ }
77+ else if (buf[0] == 0xfe && buf[1] == 0xff) {
78+ return UTF16_BE;
79+ }
80+ }
81+ if (imax >= 3 && !memcmp(buf, "\xef\xbb\xbf", 3)) {
82+ return UTF8;
83+ }
84+
85+ // check SJIS
86+ hankana = 0;
87+ for (i = 0; i < imax; i++) {
88+
89+ if (buf[i] >= 0x80) {
90+ ascii = 0;
91+ }
92+
93+ if (buf[i] == 0x1b) {
94+ jis= 1;
95+ }
96+
97+ if (buf[i] == 0x8e ) {
98+ // looks like euc.
99+ if (i + 2 < imax) {
100+ if (buf[i+2]==0x8e && ishankana(buf[i+1])) {
101+ bad_sjis += 1;
102+ }
103+ }
104+ }
105+
106+ if (ishankana(buf[i])) {
107+ sjis += 0x10/2-1;
108+ hankana++;
109+ }
110+ else {
111+ if (hankana == 1) {
112+ // single halfwidth-kana is bad sign.
113+ bad_sjis++;
114+ }
115+ hankana = 0;
116+
117+ if (issjis1(buf[i])) {
118+ if (i+1 >= imax) {
119+ if (strict) {
120+ sjis_error = 1;
121+ break;
122+ }
123+ bad_sjis+=0x100;
124+ }
125+ else if (issjis2(buf[i+1])) {
126+ sjis += 0x10;
127+ i++;
128+ }
129+ else {
130+ if (strict) {
131+ sjis_error = 1;
132+ break;
133+ }
134+ bad_sjis += 0x100;
135+ }
136+ }
137+ else if (buf[i] >= 0x80) {
138+ if (strict) {
139+ sjis_error = 1;
140+ break;
141+ }
142+ bad_sjis += 0x100;
143+ }
144+ }
145+ }
146+
147+ if (ascii && jis) {
148+ return JIS;
149+ }
150+
151+ if (ascii) {
152+ return ASCII;
153+ }
154+
155+ // check EUC-JP
156+ hankana=0;
157+ for (i = 0; i < imax; i++) {
158+ if (buf[i] == 0x8e) {
159+ if (i+1 >= imax) {
160+ if (strict) {
161+ euc_error = 1;
162+ break;
163+ }
164+ bad_euc += 0x100;
165+ }
166+ else if (ishankana(buf[i+1])) {
167+ euc+=10;
168+ i++;
169+ hankana++;
170+ }
171+ else {
172+ if (strict) {
173+ euc_error = 1;
174+ break;
175+ }
176+ bad_euc += 0x100;
177+ }
178+ }
179+ else {
180+ if (hankana == 1) {
181+ bad_euc++;
182+ }
183+ hankana = 0;
184+ if (iseuc(buf[i])) {
185+ if (i+1 >= imax) {
186+ if (strict) {
187+ euc_error = 1;
188+ break;
189+ }
190+ bad_euc+=0x100;
191+ }
192+ else if (iseuc(buf[i+1])) {
193+ i++;
194+ euc+=0x10;
195+ }
196+ else {
197+ if (strict) {
198+ euc_error = 1;
199+ break;
200+ }
201+ bad_euc+=0x100;
202+ }
203+ }
204+ else if (buf[i] == 0x8f) {
205+ if (i+2 >= imax) {
206+ if (strict) {
207+ euc_error = 1;
208+ break;
209+ }
210+ bad_euc+=0x100;
211+ }
212+ else if (iseuc(buf[i+1]) && iseuc(buf[i+2])) {
213+ i+=2;
214+ euc+=0x10;
215+ }
216+ else {
217+ if (strict) {
218+ euc_error = 1;
219+ break;
220+ }
221+ bad_euc+=100;
222+ }
223+ }
224+ else if (buf[i] >= 0x80) {
225+ if (strict) {
226+ euc_error = 1;
227+ break;
228+ }
229+ bad_euc+=0x100;
230+ }
231+ }
232+ }
233+
234+ // check UTF-8
235+ for (i = 0; i < imax; i++) {
236+ int c_len;
237+ c_len = utf8_len(buf[i]);
238+ if (c_len) {
239+ if (i+c_len-1 >= imax) {
240+ if (strict) {
241+ utf8_error = 1;
242+ break;
243+ }
244+ bad_utf8 += 1000;
245+ }
246+ i++;
247+ for (n=0; n < c_len-1; n++) {
248+ if (!isutf8_trail(buf[i+n])) {
249+ if (strict) {
250+ utf8_error = 1;
251+ }
252+ else {
253+ bad_utf8 += 1000;
254+ }
255+ break;
256+ }
257+ }
258+
259+ if (utf8_error) {
260+ break;
261+ }
262+
263+ if (n == (c_len-1)) {
264+ /* no error */
265+ utf8 += (int)(0x10 * c_len/2.0+1); /* prefer utf-8 over SJIS/EUC a bit....*/
266+ i += (c_len-2);
267+ }
268+ } else if (buf[i] >= 0x80) {
269+ if (strict) {
270+ utf8_error = 1;
271+ break;
272+ }
273+ bad_utf8 += 1000;
274+ }
275+ }
276+/*
277+ printf("sjis_error:%d euc_error:%d, utf8_error:%d\n", sjis_error, euc_error, utf8_error);
278+ printf("sjis:%d euc:%d, utf8:%d\n", sjis, euc, utf8);
279+ printf("bad_sjis:%d bad_euc:%d, bad_utf8:%d\n", bad_sjis, bad_euc, bad_utf8);
280+*/
281+
282+ if (sjis_error && euc_error && utf8_error) {
283+ return ERROR;
284+ }
285+
286+ if (sjis_error) {
287+ if (euc_error) {
288+ return UTF8;
289+ }
290+ if (utf8_error) {
291+ return EUC;
292+ }
293+ if (euc-bad_euc > utf8-bad_utf8)
294+ return EUC;
295+ else if (euc-bad_euc < utf8-bad_utf8)
296+ return UTF8;
297+ }
298+
299+ if (euc_error) {
300+ if (sjis_error) {
301+ return UTF8;
302+ }
303+ if (utf8_error) {
304+ return SJIS;
305+ }
306+ if (sjis-bad_sjis > utf8-bad_utf8)
307+ return SJIS;
308+ else if (sjis-bad_sjis < utf8-bad_utf8)
309+ return UTF8;
310+ }
311+
312+ if (utf8_error) {
313+ if (sjis_error) {
314+ return EUC;
315+ }
316+ if (euc_error) {
317+ return SJIS;
318+ }
319+ if (sjis-bad_sjis > euc-bad_euc)
320+ return SJIS;
321+ else
322+ return EUC;
323+ }
324+
325+ if (sjis-bad_sjis > euc-bad_euc) {
326+ if (sjis-bad_sjis > utf8-bad_utf8)
327+ return SJIS;
328+ else if (sjis-bad_sjis < utf8-bad_utf8)
329+ return UTF8;
330+ }
331+
332+ if (sjis-bad_sjis < euc-bad_euc) {
333+ if (euc-bad_euc > utf8-bad_utf8)
334+ return EUC;
335+ else if (euc-bad_euc < utf8-bad_utf8)
336+ return UTF8;
337+ }
338+ return UNKNOWN;
339+}
340+
341+LOCAL_INLINE
342+void jis_to_sjis2(unsigned char *ph, unsigned char *pl);
343+
344+LOCAL_INLINE
345+int isjis0213(unsigned char h, unsigned char l) {
346+ int *p;
347+ int jis = (h << 8 | l) & 0xffff;
348+
349+ for (p=tbl_jis0213; *(p+2) < jis; p+=2);
350+
351+ if (*p <= jis && (jis < (p[0] + p[1]))) {
352+ return 1;
353+ }
354+ else {
355+ return 0;
356+ }
357+}
358+
359+
360+LOCAL_INLINE
361+int mskanji_to_jis(unsigned char *ph, unsigned char *pl) {
362+ int *p;
363+ int sjis = (*ph << 8 | *pl) & 0xffff;
364+
365+ if (isgaiji1(*ph)) {
366+ *ph = (CONV_FAILED >> 8) & 0xff;
367+ *pl = CONV_FAILED & 0xff;
368+ return 1;
369+ }
370+
371+ for (p=tbl_sjis2jis; *p < sjis; p+=2);
372+
373+ if (*p == sjis) {
374+ *ph = (*(p+1)) >> 8;
375+ *pl = (*(p+1)) & 0xff;
376+ return 1;
377+ }
378+ return 0;
379+}
380+
/*
 * Arithmetic Shift_JIS -> JIS conversion of one double-byte character.
 * *ph (lead byte) and *pl (trail byte) are rewritten in place.
 */
LOCAL_INLINE
void sjis_to_jis(unsigned char *ph, unsigned char *pl)
{
    /* Lead bytes up to 0x9f and those above it use different offsets. */
    int lead_offset = (*ph <= 0x9f) ? 0xe1 : 0x161;

    /* Trail bytes from 0x9f upwards select the other row of the pair. */
    if (*pl >= 0x9f) {
        lead_offset -= 1;
    }
    *ph = (*ph << 1) - lead_offset;

    if (*pl >= 0x9f) {
        *pl -= 0x7e;
    } else if (*pl >= 0x7f) {
        *pl -= 0x20;
    } else {
        *pl -= 0x1f;
    }
}
402+
/*
 * Shift_JIS -> JIS for one double-byte character: try the lookup table
 * first and fall back to the arithmetic conversion when it has no entry.
 */
LOCAL_INLINE
void sjis_to_jis2(unsigned char *ph, unsigned char *pl)
{
    if (!mskanji_to_jis(ph, pl)) {
        sjis_to_jis(ph, pl);
    }
}
411+
412+
/*
 * Arithmetic JIS -> Shift_JIS conversion of one double-byte character.
 * *ph (first byte) and *pl (second byte) are rewritten in place.
 */
LOCAL_INLINE
void jis_to_sjis(unsigned char *ph, unsigned char *pl)
{
    const int odd_row = *ph & 1;

    /* The trail byte offset depends on the parity of the first byte. */
    if (!odd_row) {
        *pl += 0x7e;
    } else if (*pl < 0x60) {
        *pl += 0x1f;
    } else {
        *pl += 0x20;
    }

    *ph = (*ph + ((*ph < 0x5f) ? 0xe1 : 0x161)) >> 1;
}
429+
430+
431+LOCAL_INLINE
432+int jis_to_mskanji(unsigned char *ph, unsigned char *pl) {
433+ int *p;
434+ int jis = (*ph << 8 | *pl) & 0xffff;
435+
436+ for (p=tbl_jis2sjis; *p < jis; p+=2);
437+
438+ if (*p == jis) {
439+ *ph = (*(p+1)) >> 8;
440+ *pl = (*(p+1)) & 0xff;
441+ return 1;
442+ }
443+ return 0;
444+}
445+
446+
447+
/*
 * JIS -> Shift_JIS for one double-byte character: try the lookup table
 * first and fall back to the arithmetic conversion when it has no entry.
 */
LOCAL_INLINE
void jis_to_sjis2(unsigned char *ph, unsigned char *pl)
{
    if (!jis_to_mskanji(ph, pl)) {
        jis_to_sjis(ph, pl);
    }
}
456+
457+
458+
459+
460+
/*
 * Convert a Shift_JIS byte string to JIS (ISO-2022 style escape sequences).
 *
 * len    - number of bytes in buf
 * buf    - Shift_JIS input
 * ret    - receives a malloc()ed result buffer owned by the caller;
 *          set to NULL when the result is empty or on failure
 * retlen - receives the number of bytes stored in *ret
 * j0208  - non-zero: always announce double-byte characters with ESC $ B;
 *          zero: use ESC $ ( O for characters isjis0213() accepts
 *
 * Returns 1 on success, 0 when memory could not be allocated.
 */
int sjistojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
{
    unsigned char c, d;
    int pos, tmplen, retpos = 0;
    unsigned char tmp[10];
    unsigned char *newbuf;
    enum {NORMAL, KANJI, HANKANA, JIS0213} mode = NORMAL;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    *retlen = len;
    *ret = malloc(*retlen);
    if (!*ret) {
        return 0;
    }

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;
        if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
            c = buf[pos];
            d = buf[pos+1];
            pos += 1;
            sjis_to_jis2(&c, &d);

            if (j0208 || !isjis0213(c, d)) {
                if (mode != KANJI) {
                    mode = KANJI;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = 'B';
                }
            }
            else {
                if (mode != JIS0213) {
                    mode = JIS0213;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = '(';
                    tmp[tmplen++] = 'O';
                }
            }
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if (ishankana(buf[pos])) {
            if (mode != HANKANA) {
                mode = HANKANA;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'I';
            }
            tmp[tmplen++] = buf[pos] & 0x7f;
        } else {
            if (mode != NORMAL) {
                mode = NORMAL;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'B';
            }
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret + retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* Leave the stream in the initial (ESC ( B) state. */
    if (mode != NORMAL) {
        if (retpos + 3 > *retlen) {
            *retlen = retpos + 3;
            newbuf = realloc(*ret, *retlen);
            if (!newbuf) {
                free(*ret);
                *ret = NULL;
                return 0;
            }
            *ret = newbuf;
        }
        *(*ret + retpos) = 0x1b;
        *(*ret + retpos + 1) = '(';
        *(*ret + retpos + 2) = 'B';
        retpos += 3;
    }

    /* Trim the buffer to the bytes actually produced. */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
572+
/*
 * Convert an EUC-JP byte string to JIS (ISO-2022 style escape sequences).
 *
 * len    - number of bytes in buf
 * buf    - EUC-JP input
 * ret    - receives a malloc()ed result buffer owned by the caller;
 *          set to NULL when the result is empty or on failure
 * retlen - receives the number of bytes stored in *ret
 * j0208  - non-zero: always announce double-byte characters with ESC $ B;
 *          zero: use ESC $ ( O for characters isjis0213() accepts
 *
 * Returns 1 on success, 0 when memory could not be allocated.
 */
int euctojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
{
    unsigned char c, d;
    int pos, tmplen, retpos = 0;
    unsigned char tmp[10];
    unsigned char *newbuf;
    enum {NORMAL, KANJI, HANKANA, JIS0213} mode = NORMAL;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    *retlen = len;
    *ret = malloc(*retlen);
    if (!*ret) {
        return 0;
    }

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;
        if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
            /* Clearing the high bit of both bytes yields the JIS code. */
            c = buf[pos] & 0x7f;
            d = buf[pos+1] & 0x7f;
            pos += 1;

            if (j0208 || !isjis0213(c, d)) {
                if (mode != KANJI) {
                    mode = KANJI;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = 'B';
                }
            }
            else {
                if (mode != JIS0213) {
                    mode = JIS0213;
                    tmp[tmplen++] = 0x1b;
                    tmp[tmplen++] = '$';
                    tmp[tmplen++] = '(';
                    tmp[tmplen++] = 'O';
                }
            }
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
            /* 0x8e introduces a half-width kana byte. */
            if (mode != HANKANA) {
                mode = HANKANA;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'I';
            }
            tmp[tmplen++] = buf[pos+1] & 0x7f;
            pos += 1;
        } else {
            if (mode != NORMAL) {
                mode = NORMAL;
                tmp[tmplen++] = 0x1b;
                tmp[tmplen++] = '(';
                tmp[tmplen++] = 'B';
            }
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret + retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* Leave the stream in the initial (ESC ( B) state. */
    if (mode != NORMAL) {
        if (retpos + 3 > *retlen) {
            *retlen = retpos + 3;
            newbuf = realloc(*ret, *retlen);
            if (!newbuf) {
                free(*ret);
                *ret = NULL;
                return 0;
            }
            *ret = newbuf;
        }
        *(*ret + retpos) = 0x1b;
        *(*ret + retpos + 1) = '(';
        *(*ret + retpos + 2) = 'B';
        retpos += 3;
    }

    /* Trim the buffer to the bytes actually produced. */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
686+
687+
/*
 * Convert a Shift_JIS byte string to EUC-JP.
 *
 * len    - number of bytes in buf
 * buf    - Shift_JIS input
 * ret    - receives a malloc()ed result buffer owned by the caller;
 *          set to NULL when the result is empty or on failure
 * retlen - receives the number of bytes stored in *ret
 *
 * Returns 1 on success, 0 when memory could not be allocated.
 */
int sjistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    unsigned char c, d;
    int pos, tmplen, retpos = 0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    *retlen = len;
    *ret = malloc(*retlen);
    if (!*ret) {
        return 0;
    }

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
            c = buf[pos];
            d = buf[pos+1];
            pos += 1;

            /* Go through JIS, then set the high bit of both bytes. */
            sjis_to_jis2(&c, &d);
            tmp[tmplen++] = c | 0x80;
            tmp[tmplen++] = d | 0x80;
        } else if (ishankana(buf[pos])) {
            /* Half-width kana is prefixed with 0x8e in EUC-JP. */
            tmp[tmplen++] = 0x8e;
            tmp[tmplen++] = buf[pos];
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret + retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* Trim the buffer to the bytes actually produced. */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;

    return 1;
}
755+
/*
 * Convert a JIS (escape-sequence based) byte string to EUC-JP.
 *
 * Recognised mode switches: ESC $ @, ESC $ B and ESC $ ( O (double-byte),
 * ESC ( B and ESC ( J (single-byte), ESC ( I and 0x0e (half-width kana),
 * 0x0f (back to single-byte). Mode switches produce no output.
 *
 * len    - number of bytes in buf
 * buf    - JIS input
 * ret    - receives a malloc()ed result buffer owned by the caller;
 *          set to NULL when the result is empty or on failure
 * retlen - receives the number of bytes stored in *ret
 *
 * Returns 1 on success, 0 when memory could not be allocated.
 */
int jistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    int pos, tmplen, retpos = 0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    *retlen = len;
    *ret = malloc(*retlen);
    if (!*ret) {
        return 0;
    }

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if ((pos + 2 < len) &&
            (!memcmp(buf+pos, "\x1b$@", 3) ||
             !memcmp(buf+pos, "\x1b$B", 3))) {

            mode = KANJI;
            pos += 2;
        }
        else if ((pos + 3 < len) && !memcmp(buf+pos, "\x1b$(O", 4)) {
            mode = KANJI;
            pos += 3;
        }
        else if ((pos + 2 < len) &&
            (!memcmp(buf+pos, "\x1b(B", 3) ||
             !memcmp(buf+pos, "\x1b(J", 3))) {

            mode = NORMAL;
            pos += 2;
        }
        else if ((pos + 2 < len) && !memcmp(buf+pos, "\x1b(I", 3)) {
            mode = HANKANA;
            pos += 2;
        }
        else if (buf[pos] == 0x0e) {
            mode = HANKANA;
        }
        else if (buf[pos] == 0x0f) {
            mode = NORMAL;
        }
        else if (mode == KANJI && isjis(buf[pos]) && (pos+1 < len) && isjis(buf[pos+1])) {
            /* Setting the high bit of both bytes yields the EUC code. */
            tmp[tmplen++] = buf[pos] | 0x80;
            tmp[tmplen++] = buf[pos+1] | 0x80;
            pos++;
        } else if (mode == HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
            tmp[tmplen++] = 0x8e;
            tmp[tmplen++] = buf[pos] | 0x80;
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret + retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    /* Input made only of mode switches produces an empty result. */
    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* Trim the buffer to the bytes actually produced. */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
847+
848+
/*
 * Convert a JIS (escape-sequence based) byte string to Shift_JIS.
 *
 * Recognised mode switches: ESC $ @, ESC $ B and ESC $ ( O (double-byte),
 * ESC ( B and ESC ( J (single-byte), ESC ( I and 0x0e (half-width kana),
 * 0x0f (back to single-byte). Mode switches produce no output.
 *
 * len    - number of bytes in buf
 * buf    - JIS input
 * ret    - receives a malloc()ed result buffer owned by the caller;
 *          set to NULL when the result is empty or on failure
 * retlen - receives the number of bytes stored in *ret
 *
 * Returns 1 on success, 0 when memory could not be allocated.
 */
int jistosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    unsigned char c, d;
    int pos, tmplen, retpos = 0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    *retlen = len;
    *ret = malloc(*retlen);
    if (!*ret) {
        return 0;
    }

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if ((pos + 2 < len) &&
            (!memcmp(buf+pos, "\x1b$@", 3) ||
             !memcmp(buf+pos, "\x1b$B", 3))) {

            mode = KANJI;
            pos += 2;
        }
        else if ((pos + 3 < len) && !memcmp(buf+pos, "\x1b$(O", 4)) {
            mode = KANJI;
            pos += 3;
        }
        else if ((pos + 2 < len) &&
            (!memcmp(buf+pos, "\x1b(B", 3) ||
             !memcmp(buf+pos, "\x1b(J", 3))) {

            mode = NORMAL;
            pos += 2;
        }
        else if ((pos + 2 < len) && !memcmp(buf+pos, "\x1b(I", 3)) {
            mode = HANKANA;
            pos += 2;
        }
        else if (buf[pos] == 0x0e) {
            mode = HANKANA;
        }
        else if (buf[pos] == 0x0f) {
            mode = NORMAL;
        }
        else if (mode == KANJI && isjis(buf[pos]) && (pos+1 < len) && isjis(buf[pos+1])) {
            c = buf[pos];
            d = buf[pos+1];
            pos++;

            jis_to_sjis2(&c, &d);
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if (mode == HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
            /* Half-width kana is the 7-bit value with the high bit set. */
            tmp[tmplen++] = buf[pos] | 0x80;
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret + retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    /* Input made only of mode switches produces an empty result. */
    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* Trim the buffer to the bytes actually produced. */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
944+
/*
 * Convert an EUC-JP byte string to Shift_JIS.
 *
 * len    - number of bytes in buf
 * buf    - EUC-JP input
 * ret    - receives a malloc()ed result buffer owned by the caller;
 *          set to NULL when the result is empty or on failure
 * retlen - receives the number of bytes stored in *ret
 *
 * Returns 1 on success, 0 when memory could not be allocated.
 */
int euctosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    unsigned char c, d;
    int pos, tmplen, retpos = 0;
    unsigned char tmp[10];
    unsigned char *newbuf;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    *retlen = len;
    *ret = malloc(*retlen);
    if (!*ret) {
        return 0;
    }

    for (pos = 0; pos < len; pos++) {
        tmplen = 0;

        if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
            /* Strip the high bits to get the JIS code, then convert. */
            c = buf[pos] & 0x7f;
            d = buf[pos+1] & 0x7f;
            pos += 1;

            jis_to_sjis2(&c, &d);
            tmp[tmplen++] = c;
            tmp[tmplen++] = d;
        } else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
            /* Drop the 0x8e prefix; the kana byte is used as is. */
            tmp[tmplen++] = buf[pos+1];
            pos++;
        } else {
            tmp[tmplen++] = buf[pos];
        }

        if (tmplen) {
            if (retpos + tmplen > *retlen) {
                *retlen = *retlen + len / 2 + 16;
                newbuf = realloc(*ret, *retlen);
                if (!newbuf) {
                    free(*ret);
                    *ret = NULL;
                    return 0;
                }
                *ret = newbuf;
            }
            memcpy(*ret + retpos, tmp, tmplen);
            retpos += tmplen;
        }
    }

    if (!retpos) {
        free(*ret);
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    /* Trim the buffer to the bytes actually produced. */
    newbuf = realloc(*ret, retpos);
    if (!newbuf) {
        free(*ret);
        *ret = NULL;
        return 0;
    }
    *ret = newbuf;
    *retlen = retpos;
    return 1;
}
1011+
/*
 * Half-width kana byte sequences, NULL terminated.
 *
 * Entry i corresponds to entry i of sjis_f_kana, euc_f_kana and euc_h_kana,
 * so the order must not change. A two-byte entry (base byte followed by
 * 0xde or 0xdf) always precedes the one-byte entry for the same base byte,
 * which lets a first-match scan pick the longer sequence.
 */
static const unsigned char *h_kana[] = {
    "\xdd", "\xdc",
    "\xdb", "\xda", "\xd9", "\xd8", "\xd7",
    "\xd6", "\xd5", "\xd4",
    "\xd3", "\xd2", "\xd1", "\xd0", "\xcf",
    "\xce\xdf", "\xce\xde", "\xce",
    "\xcd\xdf", "\xcd\xde", "\xcd",
    "\xcc\xdf", "\xcc\xde", "\xcc",
    "\xcb\xdf", "\xcb\xde", "\xcb",
    "\xca\xdf", "\xca\xde", "\xca",
    "\xc9", "\xc8", "\xc7", "\xc6", "\xc5",
    "\xc4\xde", "\xc4", "\xc3\xde", "\xc3", "\xc2\xde", "\xc2",
    "\xc1\xde", "\xc1", "\xc0\xde", "\xc0",
    "\xbf\xde", "\xbf", "\xbe\xde", "\xbe", "\xbd\xde", "\xbd",
    "\xbc\xde", "\xbc", "\xbb\xde", "\xbb",
    "\xba\xde", "\xba", "\xb9\xde", "\xb9", "\xb8\xde", "\xb8",
    "\xb7\xde", "\xb7", "\xb6\xde", "\xb6",
    "\xb5", "\xb4", "\xb3\xde", "\xb3", "\xb2", "\xb1",
    "\xb0", "\xaf", "\xae", "\xad", "\xac",
    "\xab", "\xaa", "\xa9", "\xa8", "\xa7",
    "\xa6", "\xa5", "\xa4", "\xa3", "\xa2", "\xa1",
    NULL
};
1023+
/*
 * Half-width kana as they appear in EUC-JP: every kana byte is preceded by
 * 0x8e. NULL terminated.
 *
 * Entry i corresponds to entry i of h_kana, sjis_f_kana and euc_f_kana, so
 * the order must not change. A four-byte entry always precedes the two-byte
 * entry sharing its first two bytes, which lets a first-match scan pick the
 * longer sequence.
 */
static const unsigned char *euc_h_kana[] = {
    "\x8e\xdd", "\x8e\xdc",
    "\x8e\xdb", "\x8e\xda", "\x8e\xd9", "\x8e\xd8", "\x8e\xd7",
    "\x8e\xd6", "\x8e\xd5", "\x8e\xd4",
    "\x8e\xd3", "\x8e\xd2", "\x8e\xd1", "\x8e\xd0", "\x8e\xcf",
    "\x8e\xce\x8e\xdf", "\x8e\xce\x8e\xde", "\x8e\xce",
    "\x8e\xcd\x8e\xdf", "\x8e\xcd\x8e\xde", "\x8e\xcd",
    "\x8e\xcc\x8e\xdf", "\x8e\xcc\x8e\xde", "\x8e\xcc",
    "\x8e\xcb\x8e\xdf", "\x8e\xcb\x8e\xde", "\x8e\xcb",
    "\x8e\xca\x8e\xdf", "\x8e\xca\x8e\xde", "\x8e\xca",
    "\x8e\xc9", "\x8e\xc8", "\x8e\xc7", "\x8e\xc6", "\x8e\xc5",
    "\x8e\xc4\x8e\xde", "\x8e\xc4", "\x8e\xc3\x8e\xde", "\x8e\xc3",
    "\x8e\xc2\x8e\xde", "\x8e\xc2", "\x8e\xc1\x8e\xde", "\x8e\xc1",
    "\x8e\xc0\x8e\xde", "\x8e\xc0",
    "\x8e\xbf\x8e\xde", "\x8e\xbf", "\x8e\xbe\x8e\xde", "\x8e\xbe",
    "\x8e\xbd\x8e\xde", "\x8e\xbd", "\x8e\xbc\x8e\xde", "\x8e\xbc",
    "\x8e\xbb\x8e\xde", "\x8e\xbb",
    "\x8e\xba\x8e\xde", "\x8e\xba", "\x8e\xb9\x8e\xde", "\x8e\xb9",
    "\x8e\xb8\x8e\xde", "\x8e\xb8", "\x8e\xb7\x8e\xde", "\x8e\xb7",
    "\x8e\xb6\x8e\xde", "\x8e\xb6",
    "\x8e\xb5", "\x8e\xb4", "\x8e\xb3\x8e\xde", "\x8e\xb3", "\x8e\xb2", "\x8e\xb1",
    "\x8e\xb0", "\x8e\xaf", "\x8e\xae", "\x8e\xad", "\x8e\xac",
    "\x8e\xab", "\x8e\xaa", "\x8e\xa9", "\x8e\xa8", "\x8e\xa7",
    "\x8e\xa6", "\x8e\xa5", "\x8e\xa4", "\x8e\xa3", "\x8e\xa2", "\x8e\xa1",
    NULL
};
1035+
/*
 * Full-width characters in Shift_JIS (two bytes each), NULL terminated.
 *
 * Entry i is the full-width counterpart of h_kana[i], so the order must not
 * change.
 */
static const unsigned char *sjis_f_kana[] = {
    "\x83\x93", "\x83\x8f",
    "\x83\x8d", "\x83\x8c", "\x83\x8b", "\x83\x8a", "\x83\x89",
    "\x83\x88", "\x83\x86", "\x83\x84",
    "\x83\x82", "\x83\x81", "\x83\x80", "\x83\x7e", "\x83\x7d",
    "\x83\x7c", "\x83\x7b", "\x83\x7a",
    "\x83\x79", "\x83\x78", "\x83\x77",
    "\x83\x76", "\x83\x75", "\x83\x74",
    "\x83\x73", "\x83\x72", "\x83\x71",
    "\x83\x70", "\x83\x6f", "\x83\x6e",
    "\x83\x6d", "\x83\x6c", "\x83\x6b", "\x83\x6a", "\x83\x69",
    "\x83\x68", "\x83\x67", "\x83\x66", "\x83\x65", "\x83\x64", "\x83\x63",
    "\x83\x61", "\x83\x60", "\x83\x5f", "\x83\x5e",
    "\x83\x5d", "\x83\x5c", "\x83\x5b", "\x83\x5a", "\x83\x59", "\x83\x58",
    "\x83\x57", "\x83\x56", "\x83\x55", "\x83\x54",
    "\x83\x53", "\x83\x52", "\x83\x51", "\x83\x50", "\x83\x4f", "\x83\x4e",
    "\x83\x4d", "\x83\x4c", "\x83\x4b", "\x83\x4a",
    "\x83\x49", "\x83\x47", "\x83\x94", "\x83\x45", "\x83\x43", "\x83\x41",
    "\x81\x5b", "\x83\x62", "\x83\x87", "\x83\x85", "\x83\x83",
    "\x83\x48", "\x83\x46", "\x83\x44", "\x83\x42", "\x83\x40",
    "\x83\x92", "\x81\x45", "\x81\x41", "\x81\x76", "\x81\x75", "\x81\x42",
    NULL
};
1052+
1053+
/*
 * Full-width characters in EUC-JP (two bytes each), NULL terminated.
 *
 * Entry i is the full-width counterpart of h_kana[i] / euc_h_kana[i], so the
 * order must not change.
 */
static const unsigned char *euc_f_kana[] = {
    "\xa5\xf3", "\xa5\xef",
    "\xa5\xed", "\xa5\xec", "\xa5\xeb", "\xa5\xea", "\xa5\xe9",
    "\xa5\xe8", "\xa5\xe6", "\xa5\xe4",
    "\xa5\xe2", "\xa5\xe1", "\xa5\xe0", "\xa5\xdf", "\xa5\xde",
    "\xa5\xdd", "\xa5\xdc", "\xa5\xdb",
    "\xa5\xda", "\xa5\xd9", "\xa5\xd8",
    "\xa5\xd7", "\xa5\xd6", "\xa5\xd5",
    "\xa5\xd4", "\xa5\xd3", "\xa5\xd2",
    "\xa5\xd1", "\xa5\xd0", "\xa5\xcf",
    "\xa5\xce", "\xa5\xcd", "\xa5\xcc", "\xa5\xcb", "\xa5\xca",
    "\xa5\xc9", "\xa5\xc8", "\xa5\xc7", "\xa5\xc6", "\xa5\xc5", "\xa5\xc4",
    "\xa5\xc2", "\xa5\xc1", "\xa5\xc0", "\xa5\xbf",
    "\xa5\xbe", "\xa5\xbd", "\xa5\xbc", "\xa5\xbb", "\xa5\xba", "\xa5\xb9",
    "\xa5\xb8", "\xa5\xb7", "\xa5\xb6", "\xa5\xb5",
    "\xa5\xb4", "\xa5\xb3", "\xa5\xb2", "\xa5\xb1", "\xa5\xb0", "\xa5\xaf",
    "\xa5\xae", "\xa5\xad", "\xa5\xac", "\xa5\xab",
    "\xa5\xaa", "\xa5\xa8", "\xa5\xf4", "\xa5\xa6", "\xa5\xa4", "\xa5\xa2",
    "\xa1\xbc", "\xa5\xc3", "\xa5\xe7", "\xa5\xe5", "\xa5\xe3",
    "\xa5\xa9", "\xa5\xa7", "\xa5\xa5", "\xa5\xa3", "\xa5\xa1",
    "\xa5\xf2", "\xa1\xa6", "\xa1\xa2", "\xa1\xd7", "\xa1\xd6", "\xa1\xa3",
    NULL
};
1070+
1071+
1072+int sjistohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1073+ int pos, tmplen, retpos=0;
1074+ char tmp[10];
1075+ char *newbuf;
1076+ int i;
1077+
1078+ if (!len) {
1079+ *retlen = 0;
1080+ return 1;
1081+ }
1082+
1083+ *retlen = len;
1084+ *ret = malloc(*retlen);
1085+ if (!*ret) {
1086+ return 0;
1087+ }
1088+
1089+ for (pos = 0; pos < len; pos++) {
1090+ tmplen=0;
1091+
1092+ if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
1093+ for (i = 0; sjis_f_kana[i]; i++) {
1094+ if (buf[pos] == sjis_f_kana[i][0] && buf[pos+1] == sjis_f_kana[i][1]) {
1095+ tmp[tmplen++] = h_kana[i][0];
1096+ if (h_kana[i][1]) {
1097+ tmp[tmplen++] = h_kana[i][1];
1098+ }
1099+ break;
1100+ }
1101+ }
1102+ if (!sjis_f_kana[i]) {
1103+ tmp[tmplen++] = buf[pos];
1104+ tmp[tmplen++] = buf[pos+1];
1105+ }
1106+
1107+ pos++;
1108+ } else {
1109+ tmp[tmplen++] = buf[pos];
1110+ }
1111+
1112+ if (tmplen) {
1113+ if (retpos + tmplen > *retlen) {
1114+ *retlen = *retlen + len / 2 + 16;
1115+ newbuf = realloc(*ret, *retlen);
1116+ if (!newbuf) {
1117+ free(*ret);
1118+ return 0;
1119+ }
1120+ *ret = newbuf;
1121+ }
1122+ memcpy(*ret+retpos, tmp, tmplen);
1123+ retpos += tmplen;
1124+ }
1125+ }
1126+
1127+ if (!retpos) {
1128+ *retlen = 0;
1129+ free(*ret);
1130+ return 1;
1131+ }
1132+
1133+ newbuf = realloc(*ret, retpos);
1134+ if (!newbuf) {
1135+ free(*ret);
1136+ return 0;
1137+ }
1138+ *ret = newbuf;
1139+ *retlen = retpos;
1140+
1141+ return 1;
1142+}
1143+
1144+
1145+int sjistofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1146+ int pos, tmplen, retpos=0;
1147+ char tmp[10];
1148+ char *newbuf;
1149+ int i, j;
1150+
1151+ if (!len) {
1152+ *retlen = 0;
1153+ return 1;
1154+ }
1155+
1156+ *retlen = len;
1157+ *ret = malloc(*retlen);
1158+ if (!*ret) {
1159+ return 0;
1160+ }
1161+
1162+ for (pos = 0; pos < len; pos++) {
1163+ tmplen=0;
1164+
1165+ if (ishankana(buf[pos])) {
1166+ for (i = 0; h_kana[i]; i++) {
1167+ for (j = 0; h_kana[i][j] && buf[pos+j]; j++) {
1168+ if (h_kana[i][j] != buf[pos+j]) {
1169+ break;
1170+ }
1171+ }
1172+ if (!h_kana[i][j]) {
1173+ const char *p;
1174+ for (p = sjis_f_kana[i]; *p; p++) {
1175+ tmp[tmplen++] = *p;
1176+ }
1177+ pos += j-1;
1178+ break;
1179+ }
1180+ }
1181+
1182+ if (!h_kana[i]) {
1183+ tmp[tmplen++] = buf[pos];
1184+ }
1185+ }
1186+ else if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
1187+ tmp[tmplen++] = buf[pos];
1188+ tmp[tmplen++] = buf[pos+1];
1189+ pos += 1;
1190+ } else {
1191+ tmp[tmplen++] = buf[pos];
1192+ }
1193+
1194+ if (tmplen) {
1195+ if (retpos + tmplen > *retlen) {
1196+ *retlen = *retlen + len / 2 + 16;
1197+ newbuf = realloc(*ret, *retlen);
1198+ if (!newbuf) {
1199+ free(*ret);
1200+ return 0;
1201+ }
1202+ *ret = newbuf;
1203+ }
1204+ memcpy(*ret+retpos, tmp, tmplen);
1205+ retpos += tmplen;
1206+ }
1207+ }
1208+
1209+ if (!retpos) {
1210+ *retlen = 0;
1211+ free(*ret);
1212+ return 1;
1213+ }
1214+
1215+ newbuf = realloc(*ret, retpos);
1216+ if (!newbuf) {
1217+ free(*ret);
1218+ return 0;
1219+ }
1220+ *ret = newbuf;
1221+ *retlen = retpos;
1222+
1223+ return 1;
1224+}
1225+
1226+int euctohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1227+ int pos, tmplen, retpos=0;
1228+ char tmp[10];
1229+ char *newbuf;
1230+ int i;
1231+
1232+ if (!len) {
1233+ *retlen = 0;
1234+ return 1;
1235+ }
1236+
1237+ *retlen = len;
1238+ *ret = malloc(*retlen);
1239+ if (!*ret) {
1240+ return 0;
1241+ }
1242+
1243+ for (pos = 0; pos < len; pos++) {
1244+ tmplen=0;
1245+
1246+ if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
1247+ for (i = 0; euc_f_kana[i]; i++) {
1248+ if (buf[pos] == euc_f_kana[i][0] && buf[pos+1] == euc_f_kana[i][1]) {
1249+ tmp[tmplen++] = '\x8e';
1250+ tmp[tmplen++] = h_kana[i][0];
1251+ if (h_kana[i][1]) {
1252+ tmp[tmplen++] = '\x8e';
1253+ tmp[tmplen++] = h_kana[i][1];
1254+ }
1255+ break;
1256+ }
1257+ }
1258+ if (!euc_f_kana[i]) {
1259+ tmp[tmplen++] = buf[pos];
1260+ tmp[tmplen++] = buf[pos+1];
1261+ }
1262+ pos++;
1263+ }
1264+ else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
1265+ tmp[tmplen++] = buf[pos];
1266+ tmp[tmplen++] = buf[pos+1];
1267+ pos++;
1268+ } else {
1269+ tmp[tmplen++] = buf[pos];
1270+ }
1271+
1272+ if (tmplen) {
1273+ if (retpos + tmplen > *retlen) {
1274+ *retlen = *retlen + len / 2 + 16;
1275+ newbuf = realloc(*ret, *retlen);
1276+ if (!newbuf) {
1277+ free(*ret);
1278+ return 0;
1279+ }
1280+ *ret = newbuf;
1281+ }
1282+ memcpy(*ret+retpos, tmp, tmplen);
1283+ retpos += tmplen;
1284+ }
1285+ }
1286+
1287+ if (!retpos) {
1288+ *retlen = 0;
1289+ free(*ret);
1290+ return 1;
1291+ }
1292+
1293+ newbuf = realloc(*ret, retpos);
1294+ if (!newbuf) {
1295+ free(*ret);
1296+ return 0;
1297+ }
1298+ *ret = newbuf;
1299+ *retlen = retpos;
1300+
1301+ return 1;
1302+}
1303+
1304+
1305+int euctofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) {
1306+ int pos, tmplen, retpos=0;
1307+ char tmp[10];
1308+ char *newbuf;
1309+ int i, j;
1310+
1311+ if (!len) {
1312+ *retlen = 0;
1313+ return 1;
1314+ }
1315+
1316+ *retlen = len;
1317+ *ret = malloc(*retlen);
1318+ if (!*ret) {
1319+ return 0;
1320+ }
1321+
1322+ for (pos = 0; pos < len; pos++) {
1323+ tmplen=0;
1324+
1325+ if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
1326+ for (i = 0; euc_h_kana[i]; i++) {
1327+ for (j = 0; euc_h_kana[i][j] && buf[pos+j]; j++) {
1328+ if (euc_h_kana[i][j] != buf[pos+j]) {
1329+ break;
1330+ }
1331+ }
1332+ if (!euc_h_kana[i][j]) {
1333+ const char *p;
1334+ for (p = euc_f_kana[i]; *p; p++) {
1335+ tmp[tmplen++] = *p;
1336+ }
1337+ pos += j-1;
1338+ break;
1339+ }
1340+ }
1341+
1342+ if (!h_kana[i]) {
1343+ tmp[tmplen++] = buf[pos];
1344+ }
1345+ }
1346+ else if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
1347+ tmp[tmplen++] = buf[pos];
1348+ tmp[tmplen++] = buf[pos+1];
1349+ pos += 1;
1350+ } else {
1351+ tmp[tmplen++] = buf[pos];
1352+ }
1353+
1354+ if (tmplen) {
1355+ if (retpos + tmplen > *retlen) {
1356+ *retlen = *retlen + len / 2 + 16;
1357+ newbuf = realloc(*ret, *retlen);
1358+ if (!newbuf) {
1359+ free(*ret);
1360+ return 0;
1361+ }
1362+ *ret = newbuf;
1363+ }
1364+ memcpy(*ret+retpos, tmp, tmplen);
1365+ retpos += tmplen;
1366+ }
1367+ }
1368+
1369+ if (!retpos) {
1370+ *retlen = 0;
1371+ free(*ret);
1372+ return 1;
1373+ }
1374+
1375+ newbuf = realloc(*ret, retpos);
1376+ if (!newbuf) {
1377+ free(*ret);
1378+ return 0;
1379+ }
1380+ *ret = newbuf;
1381+ *retlen = retpos;
1382+
1383+ return 1;
1384+}
1385+
1386+
1387+#ifdef PYKF_MAIN
1388+
1389+
/*
 * Scratch driver, built only when PYKF_MAIN is defined.  It runs a single
 * sjistojis() call on a one-character string and does not inspect the result.
 *
 * The large comment below holds older manual checks that are disabled; its
 * converter calls pass four arguments, while the live sjistojis() call passes
 * five, so they would need updating before being re-enabled.
 */
int main(void) {
/*

    char *ret, *ret2, *ret3, *ret4, *ret5, *ret6, *ret7, *ret8;
    int retlen, retlen2, retlen3, retlen4, retlen5, retlen6, retlen7, retlen8;
    char *s1 = "\x82\xa0\xb1\x88\x9f\x61\x82\xa2\xb2\x8b\x8f\x62\x82\xa4\xb3\x89\x4b\x63\x82\xa6\xb4\x93\xbe\x64\x82\xa8\xb5\x94\xf6\x6f";
    char *s2 = "アイウエオ";
    char *gaiji = "\xf0\x40";
    char *s3 = "あいうえお\x81";
    char *s4 = "アイウエオカ";
    char *s5 = "アイ";
    int guessed;

    guess(strlen(s1), s1, 1);
    sjistohankana(strlen(s2), s2, &ret7, &retlen7);


    sjistojis(strlen(s1), s1, &ret, &retlen);
    jistoeuc(retlen, ret, &ret2, &retlen2);
    guess(retlen2, ret2, 1);

    euctosjis(retlen2, ret2, &ret3, &retlen3);
    assert(strncmp(s1, ret3, strlen(s1))==0);

    euctojis(retlen2, ret2, &ret4, &retlen4);
    assert(strncmp(ret, ret4, retlen)==0);

    sjistoeuc(strlen(s1), s1, &ret5, &retlen5);
    assert(strncmp(ret2, ret5, strlen(ret2))==0);

    jistosjis(retlen4, ret4, &ret6, &retlen6);
    assert(strncmp(s1, ret6, strlen(s1))==0);

    sjistoeuc(strlen(gaiji), gaiji, &ret7, &retlen7);

    sjistojis(strlen(s5), s5, &ret8, &retlen8);

    guessed = guess(strlen(s3), s3, 1);
    assert(guessed == ERROR);

    guessed = guess(strlen(s3), s3, 0);
    assert(guessed == SJIS);

    guessed = guess(strlen(s4), s4, 0);


*/
    char *s = "?";
    char *ret;
    int retlen;

    sjistojis(strlen(s), s, &ret, &retlen, 0);

    /* main must return int in a hosted environment; report success. */
    return 0;
}
1444+
1445+#endif
--- trunk/MANIFEST.in (revision 7)
+++ trunk/MANIFEST.in (revision 8)
@@ -1,2 +1,3 @@
11 include readme.sjis
2-
2+include src/convert.h
3+include src/pykf.h
Afficher sur ancien navigateur de dépôt.