Tsukada
ttsuk****@users*****
2002年 5月 23日 (木) 04:55:52 JST
ttsukada 02/05/23 04:55:52 Modified: ext/mbstring mbfunction.c Log: MBFL update Revision Changes Path 1.3 +290 -16 php4/ext/mbstring/mbfunction.c Index: mbfunction.c =================================================================== RCS file: /cvsroot/php-i18n/php4/ext/mbstring/mbfunction.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mbfunction.c 21 May 2002 20:31:26 -0000 1.2 +++ mbfunction.c 22 May 2002 19:55:52 -0000 1.3 @@ -7,10 +7,6 @@ #include "php.h" #include "php_mb.h" -#include "php_mb_str.h" -#include "php_mb_buf.h" -#include "php_mb_filter.h" -#include "php_mb_function.h" #include "ext/standard/php_string.h" @@ -59,27 +55,43 @@ } +/* Return 0 if input contains any illegal encoding, otherwise 1. + * Even if any illegal encoding is detected the result may contain a list + * of parsed encodings. + */ PHPAPI int -php_mb_parse_encoding_list(const char *string_val, int string_len, php_mb_enc ***return_list, int *return_size, int persistent TSRMLS_DC) +php_mb_parse_encoding_list( + const char *string_val, + int string_len, + php_mb_enc ***return_list, + int *return_size, + int persistent TSRMLS_DC) { - int n, size; + int i, n, size, bauto, result; char *p, *p1, *p2, *endp, *tmpstr; php_mb_enc **list, *encoding; + result = 0; + if (return_list) { + *return_list = NULL; + } + if (return_size) { + *return_size = 0; + } if (string_val == NULL) { MBG(last_errno) = PHP_MB_ERR_NULL_POINTER; - return FAILURE; + return result; } if (string_len <= 0) { MBG(last_errno) = PHP_MB_ERR_ILLEGAL_ARGUMENT; - return FAILURE; + return result; } - /* copy the value string for work */ + /* copy the string for work */ tmpstr = (char *)estrndup(string_val, string_len); if (tmpstr == NULL) { MBG(last_errno) = PHP_MB_ERR_NO_MEMORY; - return FAILURE; + return result; } /* count the number of listed encoding names */ @@ -90,16 +102,18 @@ p1 = p2 + 1; n++; } - size = n; + size = n + MBG(language_r)->auto_detect_count; /* make list */ list = (php_mb_enc **)pecalloc(size + 1, sizeof(php_mb_enc *), persistent); if (list == NULL) { MBG(last_errno) = PHP_MB_ERR_NO_MEMORY; efree(tmpstr); - return FAILURE; + return result; } + result = 1; + bauto = 0; n = 0; p1 = tmpstr; do { @@ -120,25 +134,52 @@ /* convert to the encoding number and check encoding */ encoding = php_mb_enc_resolve(p1); if (encoding != NULL) { - list[n++] = encoding; + if (encoding->id == php_mb_encid_auto) { + if (!bauto) { + bauto = 1; + i = 0; + while (i < MBG(language_r)->auto_detect_count) { + list[n] = php_mb_enc_get(MBG(language_r)->auto_detect_list[i]); + i++; + n++; + } + } + } else { + list[n] = encoding; + n++; + } + } else { + result = 0; } p1 = p2 + 1; } while (n < size && p2 != NULL); list[n] = NULL; - if (return_list != NULL) { - *return_list = list; + if (n > 0) { + if (return_list) { + *return_list = list; + } else { + pefree(list, persistent); + } } else { pefree(list, persistent); + result = 0; } if (return_size != NULL) { *return_size = n; } efree(tmpstr); - return SUCCESS; + return result; } +/* {{{ php_mb_check_encoding_list */ +PHPAPI int +php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) { + return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC); +} +/* }}} */ + /* * encoding converter @@ -228,6 +269,82 @@ } +/* {{{ php_mb_convert_encoding */ +PHPAPI char * +_php_mb_det_conv_encoding( + const char *input, + size_t length, + const char *_to_encoding, + const char *_from_encodings, + size_t *output_len TSRMLS_DC) +{ + int size; + char *output=NULL; + php_mb_enc *from_encoding, *to_encoding, **list; + php_mb_bufconv *convd; + + if (output_len != NULL) { + *output_len = 0; + } + if ( !input || !length) { + return NULL; + } + /* new encoding */ + if (_to_encoding && strlen(_to_encoding)) { + to_encoding = php_mb_enc_resolve(_to_encoding); + if (to_encoding == NULL) { + php_error(E_WARNING, "%s() unknown encoding \"%s\"", + get_active_function_name(TSRMLS_C), _to_encoding); + return NULL; + } + } else { + to_encoding = MBG(internal_encoding_r); + } + + /* pre-conversion encoding */ + if (_from_encodings != NULL) { + list = NULL; + size = 0; + php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC); + if (size == 1) { + from_encoding = list[0]; + } else if (size > 1) { + /* auto detect */ + from_encoding = php_mb_detect_encoding(input, length, list, size TSRMLS_CC); + if (from_encoding == NULL) { + php_error(E_WARNING, "%s() unable to detect character encoding", + get_active_function_name(TSRMLS_C)); + from_encoding = php_mb_enc_get(php_mb_encid_pass); + to_encoding = from_encoding; + } + } else { + php_error(E_WARNING, "$s() illegal character encoding specified", + get_active_function_name(TSRMLS_C)); + } + if (list != NULL) { + efree(list); + } + } + + /* initialize converter */ + convd = php_mb_bufconv_create(to_encoding, from_encoding, length TSRMLS_CC); + if (convd == NULL) { + php_error(E_WARNING, "%s() unable to create character encoding converter", + get_active_function_name(TSRMLS_C)); + return NULL; + } + + /* do it */ + php_mb_bufconv_feed(convd, input, length TSRMLS_CC); + php_mb_bufconv_flush(convd TSRMLS_CC); + php_mb_bufconv_extract(convd, &output, output_len TSRMLS_CC); + php_mb_bufconv_free(convd TSRMLS_CC); + + return output; +} +/* }}} */ + + PHPAPI php_mb_buf * _php_mb_convert_to_wchar(const char *string_val, int string_len, php_mb_enc *from TSRMLS_DC) { @@ -2983,4 +3100,161 @@ return result; } + + +#ifdef ZEND_MULTIBYTE + +PHPAPI int +php_mbstring_set_zend_encoding(TSRMLS_D) +{ + zend_encoding_detector encoding_detector; + zend_encoding_converter encoding_converter; + zend_multibyte_oddlen multibyte_oddlen; + + /* notify script encoding to Zend Engine */ + zend_multibyte_set_script_encoding(MBG(script_encoding), strlen(MBG(script_encoding)) TSRMLS_CC); + + encoding_detector = php_mbstring_encoding_detector; + encoding_converter = NULL; + multibyte_oddlen = php_mbstring_oddlen; + +#if defined(MBSTR_ENC_TRANS) + /* notify internal encoding to Zend Engine */ + zend_multibyte_set_internal_encoding((char*)MBG(internal_encoding_r)->name, strlen(MBG(internal_encoding_r)->name) TSRMLS_CC); + + encoding_converter = php_mbstring_encoding_converter; +#endif /* defined(MBSTR_ENC_TRANS) */ + + zend_multibyte_set_functions(encoding_detector, encoding_converter, + multibyte_oddlen TSRMLS_CC); + + return 0; +} + +/* + * mb_detect_encoding (interface for Zend Engine) + */ +char * +php_mbstring_encoding_detector(char *arg_string, int arg_length, char *arg_list TSRMLS_DC) +{ + php_mb_enc *encoding, **elist; + int size; + + /* make encoding list */ + list = NULL; + size = 0; + php_mb_parse_encoding_list(arg_list, strlen(arg_list), &elist, &size, 0 TSRMLS_CC); + if (size <= 0) { + return NULL; + } + + if (elist == NULL) { + php_mb_parse_encoding_list(MBG(detect_order), strlen(MBG(detect_order)), &elist, &size, 0 TSRMLS_CC); + } + + encoding = php_mb_detect_encoding(arg_string, arg_length, elist, size TSRMLS_CC); + if (elist != NULL) { + efree(elist); + } + if (encoding != NULL) { + return estrdup(encoding->name); + } else { + return NULL; + } +} + + +/* + * mb_convert_encoding (interface for Zend Engine) + */ +int +php_mbstring_encoding_converter( + char **to, + int *to_length, + char *from, + int from_length, + const char *encoding_to, + const char *encoding_from + TSRMLS_DC) +{ + int result; + php_mb_enc *from_encoding, *to_encoding; + php_mb_bufconv *convd; + + /* new encoding */ + to_encoding = php_mb_enc_resolve(encoding_to); + if (to_encoding == NULL) + return -1; + + /* old encoding */ + from_encoding = php_mb_enc_resolve(encoding_from); + if (from_encoding == NULL) + return -1; + + /* initialize converter */ + convd = php_mb_bufconv_create(to_encoding, from_encoding, from_length TSRMLS_CC); + if (convd == NULL) + return -1; + + /* do it */ + php_mb_bufconv_feed(convd, from, from_length TSRMLS_CC); + php_mb_bufconv_flush(convd TSRMLS_CC); + php_mb_bufconv_extruct(convd, to, to_length TSRMLS_CC); + php_mb_bufconv_free(convd); + + return 0; +} + + +/* + * returns number of odd (e.g. appears only first byte of multibyte + * character) chars + */ +int php_mbstring_oddlen(char *string, int length, const char *enc_name TSRMLS_DC) +{ + int n, m, k; + unsigned char *p; + const unsigned char *mbtab; + php_mb_enc *encoding; + + encoding = php_mb_enc_resolve(enc_name); + if (encoding == NULL) { + return 0; + } + + if (encoding->flag & MBFL_ENCTYPE_SBCS) { + return 0; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { + return length % 2; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { + return length % 4; + } else if (encoding->mblen_table != NULL) { + mbtab = encoding->mblen_table; + n = 0; + p = string; + k = length; + /* count */ + if (p != NULL) { + while (n < k) { + m = mbtab[*p]; + n += m; + p += m; + } + } + return n-k; + } else { + /* how can i do ? */ + return 0; + } + /* NOT REACHED */ +} + +#else + +PHPAPI int +php_mbstring_set_zend_encoding(TSRMLS_D){ + return 0; +} + +#endif /* ZEND_MULTIBYTE */