[php-i18n-commits] cvs commit: php4/ext/mbstring mbfunction.c

Back to archive index

Tsukada ttsuk****@users*****
2002年 5月 23日 (木) 04:55:52 JST


ttsukada    02/05/23 04:55:52

  Modified:    ext/mbstring mbfunction.c
  Log:
  MBFL update
  
  Revision  Changes    Path
  1.3       +290 -16   php4/ext/mbstring/mbfunction.c
  
  Index: mbfunction.c
  ===================================================================
  RCS file: /cvsroot/php-i18n/php4/ext/mbstring/mbfunction.c,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- mbfunction.c	21 May 2002 20:31:26 -0000	1.2
  +++ mbfunction.c	22 May 2002 19:55:52 -0000	1.3
  @@ -7,10 +7,6 @@
   
   #include "php.h"
   #include "php_mb.h"
  -#include "php_mb_str.h"
  -#include "php_mb_buf.h"
  -#include "php_mb_filter.h"
  -#include "php_mb_function.h"
   #include "ext/standard/php_string.h"
   
   
  @@ -59,27 +55,43 @@
   }
   
   
  +/*  Return 0 if input contains any illegal encoding, otherwise 1.
  + *  Even if any illegal encoding is detected the result may contain a list 
  + *  of parsed encodings.
  + */
   PHPAPI int
  -php_mb_parse_encoding_list(const char *string_val, int string_len, php_mb_enc ***return_list, int *return_size, int persistent  TSRMLS_DC)
  +php_mb_parse_encoding_list(
  +    const char *string_val,
  +    int string_len,
  +    php_mb_enc ***return_list,
  +    int *return_size,
  +    int persistent  TSRMLS_DC)
   {
  -	int n, size;
  +	int i, n, size, bauto, result;
   	char *p, *p1, *p2, *endp, *tmpstr;
   	php_mb_enc **list, *encoding;
   
  +	result = 0;
  +	if (return_list) {
  +		*return_list = NULL;
  +	}
  +	if (return_size) {
  +		*return_size = 0;
  +	}
   	if (string_val == NULL) {
   		MBG(last_errno) = PHP_MB_ERR_NULL_POINTER;
  -		return FAILURE;
  +		return result;
   	}
   	if (string_len <= 0) {
   		MBG(last_errno) = PHP_MB_ERR_ILLEGAL_ARGUMENT;
  -		return FAILURE;
  +		return result;
   	}
   
  -	/* copy the value string for work */
  +	/* copy the string for work */
   	tmpstr = (char *)estrndup(string_val, string_len);
   	if (tmpstr == NULL) {
   		MBG(last_errno) = PHP_MB_ERR_NO_MEMORY;
  -		return FAILURE;
  +		return result;
   	}
   
   	/* count the number of listed encoding names */
  @@ -90,16 +102,18 @@
   		p1 = p2 + 1;
   		n++;
   	}
  -	size = n;
  +	size = n + MBG(language_r)->auto_detect_count;
   
   	/* make list */
   	list = (php_mb_enc **)pecalloc(size + 1, sizeof(php_mb_enc *), persistent);
   	if (list == NULL) {
   		MBG(last_errno) = PHP_MB_ERR_NO_MEMORY;
   		efree(tmpstr);
  -		return FAILURE;
  +		return result;
   	}
   
  +	result = 1;
  +	bauto = 0;
   	n = 0;
   	p1 = tmpstr;
   	do {
  @@ -120,25 +134,52 @@
   		/* convert to the encoding number and check encoding */
   		encoding = php_mb_enc_resolve(p1);
   		if (encoding != NULL) {
  -			list[n++] = encoding;
  +			if (encoding->id == php_mb_encid_auto) {
  +				if (!bauto) {
  +					bauto = 1;
  +					i = 0;
  +					while (i < MBG(language_r)->auto_detect_count) {
  +						list[n] = php_mb_enc_get(MBG(language_r)->auto_detect_list[i]);
  +						i++;
  +						n++;
  +					}
  +				}
  +			} else {
  +				list[n] = encoding;
  +				n++;
  +			}
  +		} else {
  +			result = 0;
   		}
   		p1 = p2 + 1;
   	} while (n < size && p2 != NULL);
   	list[n] = NULL;
   
  -	if (return_list != NULL) {
  -		*return_list = list;
  +	if (n > 0) {
  +		if (return_list) {
  +			*return_list = list;
  +		} else {
  +			pefree(list, persistent);
  +		}
   	} else {
   		pefree(list, persistent);
  +		result = 0;
   	}
   	if (return_size != NULL) {
   		*return_size = n;
   	}
   	efree(tmpstr);
   
  -	return SUCCESS;
  +	return result;
   }
   
  +/* {{{ php_mb_check_encoding_list: result of parsing encoding_list (0 if illegal or NULL, else 1) */
  +PHPAPI int
  +php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
  +	/* strlen(NULL) is undefined; the parser itself rejects a NULL list */
  +	return php_mb_parse_encoding_list(encoding_list, encoding_list ? strlen(encoding_list) : 0, NULL, NULL, 0 TSRMLS_CC);
  +}
  +/* }}} */
   
   /*
    * encoding converter
  @@ -228,6 +269,82 @@
   }
   
   
  +/* {{{ _php_mb_det_conv_encoding
  + * Convert input to _to_encoding (the internal encoding when NULL or empty). The
  + * source encoding comes from _from_encodings and is auto-detected when that list
  + * has several entries. Returns the extracted output buffer, or NULL on failure. */
  +PHPAPI char *
  +_php_mb_det_conv_encoding(
  +    const char *input,
  +    size_t length,
  +    const char *_to_encoding,
  +    const char *_from_encodings,
  +    size_t *output_len TSRMLS_DC)
  +{
  +	int size = 0;
  +	char *output = NULL;
  +	php_mb_enc *from_encoding, *to_encoding, **list = NULL;
  +	php_mb_bufconv *convd;
  +
  +	if (output_len != NULL) {
  +		*output_len = 0;
  +	}
  +	if (!input || !length) {
  +		return NULL;
  +	}
  +	/* new encoding */
  +	if (_to_encoding && strlen(_to_encoding)) {
  +		to_encoding = php_mb_enc_resolve(_to_encoding);
  +		if (to_encoding == NULL) {
  +			php_error(E_WARNING, "%s() unknown encoding \"%s\"", get_active_function_name(TSRMLS_C), _to_encoding);
  +			return NULL;
  +		}
  +	} else {
  +		to_encoding = MBG(internal_encoding_r);
  +	}
  +
  +	/* pre-conversion encoding: start from the internal encoding so from_encoding
  +	 * is always initialized, even with no list or a list with no usable entry */
  +	from_encoding = MBG(internal_encoding_r);
  +	if (_from_encodings != NULL) {
  +		php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0  TSRMLS_CC);
  +		if (size == 1) {
  +			from_encoding = list[0];
  +		} else if (size > 1) {
  +			/* auto detect */
  +			from_encoding = php_mb_detect_encoding(input, length, list, size TSRMLS_CC);
  +			if (from_encoding == NULL) {
  +				php_error(E_WARNING, "%s() unable to detect character encoding", get_active_function_name(TSRMLS_C));
  +				/* use the "pass" encoding on both sides of the converter */
  +				from_encoding = php_mb_enc_get(php_mb_encid_pass);
  +				to_encoding = from_encoding;
  +			}
  +		} else {
  +			php_error(E_WARNING, "%s() illegal character encoding specified", get_active_function_name(TSRMLS_C));
  +		}
  +		if (list != NULL) {
  +			efree(list);	/* the list was requested non-persistent (persistent == 0) */
  +		}
  +	}
  +
  +	/* initialize converter */
  +	convd = php_mb_bufconv_create(to_encoding, from_encoding, length TSRMLS_CC);
  +	if (convd == NULL) {
  +		php_error(E_WARNING, "%s() unable to create character encoding converter", get_active_function_name(TSRMLS_C));
  +		return NULL;
  +	}
  +
  +	/* do it */
  +	php_mb_bufconv_feed(convd, input, length TSRMLS_CC);
  +	php_mb_bufconv_flush(convd TSRMLS_CC);
  +	php_mb_bufconv_extract(convd, &output, output_len TSRMLS_CC);
  +	php_mb_bufconv_free(convd TSRMLS_CC);
  +
  +	return output;
  +}
  +/* }}} */
  +
  +
   PHPAPI php_mb_buf *
   _php_mb_convert_to_wchar(const char *string_val, int string_len, php_mb_enc *from  TSRMLS_DC)
   {
  @@ -2983,4 +3100,161 @@
   	return result;
   }
   
  +
  +
  +#ifdef ZEND_MULTIBYTE
  +
  +/*
  + *	Hands the script encoding name and the mbstring detector / converter /
  + *	oddlen callbacks to the zend_multibyte_set_*() functions. Always returns 0.
  + */
  +PHPAPI int
  +php_mbstring_set_zend_encoding(TSRMLS_D)
  +{
  +	zend_encoding_detector detect_cb = php_mbstring_encoding_detector;
  +	zend_encoding_converter convert_cb = NULL;	/* set only under MBSTR_ENC_TRANS */
  +	zend_multibyte_oddlen oddlen_cb = php_mbstring_oddlen;
  +
  +	/* notify script encoding to Zend Engine */
  +	zend_multibyte_set_script_encoding(MBG(script_encoding), strlen(MBG(script_encoding)) TSRMLS_CC);
  +
  +#if defined(MBSTR_ENC_TRANS)
  +	/* notify internal encoding to Zend Engine and supply the converter callback */
  +	convert_cb = php_mbstring_encoding_converter;
  +	zend_multibyte_set_internal_encoding((char*)MBG(internal_encoding_r)->name, strlen(MBG(internal_encoding_r)->name) TSRMLS_CC);
  +#endif /* defined(MBSTR_ENC_TRANS) */
  +
  +	/* pass all three callbacks in one call */
  +	zend_multibyte_set_functions(detect_cb, convert_cb,
  +			oddlen_cb TSRMLS_CC);
  +
  +	return 0;
  +}
  +
  +/*
  + *	mb_detect_encoding (interface for Zend Engine)
  + *	Detects the encoding of arg_string using the names in arg_list, or the
  + *	names in MBG(detect_order) when arg_list is NULL. Returns an estrdup()ed
  + *	encoding name, or NULL when no list could be built or detection failed.
  + */
  +char *
  +php_mbstring_encoding_detector(char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
  +{
  +	php_mb_enc *encoding, **elist = NULL;
  +	int size = 0;
  +	const char *names;
  +
  +	/* strlen(NULL) is undefined, so never hand a NULL list to the parser */
  +	names = (arg_list != NULL) ? arg_list : MBG(detect_order);
  +	if (names != NULL) {
  +		php_mb_parse_encoding_list(names, strlen(names), &elist, &size, 0 TSRMLS_CC);
  +	}
  +	if (size <= 0 || elist == NULL) {
  +		return NULL;
  +	}
  +
  +	encoding = php_mb_detect_encoding(arg_string, arg_length, elist, size TSRMLS_CC);
  +	/* only the list array is released here (it was parsed with persistent == 0) */
  +	efree(elist);
  +	if (encoding == NULL) {
  +		return NULL;
  +	}
  +	/* return a fresh copy of the name rather than the encoding's own string */
  +	return estrdup(encoding->name);
  +}
  +
  +
  +/*
  + *	mb_convert_encoding (interface for Zend Engine)
  + *	The converter output is extracted into *to / *to_length. Returns 0 on success,
  + *	-1 when an encoding name does not resolve or the converter cannot be created.
  + */
  +int
  +php_mbstring_encoding_converter(
  +    char **to,
  +    int *to_length,
  +    char *from,
  +    int from_length,
  +    const char *encoding_to,
  +    const char *encoding_from
  +    TSRMLS_DC)
  +{
  +	php_mb_enc *from_encoding, *to_encoding;
  +	php_mb_bufconv *convd;
  +
  +	/* new encoding */
  +	to_encoding = php_mb_enc_resolve(encoding_to);
  +	if (to_encoding == NULL)
  +		return -1;
  +
  +	/* old encoding */
  +	from_encoding = php_mb_enc_resolve(encoding_from);
  +	if (from_encoding == NULL)
  +		return -1;
  +
  +	/* initialize converter */
  +	convd = php_mb_bufconv_create(to_encoding, from_encoding, from_length  TSRMLS_CC);
  +	if (convd == NULL)
  +		return -1;
  +
  +	/* do it; NOTE(review): _php_mb_det_conv_encoding passes a size_t * where to_length (int *) goes - confirm prototype */
  +	php_mb_bufconv_feed(convd, from, from_length  TSRMLS_CC);
  +	php_mb_bufconv_flush(convd  TSRMLS_CC);
  +	php_mb_bufconv_extract(convd, to, to_length  TSRMLS_CC);
  +	php_mb_bufconv_free(convd  TSRMLS_CC);
  +	return 0;
  +}
  +
  +
  +/*
  + *	returns number of odd (e.g. appears only first byte of multibyte
  + *	character) chars; 0 when enc_name does not resolve, string is NULL in
  + *	the table-driven case, or the encoding offers no way to measure
  + */
  +int php_mbstring_oddlen(char *string, int length, const char *enc_name TSRMLS_DC)
  +{
  +	int n, m;
  +	const unsigned char *p, *mbtab;
  +	php_mb_enc *encoding;
  +
  +	encoding = php_mb_enc_resolve(enc_name);
  +	if (encoding == NULL) {
  +		return 0;
  +	}
  +
  +	if (encoding->flag & MBFL_ENCTYPE_SBCS) {
  +		return 0;
  +	} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  +		return length % 2;
  +	} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  +		return length % 4;
  +	} else if (encoding->mblen_table != NULL && string != NULL) {
  +		mbtab = encoding->mblen_table;
  +		p = (const unsigned char *)string;
  +		n = 0;
  +		/* count: step by each lead byte's table length until length is covered */
  +		while (n < length) {
  +			m = mbtab[*p];
  +			if (m == 0) {
  +				m = 1;	/* a zero table entry would never advance the loop */
  +			}
  +			n += m;
  +			p += m;
  +		}
  +		return n - length;
  +	} else {
  +		/* how can i do ? */
  +		return 0;
  +	}
  +	/* NOT REACHED */
  +}
  +
  +#else
  +
  +PHPAPI int php_mbstring_set_zend_encoding(TSRMLS_D)
  +{
  +	return 0;	/* ZEND_MULTIBYTE is not defined in this branch: the stub only reports 0 */
  +}
  +
  +#endif /* ZEND_MULTIBYTE */
   
  
  
  



php-i18n-commits メーリングリストの案内
Back to archive index