Revision: 8824
          https://osdn.net/projects/ttssh2/scm/svn/commits/8824
Author:   zmatsuo
Date:     2020-07-03 23:43:08 +0900 (Fri, 03 Jul 2020)
Log Message:
-----------
UTF-32 文字列変換追加

Modified Paths:
--------------
    trunk/teraterm/common/codeconv.cpp
    trunk/teraterm/common/codeconv.h

-------------- next part --------------
Modified: trunk/teraterm/common/codeconv.cpp
===================================================================
--- trunk/teraterm/common/codeconv.cpp	2020-07-03 14:42:55 UTC (rev 8823)
+++ trunk/teraterm/common/codeconv.cpp	2020-07-03 14:43:08 UTC (rev 8824)
@@ -732,6 +732,71 @@
 							  utf32_to_mb);
 }
 
+/**
+ *	Convert a wchar_t (UTF-16) string to a UTF-32 string.
+ *	Input that UTF16ToUTF32() cannot convert is replaced by '?'.
+ *	All lengths are element counts (wchar_t / char32_t), not bytes.
+ *
+ *	@param[in]		*wstr_ptr	wchar_t string, must not be NULL
+ *	@param[in,out]	*wstr_len_	in:  number of wchar_t to convert; NULL or 0 means
+ *								     automatic (wstr_ptr must be L'\0' terminated,
+ *								     the terminator is converted too)
+ *								out: number of wchar_t consumed (if not NULL)
+ *	@param[out]		*u32_ptr	destination buffer;
+ *								NULL = store nothing, only count
+ *	@param[in,out]	*u32_len_	in:  capacity of u32_ptr in char32_t
+ *								     (not read when u32_ptr is NULL)
+ *								out: number of char32_t produced;
+ *								     must not be NULL
+ */
+void WideCharToUTF32(const wchar_t *wstr_ptr, size_t *wstr_len_,
+					 char32_t *u32_ptr, size_t *u32_len_)
+{
+	size_t wstr_len;
+	size_t u32_len;
+	size_t u32_out = 0;
+	size_t wstr_in = 0;
+
+	assert(wstr_ptr != NULL);
+	assert(u32_len_ != NULL);	// written unconditionally on return
+	if (u32_ptr == NULL) {
+		// Count only: nothing is stored and u32_len is never decremented,
+		// so any non-zero value keeps the loop going until input ends.
+		u32_len = 1;
+	} else {
+		u32_len = *u32_len_;
+	}
+	if (wstr_len_ == NULL || *wstr_len_ == 0) {
+		// no (int) cast: it would truncate lengths above INT_MAX
+		wstr_len = wcslen(wstr_ptr) + 1;
+	} else {
+		wstr_len = *wstr_len_;
+	}
+
+	while (u32_len > 0 && wstr_len > 0) {
+		unsigned int u32 = 0;
+		size_t wb_in = UTF16ToUTF32(wstr_ptr, wstr_len, &u32);
+		if (wb_in == 0) {
+			// not convertible: consume one wchar_t and output '?'
+			wb_in = 1;
+			u32 = '?';
+		}
+		wstr_len -= wb_in;
+		wstr_in += wb_in;
+		wstr_ptr += wb_in;
+		if (u32_ptr != NULL) {
+			*u32_ptr++ = (char32_t)u32;
+			u32_len--;
+		}
+		u32_out++;
+	}
+
+	if (wstr_len_ != NULL) {
+		*wstr_len_ = wstr_in;
+	}
+	*u32_len_ = u32_out;
+}
+
 // MultiByteToWideChar\x82\xCCUTF8\x93\xC1\x89\xBB\x94\xC5
 int UTF8ToWideChar(const char *u8_ptr, int u8_len_, wchar_t *wstr_ptr, int wstr_len_)
 {
@@ -865,6 +930,48 @@
 }
 
 /**
+ *	wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82\xF0UTF-32\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7
+ *	\x95ϊ\xB7\x82ł\xAB\x82Ȃ\xA2\x95\xB6\x8E\x9A\x82\xCD '?' 
\x82ŏo\x97͂\xB7\x82\xE9
+ *	(wchar_t string -> newly allocated UTF-32 string, '?' for bad input)
+ *
+ *	@param[in]	*wstr_ptr	wchar_t string, must not be NULL
+ *	@param[in]	wstr_len	length in wchar_t units (0 = auto, needs L'\0' terminator)
+ *	@param[out]	*u32_len_	char32_t units produced (not bytes), counting a
+ *							converted terminator; NULL if not wanted
+ *	@retval		UTF-32 string, or NULL on error; free() after use
+ */
+char32_t *_WideCharToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, size_t *u32_len_)
+{
+	assert(wstr_ptr != NULL);
+	if (u32_len_ != NULL) {
+		*u32_len_ = 0;
+	}
+	if (wstr_len == 0) {
+		wstr_len = wcslen(wstr_ptr) + 1;
+	}
+	size_t u32_len = 0;
+	size_t wl = wstr_len;
+	WideCharToUTF32(wstr_ptr, &wl, NULL, &u32_len);	// pass 1: count only
+	if (u32_len == 0 || u32_len > (size_t)-1 / sizeof(char32_t)) {
+		return NULL;	// nothing converted, or byte size would overflow
+	}
+	char32_t *u32_ptr = (char32_t *)malloc(u32_len * sizeof(char32_t));
+	if (u32_ptr == NULL) {
+		return NULL;
+	}
+	wl = wstr_len;	// pass 1 replaced wl with the consumed count
+	WideCharToUTF32(wstr_ptr, &wl, u32_ptr, &u32_len);	// pass 2: convert
+	if (u32_len == 0) {
+		free(u32_ptr);
+		return NULL;
+	}
+	if (u32_len_ != NULL) {
+		*u32_len_ = u32_len;
+	}
+	return u32_ptr;
+}
+
+/**
  *	\x83}\x83\x8B\x83`\x83o\x83C\x83g\x95\xB6\x8E\x9A\x97\xF1\x82\xF0wchar_t\x95\xB6\x8E\x9A\x97\xF1\x82֕ϊ\xB7
  *	@param[in]	*str_ptr	mb(char)\x95\xB6\x8E\x9A\x97\xF1
  *	@param[in]	str_len		mb(char)\x95\xB6\x8E\x9A\x97\xF1\x92\xB7(0\x82̂Ƃ\xAB\x8E\xA9\x93\xAE\x81A\x8E\xA9\x93\xAE\x82̂Ƃ\xAB\x82\xCD'\0'\x82Ń^\x81[\x83~\x83l\x81[\x83g\x82\xB7\x82邱\x82\xC6)
@@ -989,6 +1096,12 @@
 	return strU8;
 }
 
+char32_t *ToU32W(const wchar_t *strW)
+{
+	// length 0 = auto (L'\0' terminated); the result must be free()d
+	return _WideCharToUTF32(strW, 0, NULL);
+}
+
 //////////////////////////////////////////////////////////////////////////////
u8::u8() Modified: trunk/teraterm/common/codeconv.h =================================================================== --- trunk/teraterm/common/codeconv.h 2020-07-03 14:42:55 UTC (rev 8823) +++ trunk/teraterm/common/codeconv.h 2020-07-03 14:43:08 UTC (rev 8824) @@ -54,10 +54,13 @@ // MultiByteToWideChar() wrappers void WideCharToUTF8(const wchar_t *wstr_ptr, size_t *wstr_len, char *u8_ptr, size_t *u8_len); void WideCharToCP932(const wchar_t *wstr_ptr, size_t *wstr_len, char *cp932_ptr, size_t *cp932_len); +void WideCharToUTF32(const wchar_t *wstr_ptr, size_t *wstr_len_, + char32_t *u32_ptr, size_t *u32_len_); int UTF8ToWideChar(const char *u8_ptr, int u8_len, wchar_t *wstr_ptr, int wstr_len); // API wrappers char *_WideCharToMultiByte(const wchar_t *wstr_ptr, size_t wstr_len, int code_page, size_t *mb_len_); +char32_t *_WideCharToUTF32(const wchar_t *wstr_ptr, size_t wstr_len, size_t *u32_len_); wchar_t *_MultiByteToWideChar(const char *str_ptr, size_t str_len, int code_page, size_t *w_len_); // convinience funcs (for windows api params) @@ -69,6 +72,7 @@ wchar_t *ToWcharU8(const char *strU8); char *ToU8A(const char *strA); char *ToU8W(const wchar_t *strW); +char32_t *ToU32W(const wchar_t *strW); #if defined(_UNICODE) #define ToTcharA(s) ToWcharA(s)