下面是我使用 ICU 在 std::string（UTF-8 编码）和 std::wstring 之间进行转换的方式：
/** Converts a std::wstring into a std::string with UTF-8 encoding.
 *
 *  Primary template, declared only. StringT is the return type; it does not
 *  appear in the parameter list, so callers have to name it explicitly,
 *  e.g. utf8<std::string>(...).
 *
 *  @param rc_string  wide string to convert.
 *  @return the converted text as a StringT.
 */
template < typename StringT >
StringT utf8 ( std::wstring const & rc_string );
/** Converts a std::string with UTF-8 encoding into a std::wstring.
 *
 *  Primary template, declared only. StringT is the return type; it does not
 *  appear in the parameter list, so callers have to name it explicitly,
 *  e.g. utf8<std::wstring>(...).
 *
 *  @param rc_string  UTF-8 encoded string to convert.
 *  @return the converted text as a StringT.
 */
template < typename StringT >
StringT utf8 ( std::string const & rc_string );
/** Nop specialization for std::string.
*/
template < >
inline std::string utf8 ( std::string const & rc_string )
{
return rc_string;
}
/** Nop specialization for std::wstring.
*/
template < >
inline std::wstring utf8 ( std::wstring const & rc_string )
{
return rc_string;
}
/** Converts a std::wstring into a UTF-8 encoded std::string.
 *
 *  The conversion runs in two ICU steps: wchar_t -> UTF-16 (u_strFromWCS),
 *  then UTF-16 -> UTF-8 (u_strToUTF8).
 *
 *  @param rc_string  wide string to convert; an empty input yields an empty
 *                    result without calling ICU.
 *  @return the UTF-8 encoded text.
 *  @throws XXXException if the input is too long for ICU's int32_t lengths,
 *          or if either ICU conversion reports a failure status.
 */
template < >
std::string utf8 ( std::wstring const & rc_string )
{
    std::string result;
    if(rc_string.empty())
        return result;

    // ICU takes every length as int32_t. Refuse inputs whose worst-case
    // UTF-16 size does not fit, instead of letting the narrowing below wrap
    // around and silently convert only part of the string.
    if(rc_string.size() > static_cast<std::wstring::size_type>(INT32_MAX / 2))
    {
        throw XXXException("utf8: input string too long");
    }

    // One wchar_t becomes at most two UTF-16 code units (a surrogate pair).
    std::vector<UChar> buffer(rc_string.size() * 2);
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = 0;
    u_strFromWCS(
        &buffer[0],
        static_cast<int32_t>(buffer.size()),
        &len,
        &rc_string[0],
        static_cast<int32_t>(rc_string.size()),
        &status
    );
    if(!U_SUCCESS(status))
    {
        throw XXXException("utf8: u_strFromWCS failed");
    }
    buffer.resize(len);

    if(len > INT32_MAX / 3)
    {
        throw XXXException("utf8: input string too long");
    }

    // Size the UTF-8 buffer from the UTF-16 length rather than from the
    // wchar_t count: a BMP code unit needs at most 3 bytes and a surrogate
    // pair (2 units) needs 4, so 3 bytes per UTF-16 unit is always enough.
    // Sizing by 3 bytes per wchar_t is too small where wchar_t is 32 bits
    // wide, because one supplementary-plane character then needs 4 bytes.
    result.resize(static_cast<std::string::size_type>(len) * 3);
    u_strToUTF8(
        &result[0],
        static_cast<int32_t>(result.size()),
        &len,
        &buffer[0],
        static_cast<int32_t>(buffer.size()),
        &status
    );
    if(!U_SUCCESS(status))
    {
        throw XXXException("utf8: u_strToUTF8 failed");
    }
    // Trim the worst-case allocation down to the bytes actually written.
    result.resize(len);
    return result;
}/* end of utf8 ( ) */
/** Converts a UTF-8 encoded std::string into a std::wstring.
 *
 *  The conversion runs in two ICU steps: UTF-8 -> UTF-16 (u_strFromUTF8),
 *  then UTF-16 -> wchar_t (u_strToWCS).
 *
 *  @param rc_string  UTF-8 encoded string to convert; an empty input yields
 *                    an empty result without calling ICU.
 *  @return the wide-character text.
 *  @throws XXXException if the input is too long for ICU's int32_t lengths,
 *          or if either ICU conversion reports a failure status.
 */
template < >
std::wstring utf8 ( std::string const & rc_string )
{
    std::wstring result;
    if(rc_string.empty())
        return result;

    // ICU takes every length as int32_t. Refuse inputs that do not fit,
    // instead of letting the narrowing below wrap around and silently
    // convert only part of the string.
    if(rc_string.size() > static_cast<std::string::size_type>(INT32_MAX))
    {
        throw XXXException("utf8: input string too long");
    }

    // Decoding UTF-8 never produces more UTF-16 code units than there are
    // input bytes, so the byte count is a safe capacity for both buffers.
    std::vector<UChar> buffer(rc_string.size());
    result.resize(rc_string.size());
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = 0;
    u_strFromUTF8(
        &buffer[0],
        static_cast<int32_t>(buffer.size()),
        &len,
        &rc_string[0],
        static_cast<int32_t>(rc_string.size()),
        &status
    );
    if(!U_SUCCESS(status))
    {
        throw XXXException("utf8: u_strFromUTF8 failed");
    }
    buffer.resize(len);
    u_strToWCS(
        &result[0],
        static_cast<int32_t>(result.size()),
        &len,
        &buffer[0],
        static_cast<int32_t>(buffer.size()),
        &status
    );
    if(!U_SUCCESS(status))
    {
        throw XXXException("utf8: u_strToWCS failed");
    }
    // Trim the worst-case allocation down to the characters actually written.
    result.resize(len);
    return result;
}/* end of utf8 ( ) */
用法非常简单：
// Wide string -> UTF-8 encoded std::string.
std::string s = utf8<std::string>(std::wstring(L"some string"));
// UTF-8 encoded std::string -> wide string. Named differently from the
// variable above so that both examples can live in the same scope.
std::wstring ws = utf8<std::wstring>(std::string("some string"));