所以我们得到一个字符串 Новая папка
,它是 utf-16 编码行(Новая папка
在 utf-16 中)的 utf-8 表示,我们希望将此字符串转换为 wstring 而不更改编码.. 意思是从字面上将所有数据从字符串带到 wstring 而无需任何转换. 所以我们会得到 wstring 的Новая папка
内容。怎么做这样的事情?
更新:
我的意思是 - 我们在字符串中拥有正确 utf-16 字符串的所有数据。我们所需要做的就是将该数据放入 wstring... 这意味着如果 wstring 包含 wchar 可能恰好是0000
我们必须将 2 个字符串字符00
一起00
才能得到它。那就是我不知道该怎么做。
Update2 我是如何来到这里的——我有义务在我的服务器上使用的 C++ 库是 C 风格的解析器。它以 std::string 的形式返回我的用户请求地址。当我让我的客户以这种格式向我发送请求时。
url_encode(UTF16toUTF8(wstring)) //pseudocode.
在哪里
string UTF16toUTF8(const wstring & in)
{
string out;
unsigned int codepoint;
bool completecode = false;
for (wstring::const_iterator p = in.begin(); p != in.end(); ++p)
{
if (*p >= 0xd800 && *p <= 0xdbff)
{
codepoint = ((*p - 0xd800) << 10) + 0x10000;
completecode = false;
}
else if (!completecode && *p >= 0xdc00 && *p <= 0xdfff)
{
codepoint |= *p - 0xdc00;
completecode = true;
}
else
{
codepoint = *p;
completecode = true;
}
if (completecode)
{
if (codepoint <= 0x7f)
out.push_back(codepoint);
else if (codepoint <= 0x7ff)
{
out.push_back(0xc0 | ((codepoint >> 6) & 0x1f));
out.push_back(0x80 | (codepoint & 0x3f));
}
else if (codepoint <= 0xffff)
{
out.push_back(0xe0 | ((codepoint >> 12) & 0x0f));
out.push_back(0x80 | ((codepoint >> 6) & 0x3f));
out.push_back(0x80 | (codepoint & 0x3f));
}
else
{
out.push_back(0xf0 | ((codepoint >> 18) & 0x07));
out.push_back(0x80 | ((codepoint >> 12) & 0x3f));
out.push_back(0x80 | ((codepoint >> 6) & 0x3f));
out.push_back(0x80 | (codepoint & 0x3f));
}
}
}
return out;
}
std::string url_encode( std::string sSrc )
{
const char SAFE[256] =
{
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
/* 0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* 1 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* 2 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* 3 */ 1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0,
/* 4 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 5 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
/* 6 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 7 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
/* 8 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* 9 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* A */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* B */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* C */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* D */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* E */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};
const char DEC2HEX[16 + 1] = "0123456789ABCDEF";
const unsigned char * pSrc = (const unsigned char *)sSrc.c_str();
const int SRC_LEN = sSrc.length();
unsigned char * const pStart = new unsigned char[SRC_LEN * 3];
unsigned char * pEnd = pStart;
const unsigned char * const SRC_END = pSrc + SRC_LEN;
for (; pSrc < SRC_END; ++pSrc)
{
if (SAFE[*pSrc])
*pEnd++ = *pSrc;
else
{
// escape this char
*pEnd++ = '%';
*pEnd++ = DEC2HEX[*pSrc >> 4];
*pEnd++ = DEC2HEX[*pSrc & 0x0F];
}
}
std::string sResult((char *)pStart, (char *)pEnd);
delete [] pStart;
return sResult;
}
std::string url_decode( std::string sSrc )
{
// Note from RFC1630: "Sequences which start with a percent sign
// but are not followed by two hexadecimal characters (0-9, A-F) are reserved
// for future extension"
const char HEX2DEC[256] =
{
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
/* 0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 1 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 2 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1,
/* 4 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 5 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 6 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 7 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 8 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 9 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* A */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* B */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* C */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* D */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* E */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* F */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
const unsigned char * pSrc = (const unsigned char *)sSrc.c_str();
const int SRC_LEN = sSrc.length();
const unsigned char * const SRC_END = pSrc + SRC_LEN;
const unsigned char * const SRC_LAST_DEC = SRC_END - 2; // last decodable '%'
char * const pStart = new char[SRC_LEN];
char * pEnd = pStart;
while (pSrc < SRC_LAST_DEC)
{
if (*pSrc == '%')
{
char dec1, dec2;
if (-1 != (dec1 = HEX2DEC[*(pSrc + 1)])
&& -1 != (dec2 = HEX2DEC[*(pSrc + 2)]))
{
*pEnd++ = (dec1 << 4) + dec2;
pSrc += 3;
continue;
}
}
*pEnd++ = *pSrc++;
}
// the last 2- chars
while (pSrc < SRC_END)
*pEnd++ = *pSrc++;
std::string sResult(pStart, pEnd);
delete [] pStart;
return sResult;
}
当然我调用 url_decode,但我得到一个字符串..( 所以我希望现在我的问题更清楚了。