在 C++ 应用程序中,如何转换字符 ú?应用程序接收到的是该字符的 UTF-8 编码 %C3%BA,需要将其存储为等效的 Unicode 码点 %FA。我只想知道应该如何编写代码来完成这一转换过程。
问问题
16175 次
2 回答
8
我昨天刚刚写了一些代码来做到这一点......
我并不是说这是执行此操作的“完美”方式,但它似乎适用于我运行过的所有测试用例(为此我编写了两个方向)。
至于如何把“%NN”转换为整数值,就留给您自己完成了。
#include <iostream>
#include <deque>
/// Encodes a single code point as a sequence of UTF-8 byte values.
///
/// @param charcode  Code point to encode.
/// @return Deque holding the byte values in transmission order (lead byte
///         first). Values below 128 are returned as one element, unchanged.
///
/// No range validation is performed: negative values take the single-element
/// path as-is, and values above U+10FFFF produce sequences longer than four
/// bytes.
std::deque<int> unicode_to_utf8(int charcode)
{
    std::deque<int> bytes;

    // Single-byte case: the value is emitted verbatim.
    if (charcode < 0x80)
    {
        bytes.push_back(charcode);
        return bytes;
    }

    const int continuation_payload = 0x3F;  // low six bits carried per trailing byte
    int lead_marker = 0xC0;                 // 110xxxxx; one more high bit is set per extra byte

    // Peel six bits at a time off the low end, building the sequence back to
    // front. `lead_capacity` is the width the remaining value is compared
    // against; it shrinks by one for every trailing byte that is emitted.
    for (int lead_capacity = 6; charcode >= (1 << lead_capacity); --lead_capacity)
    {
        bytes.push_front(0x80 | (charcode & continuation_payload));
        charcode >>= 6;
        lead_marker |= 1 << lead_capacity;
    }

    // Whatever is left fits in the lead byte next to its length marker.
    bytes.push_front(lead_marker | charcode);
    return bytes;
}
/// Decodes one code point from the front of a deque of UTF-8 byte values.
///
/// The bytes that make up the decoded sequence are removed from `coded`, so
/// the function can be called repeatedly to walk through a longer buffer.
///
/// @param coded  Byte values (0..255) in transmission order; consumed from the front.
/// @return The decoded code point, or -1 when the input is malformed:
///         - `coded` is empty,
///         - the lead value is not a byte, is a stray continuation byte
///           (0x80..0xBF), or is 0xFE/0xFF,
///         - the sequence is truncated,
///         - a trailing value is not a continuation byte (10xxxxxx).
///
/// On error the lead byte and any continuation bytes accepted so far have
/// been removed; an offending trailing value is left in place so the caller
/// can resynchronise on it.
///
/// Lead bytes 0xF8..0xFD (the historical 5- and 6-byte forms) are still
/// accepted so that every sequence produced by unicode_to_utf8 for a
/// non-negative int round-trips. Overlong encodings are not rejected.
int utf8_to_unicode(std::deque<int> &coded)
{
    const int invalid = -1;

    // Guard: front()/pop_front() on an empty deque is undefined behaviour.
    if (coded.empty())
    {
        return invalid;
    }

    const int lead = coded.front();
    coded.pop_front();

    if (lead < 0 || lead > 0xFD)
    {
        // Not a byte at all, or 0xFE/0xFF which would require shifting an
        // int by 36+ bits further down.
        return invalid;
    }
    if (lead < 0x80)
    {
        return lead;  // plain ASCII
    }
    if (lead < 0xC0)
    {
        return invalid;  // continuation byte where a lead byte was expected
    }

    // Count the trailing bytes announced by the lead byte: 110xxxxx means one,
    // and every further 1 bit below that adds another while removing one
    // payload bit from the lead byte itself.
    int trailing = 1;
    int lead_payload_mask = 0x1F;
    for (int marker = 0x20; (lead & marker) != 0; marker >>= 1)
    {
        ++trailing;
        lead_payload_mask >>= 1;
    }

    int charcode = lead & lead_payload_mask;
    for (int i = 0; i < trailing; ++i)
    {
        if (coded.empty())
        {
            return invalid;  // truncated sequence
        }
        const int next = coded.front();
        if ((next & ~0x3F) != 0x80)
        {
            return invalid;  // not of the form 10xxxxxx
        }
        coded.pop_front();
        // At most 1 + 5*6 = 31 payload bits are accumulated, so this cannot
        // overflow a 32-bit int.
        charcode = (charcode << 6) | (next & 0x3F);
    }
    return charcode;
}
/// Interactive round-trip demo: reads code points from standard input, prints
/// their UTF-8 byte values as "\xNN" tokens, then decodes the bytes again and
/// prints the result in decimal and hexadecimal.
///
/// The loop ends when reading a number fails (end of input or non-numeric
/// text); without that check a failed stream would make the loop spin forever.
int main()
{
    int charcode;
    for(;;)
    {
        std::cout << "Enter unicode value:" << std::endl;
        if (!(std::cin >> charcode))
        {
            break;  // EOF or unparsable input: stop instead of looping on a dead stream
        }
        auto x = unicode_to_utf8(charcode);
        for(auto c : x)
        {
            std::cout << "\\x" << std::hex << c << " ";
        }
        std::cout << std::endl;
        // utf8_to_unicode consumes the bytes from x while decoding.
        int c = utf8_to_unicode(x);
        std::cout << "reversed:" << std::dec << c << std::hex << " in hex:" << c << std::endl;
    }
    return 0;
}
于 2013-08-30T14:00:21.670 回答
1
这实际上在标准库中:
#include <string>
#include <codecvt> // for std::codecvt_utf8
#include <locale> // for std::wstring_convert
// Converter between UTF-8 encoded byte strings and UTF-32 code point strings.
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv_utf8_utf32;

// Minimal demo: turn a UTF-8 encoded literal into its UTF-32 code points.
int main() {
    const std::string encoded("ú");
    // from_bytes yields one char32_t per decoded code point.
    const std::u32string decoded(conv_utf8_utf32.from_bytes(encoded));
    return 0;
}
反方向的转换(从码点到 UTF-8 字节)则使用 conv_utf8_utf32.to_bytes。
使用 printf 以您的 %hex 格式打印的示例:
#include <string>
#include <codecvt> // for std::codecvt_utf8
#include <locale> // for std::wstring_convert
#include <cstdio>
// Converter between UTF-8 encoded byte strings and UTF-32 code point strings.
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv_utf8_utf32;

// Prints a UTF-8 string twice in "%HEX" notation: first one token per encoded
// byte, then one token per decoded code point.
int main() {
    std::string utf8_bytes = "ú";
    // print the bytes in %hex format
    for (char byte: utf8_bytes) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended, and
        // zero-pad to two digits so small values print as e.g. "%0A" rather
        // than "% A".
        std::printf("%%%02X", static_cast<unsigned int>(static_cast<unsigned char>(byte)));
    }
    std::printf("\n");
    std::u32string unicode_codepoints = conv_utf8_utf32.from_bytes(utf8_bytes);
    // print the code points in %hex format
    for (char32_t chr: unicode_codepoints) {
        // %X expects unsigned int; code points above 0xFF print with more
        // than two digits.
        std::printf("%%%02X", static_cast<unsigned int>(chr));
    }
    std::printf("\n");
    return 0;
}
于 2020-01-14T09:37:25.130 回答