0

我正在使用 Linux 系统,我认为 Linux 上的标准 std::string 同时支持 Unicode 和 ASCII 字符。所以,我想在我的代码中使用 std::string,但我从另一个应用程序(同时支持 Windows 和 Linux)接收到的字符串类型是 std::basic_string<unsigned short int, TRAITS_CLASS>。TRAITS_CLASS 的定义如下:

/// Character-traits class for strings whose code units are `unsigned short`.
///
/// It supplies the static operations (assign, eq, lt, compare, length, copy,
/// find, move, fill, int_type conversions, eof handling) that a
/// std::basic_string<unsigned short, TRAITS_CLASS> looks up on its traits type.
/// Note that int_type is the same type as char_type, so eof() is simply the
/// value EOF narrowed to unsigned short.
class TRAITS_CLASS
{
    public:
        using char_type = unsigned short;
        using int_type = unsigned short;
        using pos_type = size_t;
        using off_type = size_t;
        using state_type = int;

        /// Stores `src` into `dest`.
        static inline void assign(unsigned short &dest, const unsigned short &src)
        {
            dest = src;
        }

        /// Returns true when both code units are equal.
        static inline bool eq(const unsigned short &left, const unsigned short &right)
        {
            return left == right;
        }

        /// Returns true when `left` orders strictly before `right`.
        static inline bool lt(const unsigned short &left, const unsigned short &right)
        {
            return left < right;
        }

        /// Lexicographically compares the first `count` units of `p1` and `p2`.
        /// @return -1, 0 or 1, decided at the first position where they differ.
        static int compare(const unsigned short *p1, const unsigned short *p2, size_t count)
        {
            for (size_t i = 0; i != count; ++i)
            {
                if (eq(p1[i], p2[i]))
                {
                    continue;
                }
                return lt(p1[i], p2[i]) ? -1 : 1;
            }
            return 0;
        }

        /// Counts the code units that precede the first zero unit in `p`.
        static size_t length(const unsigned short *p)
        {
            const unsigned short *cursor = p;
            while (*cursor != 0)
            {
                ++cursor;
            }
            return static_cast<size_t>(cursor - p);
        }

        /// Copies `count` units from `p2` to `p1`, front to back.
        /// @return `p1`.
        static unsigned short* copy(unsigned short *p1, const unsigned short *p2, size_t count)
        {
            for (size_t i = 0; i != count; ++i)
            {
                assign(p1[i], p2[i]);
            }
            return p1;
        }

        /// Searches the first `count` units of `p` for `value`.
        /// @return pointer to the first match, or a null pointer if none.
        static const unsigned short* find(const unsigned short *p, size_t count,
                const unsigned short &value)
        {
            for (size_t i = 0; i != count; ++i)
            {
                if (eq(p[i], value))
                {
                    return p + i;
                }
            }
            return nullptr;
        }

        /// Copies `count` units from `src` to `dest`, tolerating overlap.
        /// @return `dest`.
        static unsigned short* move(unsigned short *dest, const unsigned short *src, size_t count)
        {
            // When dest starts inside [src, src + count), a front-to-back copy
            // would overwrite source units before they are read.
            const bool dest_inside_src = (src < dest) && (dest < src + count);
            if (!dest_inside_src)
            {
                for (size_t i = 0; i != count; ++i)
                {
                    assign(dest[i], src[i]);
                }
                return dest;
            }

            // Overlapping case: walk from the last unit down to the first.
            for (size_t i = count; i != 0; --i)
            {
                assign(dest[i - 1], src[i - 1]);
            }
            return dest;
        }

        /// Fills the first `count` units of `dest` with `value`.
        /// @return `dest`.
        static unsigned short* assign(unsigned short *dest, size_t count, unsigned short value)
        {
            for (size_t i = 0; i != count; ++i)
            {
                dest[i] = value;
            }
            return dest;
        }

        /// Converts an int_type value to the character type.
        static inline unsigned short to_char_type(const int_type &arg)
        {
            return static_cast<unsigned short>(arg);
        }

        /// Converts a character to int_type.
        static inline int_type to_int_type(const unsigned short &value)
        {
            return static_cast<int_type>(value);
        }

        /// Returns true when both int_type values are equal.
        static inline bool eq_int_type(const int_type &left, const int_type &right)
        {
            return left == right;
        }

        /// Returns the end-of-file marker: EOF converted to int_type.
        static inline int_type eof()
        {
            return static_cast<int_type>(EOF);
        }

        /// Returns `value` unchanged unless it equals eof(), in which case 1.
        static inline int_type not_eof(const int_type &value)
        {
            if (value == eof())
            {
                return 1;
            }
            return value;
        }
};

如何把一个普通的 std::string 赋值给上述 std::basic_string 模板?例如:

basic_string<unsigned short int, TRAIT_ClASS> temp = u"string";

如果无法这样赋值,我该如何使用上述 basic_string 模板?

4

1 回答 1

0

我认为标准 Linux std::string 同时支持 Unicode 和 ASCII 字符

std::string(即 std::basic_string<char>)没有 Unicode 或 ASCII 的概念,它只知道 char 元素,仅此而已。您可能是因为 Linux 应用程序通常使用 UTF-8 字符串而产生了混淆:UTF-8 确实可以存储在 std::string 中(或者最好存储在 C++20 的 std::u8string,即 std::basic_string<char8_t> 中)。但 std::string 本身并不承担处理字符编码的职责。

如何把一个普通的 std::string 赋值给上述 std::basic_string 模板?

您不能在 std::string 与 CharT 不是 char 的 std::basic_string<CharT> 之间直接相互赋值。

您必须使用类型转换来绕过这个限制,而且前提是数据是兼容的 - 在您的示例中并非如此!char 的大小为 1 个字节,但 unsigned short int 为 2 个字节。因此,另一个应用程序的 basic_string 最有可能使用的是 UCS-2/UTF-16,您不能把它存储在 std::string 中(至少不能以您想要的方式存储),但可以存储在 std::u16string(即 std::basic_string<char16_t>)中,或者在 Windows 上存储在 std::wstring(即 std::basic_string<wchar_t>)中,例如:

// The statements below are three separate, alternative examples. They reuse
// the names `temp` and `str`, so they are not meant to be compiled together
// in a single scope.

// Example 1: build the custom string from a char16_t string literal by
// reinterpreting the literal's pointer as `const unsigned short int*`.
std::basic_string<unsigned short int, TRAITS_CLASS> temp =
    reinterpret_cast<const unsigned short int*>(u"string");

// or:
// Same source, but passing the length (6) explicitly instead of relying on
// the terminating zero unit.
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
    reinterpret_cast<const unsigned short int*>(u"string"),
    6);

// Example 2: build the custom string from an existing std::u16string.
std::u16string str = u"string";

std::basic_string<unsigned short int, TRAITS_CLASS> temp =
    reinterpret_cast<const unsigned short int*>(str.c_str());

// or:
// Same source, but passing str.size() explicitly.
std::basic_string<unsigned short int, TRAITS_CLASS> temp(
    reinterpret_cast<const unsigned short int*>(str.c_str()),
    str.size());

// Example 3: the reverse direction - build a std::u16string from the custom
// string by reinterpreting its buffer as `const char16_t*`.
std::basic_string<unsigned short int, TRAITS_CLASS> temp = ...;

std::u16string str =
    reinterpret_cast<const char16_t*>(temp.c_str());

// or:
// Same source, but passing temp.size() explicitly.
std::u16string str(
    reinterpret_cast<const char16_t*>(temp.c_str()),
    temp.size());

如果您确实必须在代码中使用 std::string,那么您必须在 UTF-8(或您想要的任何其他与 char 兼容的字符集)与另一个应用程序的 16 位格式(假设为 UCS-2/UTF-16)之间进行转换,例如使用 std::wstring_convert,或第三方 Unicode 库,如 libiconv、ICU 等。

于 2022-01-27T19:09:47.210 回答