
#include <cstdio>
#include "ucpp"
main() {
  ustring a = "test";
  ustring b = "ing";
  ustring c = "- -";
  ustring d = "cafe\xcc\x81";
  printf("%s\n", (a + b + c[1] + d).encode());

我收到一条不错的“测试咖啡馆”消息。但是,如果我稍微修改代码以便单独完成 const char * 转换,则声明后:

#include <cstdio>
#include "ucpp"
main() {
  ustring a = "test";
  ustring b = "ing";
  ustring c = "- -";
  ustring d;
  d = "cafe\xcc\x81";
  printf("%s\n", (a + b + c[1] + d).encode());

名为 d 的 ustring 变为空白,所有输出都是“测试”。我的新代码有三个构造函数,一个是 void(可能是被错误使用的那个,并且在 operator+ 函数中使用),一个接受 const ustring &,一个接受 const char *。以下是我的新库代码:

#include <cstdlib>
#include <cstring>
class ustring {
  int * values;
  long len;
  long length() {
    return len;
  ustring() {
    len = 0;
    values = (int *) malloc(0);
  ustring(const ustring &input) {
    len = input.len;
    values = (int *) malloc(sizeof(int) * len);
    for (long i = 0; i < len; i++)
      values[i] = input.values[i];
  ustring operator=(ustring input) {
    ustring result(input);
    return result;
  ustring(const char * input) {
    values = (int *) malloc(0);
    long s = 0;                                                                 // s = number of parsed chars
    int a, b, c, d, contNeed = 0, cont = 0;
    for (long i = 0; input[i]; i++)
      if (input[i] < 0x80) {                                                    // ASCII, direct copy (00-7f)
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = input[i];
      } else if (input[i] < 0xc0) {                                             // this is a continuation (80-bf)
        if (cont == contNeed) {                                                 // no need for continuation, use U+fffd
          values = (int *) realloc(values, sizeof(int) * ++s);
          values[s - 1] = 0xfffd;
        cont = cont + 1;
        values[s - 1] = values[s - 1] | ((input[i] & 0x3f) << ((contNeed - cont) * 6));
        if (cont == contNeed) cont = contNeed = 0;
      } else if (input[i] < 0xc2) {                                             // invalid byte, use U+fffd (c0-c1)
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = 0xfffd;
      } else if (input[i] < 0xe0) {                                             // start of 2-byte sequence (c2-df)
        contNeed = 1;
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = (input[i] & 0x1f) << 6;
      } else if (input[i] < 0xf0) {                                             // start of 3-byte sequence (e0-ef)
        contNeed = 2;
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = (input[i] & 0x0f) << 12;
      } else if (input[i] < 0xf5) {                                             // start of 4-byte sequence (f0-f4)
        contNeed = 3;
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = (input[i] & 0x07) << 18;
      } else {                                                                  // restricted or invalid (f5-ff)
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = 0xfffd;
    len = s;
  ustring operator=(const char * input) {
    ustring result(input);
    return result;
  ustring operator+(ustring input) {
    ustring result;
    result.len = len + input.len;
    result.values = (int *) malloc(sizeof(int) * result.len);
    for (long i = 0; i < len; i++)
      result.values[i] = values[i];
    for (long i = 0; i < input.len; i++)
      result.values[i + len] = input.values[i];
    return result;
  ustring operator[](long index) {
    ustring result;
    result.len = 1;
    result.values = (int *) malloc(sizeof(int));
    result.values[0] = values[index];
    return result;
  char * encode() {
    char * r = (char *) malloc(0);
    long s = 0;
    for (long i = 0; i < len; i++) {
      if (values[i] < 0x80)
        r = (char *) realloc(r, s + 1),
        r[s + 0] = char(values[i]),
        s += 1;
      else if (values[i] < 0x800)
        r = (char *) realloc(r, s + 2),
        r[s + 0] = char(values[i] >> 6 | 0x60),
        r[s + 1] = char(values[i] & 0x3f | 0x80),
        s += 2;
      else if (values[i] < 0x10000)
        r = (char *) realloc(r, s + 3),
        r[s + 0] = char(values[i] >> 12 | 0xe0),
        r[s + 1] = char(values[i] >> 6 & 0x3f | 0x80),
        r[s + 2] = char(values[i] & 0x3f | 0x80),
        s += 3;
        r = (char *) realloc(r, s + 4),
        r[s + 0] = char(values[i] >> 18 | 0xf0),
        r[s + 1] = char(values[i] >> 12 & 0x3f | 0x80),
        r[s + 2] = char(values[i] >> 6 & 0x3f | 0x80),
        r[s + 3] = char(values[i] & 0x3f | 0x80),
        s += 4;
    return r;

2 回答 2



a = b = c;
(a = b).foo();
于 2010-05-02T09:23:19.820 回答


  ustring operator=(const char * input) {
    ustring result(input);
    return result;


  ustring& operator=(ustring input) {
    return *this;

  ustring& operator=(const char * input) {
    return *this;

  void swap(ustring& s) {
    int* tv = values; values = s.values; s.values = tv;
    long tl = len; len = s.len; s.len = tl;
于 2010-05-02T09:22:15.363 回答