-2

有人知道 readUTF 在底层是如何工作的吗?我已经阅读了这个 API 的文档,但还是不太清楚。有人能用更简单的方式解释一下吗?提前致谢。

4

1 回答 1

1
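  1. 首先读取一个无符号的 short,它是字符串编码后所占的字节数(不是字符数)。
  2. 重复以下步骤,直到这些字节全部处理完:
  3. 读取一个字节。如果该字节匹配位模式 0xxxxxxx,那么这 1 个字节本身就是一个字符。如果该字节与位模式 110xxxxx 匹配,则该字符(Unicode)由 2 个字节组成。如果该字节与位模式 1110xxxx 匹配,则该字符由 3 个字节组成。后续字节必须匹配 10xxxxxx;若首字节是其他位模式(10xxxxxx 或 1111xxxx),或者后续字节缺失、格式不对,则抛出 UTFDataFormatException。当这个新字符被组合出来后,它被追加到要返回的字符串的末尾。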

查看函数背后的代码可能会有所帮助:

 /**
  * Reads a length-prefixed string from {@code in} and decodes it.
  *
  * <p>The input starts with an unsigned 16-bit value giving the number of
  * encoded bytes that follow. Those bytes are then decoded using one-,
  * two- and three-byte sequences ({@code 0xxxxxxx}, {@code 110xxxxx 10xxxxxx},
  * {@code 1110xxxx 10xxxxxx 10xxxxxx}); each sequence yields one {@code char}.
  *
  * <p>When {@code in} is a {@code DataInputStream}, the stream's own
  * {@code bytearr}/{@code chararr} buffers are used (and replaced by buffers
  * of twice the required size if they are too small); otherwise fresh arrays
  * of exactly the required size are allocated.
  *
  * @param in the source to read the length prefix and the encoded bytes from
  * @return the decoded string; it may contain fewer chars than encoded bytes
  * @throws UTFDataFormatException if a lead byte has the form
  *         {@code 10xxxxxx} or {@code 1111xxxx}, if a continuation byte is
  *         not of the form {@code 10xxxxxx}, or if a multi-byte sequence is
  *         cut off by the end of the encoded data
  * @throws IOException if reading from {@code in} fails
  */
 public final static String readUTF(DataInput in) throws IOException {
     final int byteLength = in.readUnsignedShort();
     final byte[] bytes;
     final char[] chars;

     if (in instanceof DataInputStream) {
         // Use the buffers held by the stream, growing them when too small.
         DataInputStream dis = (DataInputStream) in;
         if (dis.bytearr.length < byteLength) {
             dis.bytearr = new byte[byteLength * 2];
             dis.chararr = new char[byteLength * 2];
         }
         chars = dis.chararr;
         bytes = dis.bytearr;
     } else {
         bytes = new byte[byteLength];
         chars = new char[byteLength];
     }

     in.readFully(bytes, 0, byteLength);

     int pos = 0;       // index of the next encoded byte to consume
     int produced = 0;  // number of chars written to chars[]

     // Copy the leading run of single-byte (<= 127) values directly.
     for (; pos < byteLength; pos++) {
         final int ascii = bytes[pos] & 0xff;
         if (ascii > 127) {
             break;
         }
         chars[produced++] = (char) ascii;
     }

     // General decoder for whatever remains after the single-byte prefix.
     while (pos < byteLength) {
         final int lead = bytes[pos] & 0xff;

         if ((lead & 0x80) == 0) {
             // 0xxxxxxx: one byte, one char
             pos++;
             chars[produced++] = (char) lead;
         } else if ((lead & 0xE0) == 0xC0) {
             // 110xxxxx 10xxxxxx
             pos += 2;
             if (pos > byteLength) {
                 throw new UTFDataFormatException(
                     "malformed input: partial character at end");
             }
             final int second = bytes[pos - 1];
             if ((second & 0xC0) != 0x80) {
                 throw new UTFDataFormatException(
                     "malformed input around byte " + pos);
             }
             chars[produced++] = (char) (((lead & 0x1F) << 6) | (second & 0x3F));
         } else if ((lead & 0xF0) == 0xE0) {
             // 1110xxxx 10xxxxxx 10xxxxxx
             pos += 3;
             if (pos > byteLength) {
                 throw new UTFDataFormatException(
                     "malformed input: partial character at end");
             }
             final int second = bytes[pos - 2];
             final int third = bytes[pos - 1];
             if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
                 throw new UTFDataFormatException(
                     "malformed input around byte " + (pos - 1));
             }
             chars[produced++] = (char) (((lead & 0x0F) << 12)
                     | ((second & 0x3F) << 6)
                     | (third & 0x3F));
         } else {
             // 10xxxxxx or 1111xxxx cannot start a sequence
             throw new UTFDataFormatException(
                 "malformed input around byte " + pos);
         }
     }

     // Multi-byte sequences collapse to one char each, so produced <= byteLength.
     return new String(chars, 0, produced);
 }

于 2013-07-31T09:50:48.473 回答