您可以使用以下函数(要点):
/**
 * Encodes a JavaScript (UTF-16) string as an array of UTF-8 byte values.
 *
 * Valid surrogate pairs are combined into a single supplementary-plane code
 * point and encoded as four bytes. An unpaired surrogate (a high surrogate
 * not followed by a low one, or a stray low surrogate) is encoded as
 * U+FFFD, the Unicode replacement character.
 *
 * @param {string} str - The string to encode.
 * @returns {number[]} The UTF-8 bytes, each in the range 0..255.
 */
function toUTF8Array(str) {
  var utf8 = [];
  for (var i = 0; i < str.length; i++) {
    var charcode = str.charCodeAt(i);
    if (charcode < 0x80) {
      // 1 byte: 0xxxxxxx
      utf8.push(charcode);
    } else if (charcode < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      utf8.push(0xc0 | (charcode >> 6),
                0x80 | (charcode & 0x3f));
    } else if (charcode < 0xd800 || charcode >= 0xe000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (BMP, outside the surrogate range)
      utf8.push(0xe0 | (charcode >> 12),
                0x80 | ((charcode >> 6) & 0x3f),
                0x80 | (charcode & 0x3f));
    } else {
      // charcode is a surrogate code unit (0xD800..0xDFFF).
      // 0 is used as a "no next unit" marker; it is never a low surrogate.
      var next = i + 1 < str.length ? str.charCodeAt(i + 1) : 0;
      if (charcode < 0xdc00 && next >= 0xdc00 && next < 0xe000) {
        // High surrogate followed by low surrogate: rebuild the code point.
        var codepoint = 0x10000 + ((charcode - 0xd800) << 10) + (next - 0xdc00);
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8.push(0xf0 | (codepoint >> 18),
                  0x80 | ((codepoint >> 12) & 0x3f),
                  0x80 | ((codepoint >> 6) & 0x3f),
                  0x80 | (codepoint & 0x3f));
        i++; // the low surrogate has been consumed as part of the pair
      } else {
        // Unpaired surrogate: emit U+FFFD "replacement character".
        utf8.push(0xef, 0xbf, 0xbd);
      }
    }
  }
  return utf8;
}
使用示例:
>>> toUTF8Array("中€")
[228, 184, 173, 226, 130, 172]
如果希望大于 127 的字节值以负数表示(与 Java 中 byte 转换为 int 的结果一致),则需要调整常量,改用:
// Two-byte sequence, signed variant: bitwise OR works on 32-bit signed
// integers, so OR-ing with 0xffffffc0 / 0xffffff80 sets every high bit and
// the pushed values come out negative.
utf8.push(0xffffffc0 | (charcode >> 6),
0xffffff80 | (charcode & 0x3f));
和
// Three-byte sequence, signed variant: bitwise OR works on 32-bit signed
// integers, so OR-ing with 0xffffffe0 / 0xffffff80 sets every high bit and
// the pushed values come out negative.
utf8.push(0xffffffe0 | (charcode >> 12),
0xffffff80 | ((charcode>>6) & 0x3f),
0xffffff80 | (charcode & 0x3f));