javascript - 通过 Thrift 从 Chrome 扩展程序保存到 Evernote 的二进制文件在 Unicode 转换后出现乱码

Question

编辑：这个问题归结为 UTF-8 转换，相关讨论见：《UTF-8：string = encode(decode(string)) 是否应当成立？如果不成立，该如何实现？》

我正在尝试使用 Kanda 软件提供的非官方 API，从 Chrome 扩展程序将 PDF 文件保存到 Evernote。本质上，它使用 Thrift，并提供根据 Evernote API 创建和发送数据结构的方法。为了测试，我使用了一个长度为 2898 字节的示例 PDF 文件。

当 CreateNote() 方法被调用时，数据最终会进入 SDK 的 thrift.js，在该文件中进行 UTF 编码并放入适当的数据结构中。这些函数称为：

    writeString:function(str) {
        var result = Array.prototype.map.call(this.encode(str).split(""), this.stringToHex); // <----- I tried to skip this encoding for the data body
        this.writeI32(result.length);
        for (var i = 0; i < result.length; i++) {
            this.ra.push(result[i]);
        }
    },
...
    encode:function(string) {
        string = string.replace(/\r\n/g, "\n");
        var utftext = "";
        for (var n = 0; n < string.length; n++) {

            var c = string.charCodeAt(n);

            if (c < 128) {
                utftext += String.fromCharCode(c);
            }
            else if ((c > 127) && (c < 2048)) {
                utftext += String.fromCharCode((c >> 6) | 192);
                utftext += String.fromCharCode((c & 63) | 128);
            }
            else {
                utftext += String.fromCharCode((c >> 12) | 224);
                utftext += String.fromCharCode(((c >> 6) & 63) | 128);
                utftext += String.fromCharCode((c & 63) | 128);
            }
        }
        return utftext;
    },
...
    writeI32:function(i32) {
        var i32out = new Array();
        i32out[0] = 0xff & i32 >> 24;
        i32out[1] = 0xff & i32 >> 16;
        i32out[2] = 0xff & i32 >> 8;
        i32out[3] = 0xff & i32;

        var result = Array.prototype.map.call(i32out, this.byteToHex);
        for (var i = 0; i < 4; i++) {
            this.ra.push(result[i]);
        }

    },

我真的不明白 Thrift 在这个层面是如何工作的，也不明白为什么它需要对包括二进制附件正文在内的所有数据进行编码。但正如你所看到的，结果是 PDF 的数据（以及所有其他要传输的字符串数据）都被 UTF 编码了，所以 .length 现在是 3018 字节。这一切都顺利通过了 API，文件也显示在 Evernote 前端（见图），但它没有被解码回来，大小仍为 3018 字节，所以 PDF 已损坏。

Evernote 前端的结果图片

我试图通过仅跳过对数据主体的 encode() 调用来强制解决方案，但这会导致文件以某种方式被丢弃。

您能否告诉我问题出在我的误解、SDK 还是 Evernote 后端，以及如何解决？我为此失眠了。

供参考：我通过 XMLHttpRequest 获取原始文件，如下所示：

/**
 * Downloads a file with XMLHttpRequest and passes its contents to AddNote1,
 * then appends the AddNote1 response and a short sample of the download to the
 * #result element as plain text.
 *
 * The response is requested as text with the MIME type overridden to
 * 'text/plain; charset=x-user-defined', so the payload arrives as a string
 * instead of being decoded as real text.
 * NOTE(review): with x-user-defined, browsers are documented to deliver bytes
 * 0x80-0xFF as code units U+F780-U+F7FF; code that needs raw byte values
 * normally masks each code unit with `& 0xff`. The string is passed on
 * unmodified here - confirm that downstream consumers account for this.
 *
 * @param {string} url - Address to download. If this is not a non-empty
 *     string, the value of the #url1 element is used instead (the previous
 *     implementation always used #url1). The argument is also forwarded
 *     unchanged to AddNote1 as its first parameter.
 * @param {Function} [callback] - Accepted for interface compatibility; this
 *     function does not invoke it.
 */
function getLink(url, callback) {
  var targetUrl = (typeof url === 'string' && url !== '')
    ? url
    : document.getElementById('url1').value;

  // Appends text as text nodes, with a <br> element between lines, so that
  // downloaded or remote content is never parsed as HTML.
  function appendPlainText(element, text) {
    var lines = text.split('\n');
    for (var i = 0; i < lines.length; i++) {
      if (i > 0) {
        element.appendChild(document.createElement('br'));
      }
      element.appendChild(document.createTextNode(lines[i]));
    }
  }

  var xhr = new XMLHttpRequest();
  xhr.open('GET', targetUrl, true);
  xhr.responseType = 'text';
  xhr.overrideMimeType('text/plain; charset=x-user-defined');

  xhr.onload = function(e) {
    if (this.status !== 200) {
      console.error('getLink: request for ' + targetUrl + ' returned HTTP status ' + this.status);
      return;
    }

    // Note: .response instead of .responseText
    alert("String(this.response) " + String(this.response).length);
    alert("String(this.responseText) " + String(this.responseText).length);

    // Local on purpose: the nested callback below closes over it.
    var blob = String(this.response);
    AddNote1(url, blob, function(response) {
      var resultElement = document.getElementById('result');
      appendPlainText(resultElement, String(response));
      resultElement.appendChild(document.createElement('br'));
      resultElement.appendChild(document.createTextNode(blob.slice(1, 20)));
    });
  };

  xhr.onerror = function() {
    console.error('getLink: network error while requesting ' + targetUrl);
  };

  xhr.send();
}

结果数据看起来没有问题，string.length 为 2898。然后我按照示例中的说明继续添加笔记。同样，这一步检查下来也都正常，字节字符串完好无损地进入了 CreateNote()，所以下面的代码同样仅供参考：

/**
 * Builds a note with a single PDF attachment and submits it through the note
 * store returned by get_NoteStore().
 *
 * @param {string} Name - Used as the note title and as the source URL; the
 *     part after the last "/" or "\" becomes the attachment file name.
 * @param {string} ContentFile - Attachment contents as a string. Its `.length`
 *     is stored as the resource size, so it matches the byte count only when
 *     each character stands for exactly one byte.
 * @param {Function} [callback] - If not undefined, called with the value
 *     returned by noteStore.createNote().
 */
function AddNote1(Name, ContentFile, callback)
{
    var noteStore = get_NoteStore();
    var note = new Note();
    note.title = Name;
    var na = new NoteAttributes();
    na.sourceURL = Name;
    na.source = "web.clip";
    note.attributes = na;

    var data = new Data();
    var resource = new Resource();
    // Declared locally; previously this assignment created an implicit global.
    var binaryData = ContentFile;
    data.size = binaryData.length;
    alert(binaryData.length + '*' + ContentFile.length);
    data.bodyHash = raw_md5(binaryData);
    data.body = binaryData;
    resource.mime = "application/pdf";
    resource.data = data;
    var resAttributes = new ResourceAttributes();
    // Strip everything up to and including the last path separator.
    resAttributes.fileName = String(Name).replace(/^.*[\/\\]/g, '');
    resource.attributes = resAttributes;
    note.resources = [resource];

    //important to set correct content
    var content = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
        + "<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\"><en-note>";
    content += String("Oi Wei").replace(/\n/g, "<br/>") + "<br/>";
    // The en-media element refers to the attachment through md5(binaryData).
    content += "<en-media type=\"application/pdf\" hash=\"" + md5(binaryData) + "\"/>";
    content += "</en-note>";
    note.content = content;

    //response is a created note
    //callback function process response
    // NOTE(review): `Eventnote` is the identifier used by the original code;
    // confirm it is the intended global and not a misspelling.
    var response = noteStore.createNote(Eventnote.Auth.get_auth_token(), note);
    if (callback !== undefined) {
        callback(response);
    }
}

score 0 · Accepted Answer

参见问题：《是否存在不受此问题影响的 JavaScript utf8_decode() 实现？》

取自这里：

/**
 * Converts a string to its UTF-8 form, returned as a string with one
 * character per byte.
 *
 * encodeURIComponent() percent-encodes the UTF-8 bytes of the input, and
 * unescape() turns each %XX escape back into the character with that code.
 * encodeURIComponent() throws a URIError for an unpaired surrogate.
 *
 * @param {string} s - Text to encode.
 * @returns {string} One character per UTF-8 byte.
 */
function encode_utf8( s )
{
  var percentEncoded = encodeURIComponent( s );
  var byteString = unescape( percentEncoded );
  return byteString;
}

/**
 * Converts a string holding one character per UTF-8 byte back into ordinary
 * text.
 *
 * escape() rewrites the byte characters as %XX escapes, and
 * decodeURIComponent() interprets those escapes as UTF-8. A URIError is thrown
 * when the escapes do not form valid UTF-8.
 *
 * @param {string} s - String with one character per UTF-8 byte.
 * @returns {string} The decoded text.
 */
function decode_utf8( s )
{
  var percentEscaped = escape( s );
  var text = decodeURIComponent( percentEscaped );
  return text;
}

javascript - 通过 Thrift 从 Chrome 扩展程序保存到 Evernote 的二进制文件在 Unicode 转换后出现乱码

1 回答 1

Related

Reference