警告:压缩音频文件之类的东西最好使用专门针对该类型数据的算法来完成,也许是有损的。但是,知道找到如下提供的合理无损实现是多么困难,我非常担心很难在 Javascript 中找到专门满足您需求的那种数据的良好实现。
无论如何,我也对 Javascript 中的压缩/解压缩有这种普遍需求,并且我需要相同的算法来同时在客户端(浏览器)和服务器端(node.js)工作,并且我需要它在非常大文件。我已经检查了 jszip,并且我还尝试了 LZW 算法,其中至少有五六个都不满足要求。我不记得每个问题具体是什么问题,但我只想说很难在 JavaScript 中找到一个既能工作又能在服务器端和客户端工作并处理大文件的好的快速压缩器/解压缩器!我尝试了至少十几种不同压缩算法的不同实现,最后选择了这个——它还没有让我失望!
更新
这是原始来源:
https ://code.google.com/p/jslzjb/source/browse/trunk/Iuppiter.js?r=2
由一个叫熊的人——感谢熊,无论你是谁,你都是最棒的。它是 LZJB:http ://en.wikipedia.org/wiki/LZJB
更新 2
- 更正了缺少分号的问题 - 不应再给对象而不是函数错误。
- 此实现停止处理长度小于约 80 个字符的数据。所以我更新了这个例子来反映这一点。
- 意识到 base64 编码/解码方法实际上暴露在为此版本传入的对象上,所以......
- 目前看到我们可以对特定的 blob 类型做些什么——例如,最好的方法是图像与音频等,因为这对一般的 JS 人很有用......将在此处更新找到的内容。
更新 3
Bear 的原始 Iuppiter 源代码有一个比我在下面发布的更好的包装器。它由 cscott 编写,在 github 上: https ://github.com/cscott/lzjb
我将切换到这个,因为它也可以流式传输。
下面是 Node.js 中与 wav 文件一起使用的示例。但在复制示例之前,让我先告诉你一个可怕的消息,至少对于我尝试过的这个 wav 文件:
63128 Jun 19 14:09 beep-1.wav
63128 Jun 19 17:47 beep-2.wav
89997 Jun 19 17:47 beep-2.wav.compressed
所以它成功地重新生成了 wav(并播放了)。但是,压缩后的文件似乎比原始文件大。拍得好。无论如何,尝试您的数据可能会很好,您永远不会知道,您可能会很幸运。这是我使用的代码:
var fs = require('fs');
var lzjb = require('lzjb');
fs.readFile('beep-1.wav', function(err, wav){
// base 64 first
var encoded = wav.toString('base64');
// then utf8 - you don't want to go utf-8 directly
var data = new Buffer(encoded, 'utf8');
// now compress
var compressed = lzjb.compressFile(data, null, 9);
// the next two lines are unnecessary, but to see what kind of
// size is written to disk to compare with the original binary file
var compressedBuffer = new Buffer(compressed, 'binary');
fs.writeFile('beep-2.wav.compressed', compressedBuffer, 'binary', function(err) {});
// decompress
var uncompressed = lzjb.decompressFile(compressed);
// decode from utf8 back to base64
var encoded2 = new Buffer(uncompressed).toString('utf8');
// decode back to binary original from base64
var decoded = new Buffer(encoded2, 'base64');
// write it out, make sure it is identical
fs.writeFile('beep-2.wav', decoded, function(err) {});
});
归根结底,我认为在大多数形式的二进制数据上实现任何级别的压缩都太难了,而这些二进制数据不会被由此产生的 base64 编码所破坏。直到今天,终端控制字符的时代仍然困扰着我们。您可以尝试升级到不同的基础,但这也有其风险和问题。
例如看这个:
什么是最有效的二进制文本编码?
还有这个:
为什么人们不使用base128?
不过有一件事,绝对在您接受答案之前,请在您的 blob 上尝试一下,我主要将其用于压缩 utf-8,并且我想确保它适用于您的特定数据。
无论如何,它就在这里!
/**
$Id: Iuppiter.js 3026 2010-06-23 10:03:13Z Bear $
Copyright (c) 2010 Nuwa Information Co., Ltd, and individual contributors.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of Nuwa Information nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
$Author: Bear $
$Date: 2010-06-23 18:03:13 +0800 (星期三, 23 六月 2010) $
$Revision: 3026 $
*/
var fastcompressor = {};
(function (k) {
k.toByteArray = function (c) {
var h = [],
b, a;
for (b = 0; b < c.length; b++) a = c.charCodeAt(b), 127 >= a ? h.push(a) : (2047 >= a ? h.push(a >> 6 | 192) : (65535 >= a ? h.push(a >> 12 | 224) : (h.push(a >> 18 | 240), h.push(a >> 12 & 63 | 128)), h.push(a >> 6 & 63 | 128)), h.push(a & 63 | 128));
return h
};
k.Base64 = {
CA: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
CAS: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
IA: Array(256),
IAS: Array(256),
init: function () {
var c;
for (c = 0; 256 > c; c++) k.Base64.IA[c] = -1, k.Base64.IAS[c] = -1;
c = 0;
for (iS = k.Base64.CA.length; c < iS; c++) k.Base64.IA[k.Base64.CA.charCodeAt(c)] = c, k.Base64.IAS[k.Base64.CAS.charCodeAt(c)] = c;
k.Base64.IA["="] = k.Base64.IAS["="] = 0
},
encode: function (c, h) {
var b, a, d, e, m, g, f, l, j;
b = h ? k.Base64.CAS : k.Base64.CA;
d = c.constructor == Array ? c : k.toByteArray(c);
e = d.length;
m = 3 * (e / 3);
g = (e - 1) / 3 + 1 << 2;
a = Array(g);
for (l = f = 0; f < m;) j = (d[f++] & 255) << 16 | (d[f++] & 255) << 8 | d[f++] & 255, a[l++] = b.charAt(j >> 18 & 63), a[l++] = b.charAt(j >> 12 & 63), a[l++] = b.charAt(j >> 6 & 63), a[l++] = b.charAt(j & 63);
f = e - m;
0 < f && (j = (d[m] &
255) << 10 | (2 == f ? (d[e - 1] & 255) << 2 : 0), a[g - 4] = b.charAt(j >> 12), a[g - 3] = b.charAt(j >> 6 & 63), a[g - 2] = 2 == f ? b.charAt(j & 63) : "=", a[g - 1] = "=");
return a.join("")
},
decode: function (c, h) {
var b, a, d, e, m, g, f, l, j, p, q, n;
b = h ? k.Base64.IAS : k.Base64.IA;
c.constructor == Array ? (d = c, m = !0) : (d = k.toByteArray(c), m = !1);
e = d.length;
g = 0;
for (f = e - 1; g < f && 0 > b[d[g]];) g++;
for (; 0 < f && 0 > b[d[f]];) f--;
l = "=" == d[f] ? "=" == d[f - 1] ? 2 : 1 : 0;
a = f - g + 1;
j = 76 < e ? ("\r" == d[76] ? a / 78 : 0) << 1 : 0;
e = (6 * (a - j) >> 3) - l;
a = Array(e);
q = p = 0;
for (eLen = 3 * (e / 3); p < eLen;) n = b[d[g++]] << 18 | b[d[g++]] <<
12 | b[d[g++]] << 6 | b[d[g++]], a[p++] = n >> 16 & 255, a[p++] = n >> 8 & 255, a[p++] = n & 255, 0 < j && 19 == ++q && (g += 2, q = 0);
if (p < e) {
for (j = n = 0; g <= f - l; j++) n |= b[d[g++]] << 18 - 6 * j;
for (b = 16; p < e; b -= 8) a[p++] = n >> b & 255
}
if (m) return a;
for (n = 0; n < a.length; n++) a[n] = String.fromCharCode(a[n]);
return a.join("")
}
};
k.Base64.init();
NBBY = 8;
MATCH_BITS = 6;
MATCH_MIN = 3;
MATCH_MAX = (1 << MATCH_BITS) + (MATCH_MIN - 1);
OFFSET_MASK = (1 << 16 - MATCH_BITS) - 1;
LEMPEL_SIZE = 256;
k.compress = function (c) {
var h = [],
b, a = 0,
d = 0,
e, m, g = 1 << NBBY - 1,
f, l, j = Array(LEMPEL_SIZE);
for (b = 0; b < LEMPEL_SIZE; b++) j[b] =
3435973836;
c = c.constructor == Array ? c : k.toByteArray(c);
for (b = c.length; a < b;) {
if ((g <<= 1) == 1 << NBBY) {
if (d >= b - 1 - 2 * NBBY) {
f = b;
for (d = a = 0; f; f--) h[d++] = c[a++];
break
}
g = 1;
m = d;
h[d++] = 0
}
if (a > b - MATCH_MAX) h[d++] = c[a++];
else if (e = (c[a] + 13 ^ c[a + 1] - 13 ^ c[a + 2]) & LEMPEL_SIZE - 1, l = a - j[e] & OFFSET_MASK, j[e] = a, e = a - l, 0 <= e && e != a && c[a] == c[e] && c[a + 1] == c[e + 1] && c[a + 2] == c[e + 2]) {
h[m] |= g;
for (f = MATCH_MIN; f < MATCH_MAX && c[a + f] == c[e + f]; f++);
h[d++] = f - MATCH_MIN << NBBY - MATCH_BITS | l >> NBBY;
h[d++] = l;
a += f
} else h[d++] = c[a++]
}
return h
};
k.decompress = function (c,
h) {
var b, a = [],
d, e = 0,
m = 0,
g, f, l = 1 << NBBY - 1,
j;
b = c.constructor == Array ? c : k.toByteArray(c);
for (d = b.length; e < d;) {
if ((l <<= 1) == 1 << NBBY) l = 1, f = b[e++];
if (f & l)
if (j = (b[e] >> NBBY - MATCH_BITS) + MATCH_MIN, g = (b[e] << NBBY | b[e + 1]) & OFFSET_MASK, e += 2, 0 <= (g = m - g))
for (; 0 <= --j;) a[m++] = a[g++];
else break;
else a[m++] = b[e++]
}
if (!("undefined" == typeof h ? 0 : h)) {
for (b = 0; b < m; b++) a[b] = String.fromCharCode(a[b]);
a = a.join("")
}
return a
}
})(fastcompressor);
如果记忆有用......这是你如何使用它:
var compressed = fastcompressor.compress("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); // data less than this length poses issues.
var decompressed = fastcompressor.decompress(compressed);
Rgds....Hoonto/马特
此外,我发布的内容已缩小但经过美化,并且非常易于使用。检查上面更新中的链接以获取原始内容。