Java 自带一个标准的压缩工具:java.util.zip——它是通用库,但由于随处可用,算是一个不错的解决方案。如有需要,应该研究专门的压缩、编码方案;我很少把 zip 作为首选方案来推荐。
这是一个如何通过 Deflater/Inflater 处理 zip 的示例。
大多数人都知道 ZipInput/OutputStream(尤其是 Gzip)。它们在处理从内存到 zlib 的拷贝时都有缺点,其中 GZip 尤其是一场灾难,因为 CRC32 会调用本机代码(调用本机代码会使优化无法进行,并引入更多性能损失)。
几个重要的注意事项:不要把 zip 压缩级别调得太高,这会严重拖累性能——当然可以自行尝试,在 CPU 和磁盘活动之间调出最佳比例。
该代码还展示了 java.util.zip 的一个真正缺点——它不支持直接缓冲区(direct buffer)。
支持它其实很简单,但没有人费心去做。直接缓冲区可以省去几次内存拷贝并减少内存占用。
最后一点:(j)zlib 有纯 Java 版本,它在压缩方面相当漂亮地击败了原生实现。
package t1;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Random;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;
/**
 * Demo of storing an {@code int[]} in a file with {@link Deflater}/{@link Inflater}.
 *
 * <p>File layout: a 4-byte big-endian element count (the header) followed by one
 * zlib-wrapped deflate stream that contains every element as a 4-byte big-endian int.
 * An empty array is stored as the header alone.
 */
public class ZInt {
    /** Upper bound, in bytes, for the heap buffers used while reading and writing (128 KiB). */
    private static final int bucketSize = 1 << 17; // in real world should not be const

    /** Deflater compression level; higher levels (5+) are likely to be a total waste of CPU. */
    static final int zipLevel = 2;

    /**
     * Compresses {@code a} into {@code file}, replacing any previous content.
     *
     * @param a    the values to store
     * @param file the destination file
     * @param sync if {@code true}, the file descriptor is synced after a successful write
     * @throws IOException if the file cannot be opened, written or synced
     */
    static void write(int[] a, File file, boolean sync) throws IOException {
        // Power-of-two size between 8 KiB and bucketSize, hence always a multiple of 4 bytes.
        byte[] bucket = new byte[Math.min(bucketSize, Math.max(1 << 13, Integer.highestOneBit(a.length >> 3)))];
        byte[] zipOut = new byte[bucket.length];
        final FileOutputStream fout = new FileOutputStream(file);
        FileChannel channel = fout.getChannel();
        try {
            ByteBuffer buf = ByteBuffer.wrap(bucket);
            // unfortunately java.util.zip doesn't support direct buffers - that would be the perfect fit
            ByteBuffer out = ByteBuffer.wrap(zipOut);
            out.putInt(a.length); // header; it is flushed together with the first compressed chunk
            if (a.length == 0) {
                doWrite(channel, out, 0);
            } else {
                Deflater deflater = new Deflater(zipLevel, false);
                try {
                    for (int i = 0; i < a.length;) {
                        i = put(a, buf, i);
                        buf.flip();
                        deflater.setInput(bucket, buf.position(), buf.remaining());
                        if (i == a.length) {
                            deflater.finish();
                        }
                        // deflate() returning 0 means the current input was consumed (or the stream is finished)
                        for (int n; (n = deflater.deflate(zipOut, out.position(), out.remaining())) > 0;) {
                            doWrite(channel, out, n);
                        }
                        buf.clear();
                    }
                } finally {
                    deflater.end();
                }
            }
            // Sync only after a successful write and inside the try, so that a failing
            // sync can neither skip the close below nor mask an earlier exception.
            if (sync) {
                fout.getFD().sync();
            }
        } finally {
            channel.close();
        }
    }

    /**
     * Reads back an array previously stored by {@link #write(int[], File, boolean)}.
     *
     * @param file the file to read
     * @return the decoded array; its length is the element count stored in the header
     * @throws IOException         if the file cannot be read, the header is missing or negative,
     *                             or the compressed data ends before all elements were decoded
     * @throws DataFormatException if the compressed stream is malformed
     */
    static int[] read(File file) throws IOException, DataFormatException {
        FileChannel channel = new FileInputStream(file).getChannel();
        try {
            byte[] in = new byte[(int) Math.min(bucketSize, channel.size())];
            if (in.length < 4) {
                throw new IOException("File too short to contain a header: " + file);
            }
            ByteBuffer buf = ByteBuffer.wrap(in);
            // A single read may return fewer bytes than requested; make sure the header is complete.
            while (buf.position() < 4) {
                if (channel.read(buf) < 0) {
                    throw new IOException("Unexpected end of file while reading header: " + file);
                }
            }
            buf.flip();
            int length = buf.getInt();
            if (length < 0) {
                throw new IOException("Corrupt header, negative element count " + length + ": " + file);
            }
            int[] a = new int[length];
            if (length == 0) {
                return a;
            }
            int i = 0;
            // long arithmetic: length * 4 overflows int for length >= 2^29
            byte[] inflated = new byte[(int) Math.min(1L << 17, 4L * length)];
            ByteBuffer intBuffer = ByteBuffer.wrap(inflated);
            Inflater inflater = new Inflater(false);
            try {
                do {
                    if (!buf.hasRemaining()) {
                        buf.clear();
                        channel.read(buf);
                        buf.flip();
                    }
                    inflater.setInput(in, buf.position(), buf.remaining());
                    buf.position(buf.limit()); // everything handed to the inflater counts as consumed
                    for (;;) {
                        int n = inflater.inflate(inflated, intBuffer.position(), intBuffer.remaining());
                        if (n == 0) {
                            break;
                        }
                        intBuffer.position(intBuffer.position() + n).flip();
                        for (; intBuffer.remaining() > 3 && i < a.length; i++) { // need at least 4 bytes to form an int
                            a[i] = intBuffer.getInt();
                        }
                        intBuffer.compact(); // keep a trailing partial int for the next round
                    }
                } while (channel.position() < channel.size() && i < a.length);
            } finally {
                inflater.end();
            }
            if (i < a.length) {
                // Do not hand back a silently zero-padded array.
                throw new IOException("Truncated or corrupt data: expected " + a.length
                        + " ints but decoded " + i + ": " + file);
            }
            return a;
        } finally {
            channel.close();
        }
    }

    /**
     * Marks {@code n} more bytes of {@code out} as filled, writes the buffer from its start
     * up to the new position to {@code channel}, and clears the buffer for reuse.
     */
    private static void doWrite(FileChannel channel, ByteBuffer out, int n) throws IOException {
        out.position(out.position() + n).flip();
        while (out.hasRemaining()) {
            channel.write(out);
        }
        out.clear();
    }

    /**
     * Copies ints from {@code a}, starting at index {@code i}, into {@code buf} until either the
     * buffer has no space left or the array is exhausted.
     *
     * @return the index of the first element that was not copied
     */
    private static int put(int[] a, ByteBuffer buf, int i) {
        while (buf.hasRemaining() && i < a.length) {
            buf.putInt(a[i++]);
        }
        return i;
    }

    /** Generates {@code len} reproducible pseudo-random values (fixed seed), about half of them zero. */
    private static int[] generateRandom(int len) {
        Random r = new Random(17);
        int[] n = new int[len];
        for (int i = 0; i < len; i++) {
            n[i] = r.nextBoolean() ? 0 : r.nextInt(1 << 23); // limit bounds to have any sensible compression
        }
        return n;
    }

    /** Writes a random array to {@code xxx.xxx}, reads it back and reports the first mismatch, if any. */
    public static void main(String[] args) throws Throwable {
        File file = new File("xxx.xxx");
        int[] n = generateRandom(3000000); // {0,2,4,1,2,3};
        long start = System.nanoTime();
        write(n, file, false);
        long elapsed = System.nanoTime() - start; // elapsed will be fairer if the sync is true
        System.out.printf("File length: %d, for %d ints, ratio %.2f in %.2fms %n", file.length(), n.length, ((double)file.length())/4/n.length, java.math.BigDecimal.valueOf(elapsed, 6) );
        int[] m = read(file);
        // compare by hand: Arrays.equals doesn't report the failing position
        for (int i = 0; i < n.length; i++) {
            if (m[i] != n[i]) {
                System.err.printf("Failed at %d%n", i);
                break;
            }
        }
        System.out.printf("All done!");
    }
}
请注意,这段代码并不是一个严谨的基准测试!
回复晚了,是因为这段代码写起来很无聊——又是一个 zip 示例,抱歉。