把纯文本字符串用Gzip压缩再转换为Base64能有多少压缩率呢？

最新推荐文章于 2024-09-10 21:23:32 发布

iteye_21091

最新推荐文章于 2024-09-10 21:23:32 发布

阅读量2.5k

点赞数

分类专栏： Java 文章标签： java gzip base64

Java 专栏收录该内容

29 篇文章 0 订阅

订阅专栏

其实具体多大压缩率要看源文件的内容，一般来说重复的单词越多，压缩率越高。

下面是一个测试程序：把 /usr/share/dict/words 用 Gzip 压缩后再转换为 Base64，并输出各阶段的大小和耗时：



import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.codec.binary.Base64;

public class GzipBase64Tests {

	public static void main(String[] args) throws Exception {
		File input = new File("/usr/share/dict/words");

		if (!input.exists()) {
			System.out.println("input file not exists!");
			return;
		}

		if (output.exists()) {
			output.delete();
		}

		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		GZIPOutputStream gout = new GZIPOutputStream(buffer);

		FileInputStream in = new FileInputStream(input);

		long t1 = System.currentTimeMillis();
		byte[] buf = new byte[1024];
		int total=0;
		int rd;
		while ((rd = in.read(buf)) != -1) {
			total += rd;
			gout.write(buf,0, rd);
		}

		gout.close();
		in.close();

		byte[] result = buffer.toByteArray();

		long t2 = System.currentTimeMillis();
		String base64 = Base64.encodeBase64String(result);
		long t3 = System.currentTimeMillis();

		System.out.printf("raw %d -> gzip %d -> base64 %d, time1 %dms, time2 %dms", total, result.length, base64.length(), t2-t1, t3-t2);
	}
}