本文给出一个通过赫夫曼树为文件内容构建赫夫曼编码、从而压缩文件的样例。
结点信息如下:
public class Node implements Comparable<Node> {
Integer weight;//表示权重再赫夫曼编码中用来存储字符出现的次数
Byte data;//用来存放出现的字符
Node left;
Node right;
public Node() {
super();
}
public Node(Integer weight, Byte data) {
super();
this.weight = weight;
this.data = data;
}
@Override
public String toString() {
return "Node [weight=" + weight + ", data=" + data + "]";
}
/**
* 用来排序结点
*/
@Override
public int compareTo(Node node) {
// TODO Auto-generated method stub
return this.weight - node.weight;
}
}
接下来便是赫夫曼编码压缩的方法
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Huffman-coding compressor and decompressor for byte arrays and files.
 *
 * <p>Compressed data layout produced by {@link #zip(byte[], Map)}: the
 * concatenated code bits packed eight per byte, followed by ONE trailer byte
 * holding the number of valid bits (1..8) in the last packed byte. The trailer
 * is what lets {@link #unzip(Map, byte[])} restore leading zero bits of the
 * final chunk. Empty input compresses to an empty array. Data produced by
 * earlier versions of this class (which had no trailer byte and could not be
 * decoded reliably) is not readable.
 *
 * <p>The code table of the most recent compression is kept in a static field,
 * so instances share state and this class is not safe for concurrent use.
 */
public class HuffmanCode {
    /** Code table (byte value to bit string) built by the most recent {@link #getCodeTable(Node)} call. */
    static Map<Byte, String> map = new HashMap<>();
    /** Prefix handed to the recursive table builder; never modified by this class, so it stays empty. */
    static StringBuilder sb = new StringBuilder();

    /** Code assigned to the only symbol when the input contains a single distinct byte value. */
    private static final String SINGLE_SYMBOL_CODE = "0";
    /** Number of code bits packed into one output byte. */
    private static final int BITS_PER_BYTE = 8;

    /**
     * Builds a Huffman tree from the byte frequencies of {@code data}.
     *
     * @param data the bytes to analyse
     * @return the root of the tree, or {@code null} when {@code data} is empty
     */
    public Node createNodeTree(byte[] data) {
        // Count how often each byte value occurs.
        Map<Byte, Integer> counts = new HashMap<Byte, Integer>();
        for (byte b : data) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }
        if (counts.isEmpty()) {
            return null;
        }
        // One leaf per distinct byte value.
        List<Node> nodes = new ArrayList<Node>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getValue(), entry.getKey()));
        }
        // Repeatedly merge the two lightest nodes until a single root remains.
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            Node leftChild = nodes.remove(0);
            Node rightChild = nodes.remove(0);
            Node parent = new Node(leftChild.weight + rightChild.weight, null);
            parent.left = leftChild;
            parent.right = rightChild;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Recursively records the code of every leaf below {@code node} in the
     * shared code table.
     *
     * @param node      the current node; {@code null} is ignored
     * @param code      the bit ("0" or "1") that leads from the parent to {@code node}
     * @param sbBuilder the bits accumulated on the path down to the parent; not modified
     */
    public void setCodeTable(Node node, String code, StringBuilder sbBuilder) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(sbBuilder);
        path.append(code);
        if (node.data == null) {
            setCodeTable(node.left, "0", path);
            setCodeTable(node.right, "1", path);
        } else {
            map.put(node.data, path.toString());
        }
    }

    /**
     * Rebuilds the shared code table from a Huffman tree.
     *
     * <p>The table is cleared first so codes from a previous compression
     * cannot leak into this one. A tree consisting of a single leaf gets the
     * one-bit code "0" for its symbol.
     *
     * @param node the root of the Huffman tree, may be {@code null}
     * @return the shared code table, or {@code null} when {@code node} is {@code null}
     */
    public Map<Byte, String> getCodeTable(Node node) {
        map.clear();
        if (node == null) {
            return null;
        }
        if (node.data != null) {
            // Only one distinct byte value: the root itself is the leaf.
            map.put(node.data, SINGLE_SYMBOL_CODE);
            return map;
        }
        setCodeTable(node.left, "0", sb);
        setCodeTable(node.right, "1", sb);
        return map;
    }

    /**
     * Encodes {@code bytes} with the given code table and packs the bits.
     *
     * @param bytes the data to compress
     * @param map   code table mapping each byte value to its bit string;
     *              may be {@code null} only when {@code bytes} is empty
     * @return the packed bits followed by one trailer byte with the number of
     *         valid bits in the last packed byte; an empty array for empty input
     * @throws IllegalArgumentException if a byte has no code in {@code map}
     */
    public byte[] zip(byte[] bytes, Map<Byte, String> map) {
        if (bytes.length == 0) {
            return new byte[0];
        }
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = (map == null) ? null : map.get(b);
            if (code == null || code.isEmpty()) {
                throw new IllegalArgumentException("No Huffman code for byte value " + b);
            }
            bits.append(code);
        }
        int packedLength = (bits.length() + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
        byte[] huffmanBytes = new byte[packedLength + 1];
        int lastChunkBits = BITS_PER_BYTE;
        for (int i = 0, index = 0; i < bits.length(); i += BITS_PER_BYTE, index++) {
            int end = Math.min(i + BITS_PER_BYTE, bits.length());
            huffmanBytes[index] = (byte) Integer.parseInt(bits.substring(i, end), 2);
            lastChunkBits = end - i;
        }
        // Trailer: how many bits of the last packed byte are real data.
        huffmanBytes[packedLength] = (byte) lastChunkBits;
        return huffmanBytes;
    }

    /**
     * Converts a byte to its binary string representation.
     *
     * @param b    the byte to convert
     * @param flag {@code true} to left-pad with zeros to exactly 8 characters;
     *             {@code false} to return the shortest representation
     * @return the bits of {@code b} treated as an unsigned value (at most 8 characters)
     */
    public String byteTobitString(byte b, boolean flag) {
        // Mask to 8 bits so negative bytes do not sign-extend to 32 bits.
        int unsigned = b & 0xFF;
        if (!flag) {
            return Integer.toBinaryString(unsigned);
        }
        return lowBits(b, BITS_PER_BYTE);
    }

    /**
     * Decompresses data produced with the code table of the most recent
     * compression and converts the result to a string using the platform
     * default charset.
     *
     * @param huffmanBytes compressed data as returned by {@link #Hzip(byte[])}
     * @return the decoded text
     * @throws IllegalArgumentException if the data is malformed
     */
    public String unzip(byte[] huffmanBytes) {
        return new String(unzip(map, huffmanBytes));
    }

    /**
     * Decompresses data produced by {@link #zip(byte[], Map)}.
     *
     * @param huffmanTable the code table used for compression
     * @param huffmanBytes the compressed data including its trailer byte
     * @return the original bytes
     * @throws IllegalArgumentException if an argument is {@code null}, the
     *         trailer byte is missing or invalid, or the bits do not decode
     *         to complete codes
     */
    public byte[] unzip(Map<Byte, String> huffmanTable, byte[] huffmanBytes) {
        if (huffmanBytes == null) {
            throw new IllegalArgumentException("Compressed data must not be null");
        }
        if (huffmanBytes.length == 0) {
            return new byte[0];
        }
        if (huffmanTable == null) {
            throw new IllegalArgumentException("Huffman table must not be null");
        }
        if (huffmanBytes.length < 2) {
            throw new IllegalArgumentException("Compressed data is truncated");
        }
        int packedLength = huffmanBytes.length - 1;
        int lastChunkBits = huffmanBytes[packedLength];
        if (lastChunkBits < 1 || lastChunkBits > BITS_PER_BYTE) {
            throw new IllegalArgumentException("Invalid trailer byte: " + lastChunkBits);
        }
        // Expand the packed bytes back into the bit string.
        StringBuilder bits = new StringBuilder(packedLength * BITS_PER_BYTE);
        for (int i = 0; i < packedLength; i++) {
            int width = (i == packedLength - 1) ? lastChunkBits : BITS_PER_BYTE;
            bits.append(lowBits(huffmanBytes[i], width));
        }
        // Invert the table: bit string -> byte value.
        Map<String, Byte> reverseMap = new HashMap<String, Byte>();
        for (Map.Entry<Byte, String> entry : huffmanTable.entrySet()) {
            reverseMap.put(entry.getValue(), entry.getKey());
        }
        // Grow the candidate code bit by bit until it matches a table entry.
        List<Byte> decoded = new ArrayList<Byte>();
        StringBuilder candidate = new StringBuilder();
        for (int i = 0; i < bits.length(); i++) {
            candidate.append(bits.charAt(i));
            Byte value = reverseMap.get(candidate.toString());
            if (value != null) {
                decoded.add(value);
                candidate.setLength(0);
            }
        }
        if (candidate.length() != 0) {
            throw new IllegalArgumentException("Compressed data ends with an incomplete code");
        }
        byte[] result = new byte[decoded.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = decoded.get(i);
        }
        return result;
    }

    /**
     * Compresses a byte array: builds the tree, rebuilds the shared code table
     * and encodes the data.
     *
     * @param string the bytes to compress
     * @return the compressed bytes (see {@link #zip(byte[], Map)})
     */
    public byte[] Hzip(byte[] string) {
        Node rootNode = createNodeTree(string);
        Map<Byte, String> table = getCodeTable(rootNode);
        return zip(string, table);
    }

    /**
     * Compresses a file. The destination receives two serialized objects: the
     * compressed {@code byte[]} and then the code table.
     *
     * <p>I/O failures are reported on standard error and not propagated.
     *
     * @param src  path of the file to compress
     * @param dest path of the compressed file to create
     */
    public void zipFile(String src, String dest) {
        try {
            byte[] content = readAllBytes(src);
            byte[] compressed = Hzip(content);
            // Snapshot of the table that Hzip has just rebuilt.
            Map<Byte, String> table = new HashMap<Byte, String>(map);
            try (OutputStream os = new FileOutputStream(dest);
                    ObjectOutputStream oos = new ObjectOutputStream(os)) {
                oos.writeObject(compressed);
                // Store the code table so the file can be decompressed later.
                oos.writeObject(table);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Decompresses a file written by {@link #zipFile(String, String)}.
     *
     * <p>Failures (I/O errors or malformed content) are reported on standard
     * error and not propagated; the destination is only created after the
     * content was decoded successfully.
     *
     * @param src  path of the compressed file
     * @param dest path of the restored file to create
     */
    public void unzipFile(String src, String dest) {
        try {
            byte[] compressed;
            Map<Byte, String> table;
            // NOTE(review): Java-native deserialization must only be used on trusted files.
            try (InputStream in = new FileInputStream(src);
                    ObjectInputStream ois = new ObjectInputStream(in)) {
                compressed = (byte[]) ois.readObject();
                @SuppressWarnings("unchecked") // zipFile writes a Map<Byte, String> as second object
                Map<Byte, String> storedTable = (Map<Byte, String>) ois.readObject();
                table = storedTable;
            }
            byte[] restored = unzip(table, compressed);
            try (OutputStream os = new FileOutputStream(dest)) {
                os.write(restored);
            }
        } catch (IOException | ClassNotFoundException | ClassCastException
                | IllegalArgumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the tree in pre-order (node, left subtree, right subtree).
     *
     * @param node the root of the (sub)tree; {@code null} prints nothing
     */
    public void preOrder(Node node) {
        if (node == null) {
            return;
        }
        System.out.println(node);
        preOrder(node.left);
        preOrder(node.right);
    }

    /** Returns the lowest {@code width} bits of {@code b} as a zero-padded binary string. */
    private static String lowBits(byte b, int width) {
        // Setting bit 8 guarantees nine characters, so the padding zeros are kept.
        String nineBits = Integer.toBinaryString((b & 0xFF) | 0x100);
        return nineBits.substring(nineBits.length() - width);
    }

    /** Reads the whole file at {@code path}, looping until end of stream. */
    private static byte[] readAllBytes(String path) throws IOException {
        try (InputStream in = new FileInputStream(path);
                ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }
}
测试样例:
import java.util.Arrays;
/**
 * Demo driver: round-trips a sample string through {@code HuffmanCode} and
 * then compresses and decompresses a file at fixed paths, printing progress
 * to standard output.
 */
public class HuffmanTest {
    public static void main(String[] args) {
        final String separator = "============================================";
        final String original = "I like you just like you!";
        final HuffmanCode codec = new HuffmanCode();

        // In-memory round trip: compress, then decompress.
        final byte[] packed = codec.Hzip(original.getBytes());
        final String restored = codec.unzip(packed);
        System.out.println("压缩前:" + original);
        System.out.println("压缩后的样子:" + Arrays.toString(packed));
        System.out.println("还原后:" + restored);

        // File compression.
        System.out.println(separator);
        System.out.println("开始压缩");
        codec.zipFile("E://12.png", "E://12.zip");
        System.out.println("压缩成功");

        // File decompression.
        System.out.println(separator);
        System.out.println("开始解压");
        codec.unzipFile("E://12.zip", "E://123.png");
        System.out.println("解压成功");
    }
}