哈夫曼编码及实现
哈夫曼编码是一种可变字长编码(VLC),广泛应用于数据文件压缩,压缩率通常在20%~90%之间。当存在权值相同的节点时,排序方式不同,构造出的哈夫曼树可能不同,对应的哈夫曼编码也不完全一样,但树的带权路径长度(WPL)都是最小的。
- 数据压缩的原理
(1)生成哈夫曼树;
(2)根据哈夫曼树生成对应的哈夫曼编码表,再用该编码表把原始数据转换成哈夫曼编码数据;
(3)通过哈夫曼编码表把内容对应的字节数组压缩。
import lombok.Data;
import java.io.*;
import java.util.*;
public class HuffmanCode {
/**
 * Demo entry point: walks a fixed sample sentence through each stage in turn
 * (node list, Huffman tree, code table, encoded bytes) and prints the
 * intermediate result of every stage.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    String content = "i like like like java do you like a java";
    byte[] contentBytes = content.getBytes();
    System.out.println(contentBytes.length);

    // Step by step: each stage is run and printed separately.
    // NOTE(review): getNodes is defined elsewhere; presumably one node per distinct byte.
    List<Node> leafNodes = getNodes(contentBytes);
    System.out.println("nodes=" + leafNodes);

    System.out.println("哈夫曼树:");
    Node treeRoot = createHuffManTree(leafNodes);
    System.out.println("先序遍历:");
    prevOrder(treeRoot);

    // Local name deliberately differs from the static field to avoid shadowing it.
    Map<Byte, String> codeTable = getCodes(treeRoot);
    System.out.println("生成的哈夫曼编码表是:" + codeTable);

    // NOTE(review): zip is defined elsewhere; it receives the raw bytes and the code table.
    byte[] encodedBytes = zip(contentBytes, codeTable);
    System.out.println("哈夫曼编码后的字节数组:" + Arrays.toString(encodedBytes));
}
// Shared code table: maps a byte value to its Huffman code, a string of '0'/'1' characters.
private static Map<Byte, String> huffManCodes = new HashMap<>();
// Empty seed prefix for the recursive overload, which copies it before appending.
private static StringBuilder builder = new StringBuilder();

/**
 * Builds the Huffman code table for the tree rooted at {@code root}.
 *
 * <p>The shared table is cleared first, so entries from an earlier call with a
 * different tree cannot leak into this result. If the root is itself a leaf
 * (the input held a single distinct byte), that byte is given the one-bit code
 * {@code "0"}; otherwise it would end up with no code at all.
 *
 * @param root root of the Huffman tree; may be {@code null}
 * @return the shared code table filled for this tree, or {@code null} when
 *         {@code root} is {@code null}
 */
private static Map<Byte, String> getCodes(Node root) {
    if (root == null) {
        return null;
    }
    // Drop stale entries left by a previous invocation.
    huffManCodes.clear();
    if (root.data != null) {
        // Single-leaf tree: there is no path from the root, so assign a one-bit code.
        huffManCodes.put(root.data, "0");
        return huffManCodes;
    }
    getCodes(root.left, "0", builder);
    getCodes(root.right, "1", builder);
    return huffManCodes;
}
/**
 * Recursively collects the Huffman code of every leaf below {@code node} and
 * stores each one in the shared code table.
 *
 * @param node    node to visit; a {@code null} node is ignored
 * @param code    edge label leading to {@code node}: "0" for a left child, "1" for a right child
 * @param builder path accumulated up to the parent; it is copied, never modified
 */
private static void getCodes(Node node, String code, StringBuilder builder) {
    // Work on a private copy so sibling branches do not see each other's suffixes.
    StringBuilder path = new StringBuilder(builder).append(code);
    if (node == null) {
        return;
    }
    if (node.data != null) {
        // Leaf: the accumulated path is this byte's code.
        huffManCodes.put(node.data, path.toString());
        return;
    }
    // Internal node (no data): descend into both children.
    getCodes(node.left, "0", path);
    getCodes(node.right, "1", path);
}
/**
 * Runs a pre-order traversal from {@code root} by delegating to
 * {@code Node.previousOrder()}, or prints a notice when the tree is empty.
 *
 * @param root root of the tree; may be {@code null}
 */
private static void prevOrder(Node root) {
    if (root == null) {
        System.out.println("哈夫曼树为空");
        return;
    }
    root.previousOrder();
}
/**
 * Builds a Huffman tree from the given nodes by repeatedly merging the two
 * nodes that sort first under a new parent whose weight is their sum.
 *
 * <p>The list is modified in place: merged nodes are removed and their parent
 * is added, so on return it holds only the root.
 *
 * @param nodes nodes to merge; assumes {@code Node}'s natural ordering is
 *              ascending by weight (TODO confirm against {@code Node.compareTo})
 * @return the root of the tree, or {@code null} when {@code nodes} is
 *         {@code null} or empty
 */
private static Node createHuffManTree(List<Node> nodes) {
    if (nodes == null || nodes.isEmpty()) {
        // No input means no tree; callers such as prevOrder/getCodes accept a null root.
        return null;
    }
    while (nodes.size() > 1) {
        // Re-sort each round because the parent added last round may belong anywhere.
        Collections.sort(nodes);
        // Take the two first-sorted nodes by index; avoids an equals()-based lookup.
        Node leftNode = nodes.remove(0);
        Node rightNode = nodes.remove(0);
        Node parentNode = new Node(leftNode.weight + rightNode.weight);
        parentNode.left = leftNode;
        parentNode.right = rightNode;
        nodes.add(parentNode);
    }
    return nodes.get(0);
}
private static List<Node> getNodes(byte[] bytes) {
List<Node> nodes