赫夫曼编码是一种无损编码。对同一份数据,构造出的赫夫曼树可能并不唯一(例如权值相同的结点排列顺序不同),因此最终得到的编码也可能不同;但赫夫曼编码是前缀编码,解码时不会产生二义性,并且不同的赫夫曼树得到的压缩后编码总长度是相同的。这里我们先以一个字符串为例,生成其对应的赫夫曼编码表。
import java.nio.charset.StandardCharsets;
import java.util.*;
public class HuffmanCode {

    /**
     * Demo entry point: counts the bytes of a sample string, builds a Huffman
     * tree from the counts, prints the tree in pre-order and finally prints the
     * generated code table.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "i love love a a man man and and monocle monocle";
        // Use an explicit charset so the byte statistics do not depend on the
        // platform default encoding.
        byte[] strBytes = str.getBytes(StandardCharsets.UTF_8);
        System.out.println(strBytes.length);

        List<Node> nodes = getNode(strBytes);
        System.out.println("nodes=" + nodes);

        // Build the Huffman tree and show it
        System.out.println("赫夫曼树");
        Node huffmanTreeRoot = createHuffmanTree(nodes);
        System.out.println("前序遍历");
        // Go through the null-safe helper instead of calling the node directly
        preOrder(huffmanTreeRoot);

        // Generate the Huffman code table from the tree
        Map<Byte, String> codeTable = getCodes(huffmanTreeRoot);
        System.out.println("生成的赫夫曼编码表" + codeTable);
    }

    // Code table filled by the getCodes methods: byte value -> bit string ("0"/"1" characters)
    static Map<Byte, String> huffmanCodes = new HashMap<Byte, String>();
    // No longer read by getCodes(Node); the field is kept so existing references still compile
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Records the Huffman code of every leaf below {@code node} in
     * {@link #huffmanCodes}. A step to a left child contributes "0" and a step
     * to a right child contributes "1".
     *
     * @param node          subtree to process; {@code null} is ignored
     * @param code          the edge label leading to {@code node} ("0" or "1")
     * @param stringBuilder path accumulated up to the parent of {@code node};
     *                      it is copied, never modified
     */
    public static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling subtrees do not see each other's path
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: keep descending on both sides
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // Leaf node: the accumulated path is its code
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Convenience overload that builds the complete code table for a tree.
     *
     * <p>The shared table is cleared first, so codes from a previously processed
     * tree never leak into the result. A tree that consists of a single leaf
     * (input with only one distinct byte) gets the one-bit code "0"; without
     * this the table would stay empty for such input.
     *
     * @param root root of the Huffman tree, may be {@code null}
     * @return the code table ({@link #huffmanCodes}); empty when {@code root} is {@code null}
     */
    private static Map<Byte, String> getCodes(Node root) {
        huffmanCodes.clear();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // The root itself is a leaf: there is no edge to derive a bit from
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        StringBuilder emptyPath = new StringBuilder();
        getCodes(root.left, "0", emptyPath);
        getCodes(root.right, "1", emptyPath);
        return huffmanCodes;
    }

    /**
     * Prints the tree in pre-order, or a notice when the tree is empty.
     *
     * @param root root of the tree, may be {@code null}
     */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("赫夫曼树为空");
        }
    }

    /**
     * Counts how often each byte occurs and wraps every (byte, count) pair in a
     * leaf {@link Node}.
     *
     * @param bytes the raw input bytes
     * @return one node per distinct byte, in the iteration order of the counting map
     */
    private static List<Node> getNode(byte[] bytes) {
        // Tally the occurrences of every byte value
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer seen = counts.get(b);
            counts.put(b, seen == null ? 1 : seen + 1);
        }

        // Turn every map entry into a leaf node
        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest trees.
     *
     * <p>The merge runs on a private copy, so the caller's list is left untouched.
     *
     * @param nodes the leaf nodes to combine
     * @return the root of the tree, or {@code null} when there are no nodes
     */
    private static Node createHuffmanTree(List<Node> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        List<Node> forest = new ArrayList<Node>(nodes);
        while (forest.size() > 1) {
            // Sort ascending by weight so the two lightest trees come first
            Collections.sort(forest);
            Node leftNode = forest.remove(0);
            Node rightNode = forest.remove(0);
            // The new parent carries no data, only the combined weight
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            forest.add(parent);
        }
        return forest.get(0);
    }
}
// Tree node that stores a byte value together with its weight
class Node implements Comparable<Node> {
    Byte data; // the stored byte (a character such as 'a' -> 97); may be null
    int weight; // weight, i.e. how many times the byte occurs
    Node left;
    Node right;

    /**
     * Creates a node without children.
     *
     * @param data   the byte to store, may be {@code null}
     * @param weight the weight of the node
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /**
     * Orders nodes by ascending weight.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction, because
     * the difference of two ints can overflow and flip the sign of the result.
     *
     * @param o the node to compare with
     * @return a negative value, zero or a positive value when this node is
     *         lighter than, as heavy as or heavier than {@code o}
     */
    @Override
    public int compareTo(Node o) {
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node [data=" + data + ", weight=" + weight + "]";
    }

    public Byte getData() {
        return data;
    }

    public void setData(Byte data) {
        this.data = data;
    }

    public int getWeight() {
        return weight;
    }

    public void setWeight(int weight) {
        this.weight = weight;
    }

    public Node getLeft() {
        return left;
    }

    public void setLeft(Node left) {
        this.left = left;
    }

    public Node getRight() {
        return right;
    }

    public void setRight(Node right) {
        this.right = right;
    }

    /**
     * Pre-order traversal: prints this node, then the left subtree, then the
     * right subtree.
     */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }
}