哈夫曼树是一棵二叉树,它通过从根节点到各叶子节点的路径实现对相应字符的编码。通常情况下,编码出来的结果是由0和1组成的串,但不是每个字符的编码长度都一样,而是出现次数越多的字符编码长度越短。最后我们把这些二进制串转为字节数组进行储存或者网络传输,以达到压缩的目的。当然,要解码这些串还需要一个哈夫曼编码与字符一一对应的表。当我们在一个整体的系统里使用这种技术的时候,可以提前建立好一个几乎覆盖全部可能用到的字符的"哈夫曼编码-字符"对应表,然后让编码方与解码方共享这个表。详细的原理,大家可以百度,这里我贴出来一个示例代码,大家可以通过代码去学习。
1. Node 节点类
/**
 * Binary-tree node used to demonstrate Huffman tree coding.
 *
 * <p>All members are public and mutable so that tree-building code can wire
 * nodes together directly.
 */
public class Node2 {
    /** Character carried by this node. */
    public char aChar;

    /** Occurrence count (weight) associated with this node. */
    public int frequency;

    /** Left child; not set by the constructor, so {@code null} until assigned. */
    public Node2 leftNode;

    /** Right child; not set by the constructor, so {@code null} until assigned. */
    public Node2 rightNode;

    /**
     * Creates a node with no children.
     *
     * @param ch        character to store in the node
     * @param frequency weight to store in the node
     */
    public Node2(char ch, int frequency) {
        this.frequency = frequency;
        this.aChar = ch;
    }

    /** Prints this node's character followed by a single space to standard output. */
    public void display() {
        String rendered = String.valueOf(aChar).concat(" ");
        System.out.print(rendered);
    }
}
2. 哈夫曼树
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Huffman tree demonstration.
 *
 * <p>{@link #createCodeTable(String)} builds a Huffman tree and a
 * character-to-code table from a sample string; {@link #enCode(String)} turns
 * text into a comma-separated list of '0'/'1' codes and
 * {@link #deCode(String)} walks the tree to turn such a string back into text.
 */
public class HaFuManTree {
    /** Character -> code (a string of '0'/'1'); rebuilt by every createCodeTable call. */
    private final ConcurrentHashMap<Character, String> codeTable = new ConcurrentHashMap<>();

    /** Root of the current Huffman tree; null until a non-empty sample has been processed. */
    private Node2 root;

    /**
     * Builds the Huffman tree and the code table for the characters of {@code str}.
     *
     * <p>Any tree and table from a previous call are discarded first. An empty
     * string leaves the instance with no tree and an empty table. When the
     * string contains a single distinct character, that character is given the
     * one-bit code "0" so that every occurrence still produces a decodable bit.
     *
     * @param str sample text whose character frequencies drive the code lengths
     */
    public void createCodeTable(String str) {
        // Start from a clean state so codes from an earlier sample cannot leak through.
        codeTable.clear();
        root = null;

        // Count how often each character occurs.
        HashMap<Character, Integer> frequencies = new HashMap<Character, Integer>();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            Integer seen = frequencies.get(ch);
            frequencies.put(ch, seen == null ? 1 : seen + 1);
        }
        if (frequencies.isEmpty()) {
            return;
        }

        // A min-heap keyed on frequency. Unlike a TreeSet it accepts elements that
        // compare as equal, so the comparator can honour the Comparator contract.
        PriorityQueue<Node2> queue = new PriorityQueue<Node2>(frequencies.size(),
                new Comparator<Node2>() {
                    @Override
                    public int compare(Node2 o1, Node2 o2) {
                        return Integer.compare(o1.frequency, o2.frequency);
                    }
                });
        for (Map.Entry<Character, Integer> entry : frequencies.entrySet()) {
            queue.add(new Node2(entry.getKey(), entry.getValue()));
        }

        // Repeatedly merge the two lightest nodes until a single tree remains.
        while (queue.size() > 1) {
            Node2 lightest = queue.poll();
            Node2 nextLightest = queue.poll();
            Node2 parent = new Node2(' ', lightest.frequency + nextLightest.frequency);
            parent.leftNode = lightest;
            parent.rightNode = nextLightest;
            queue.add(parent);
        }
        root = queue.poll();

        if (root.leftNode == null) {
            // Only one distinct character: the root itself is the leaf.
            codeTable.put(root.aChar, "0");
        } else {
            collectCodes(root, new StringBuilder());
        }
    }

    /**
     * Walks the subtree under {@code node} and records the code of every leaf.
     *
     * @param node current node; never null because internal nodes always have two children
     * @param path bits taken from the root to {@code node}; restored before returning
     */
    private void collectCodes(Node2 node, StringBuilder path) {
        if (node.leftNode == null) { // leaf: both children are absent
            codeTable.put(node.aChar, path.toString());
            return;
        }
        path.append('0');
        collectCodes(node.leftNode, path);
        // Swap the trailing '0' for '1' to descend into the right child.
        path.setCharAt(path.length() - 1, '1');
        collectCodes(node.rightNode, path);
        // Drop the bit added for this level.
        path.setLength(path.length() - 1);
    }

    /**
     * Encodes {@code str} using the current code table.
     *
     * @param str text to encode
     * @return the code of each character in order, each followed by a ','
     * @throws IllegalArgumentException if a character has no entry in the code table
     * @throws Exception declared for compatibility with existing callers
     */
    public String enCode(String str) throws Exception {
        StringBuilder encoded = new StringBuilder(100);
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            String code = codeTable.get(ch);
            if (code == null) {
                throw new IllegalArgumentException("can not find the coding of the char:" + ch);
            }
            encoded.append(code).append(',');
        }
        return encoded.toString();
    }

    /**
     * Decodes a string of '0'/'1' characters using the current Huffman tree.
     *
     * <p>Characters other than '0' and '1' (such as the ',' separators written
     * by {@link #enCode(String)}) are skipped. Trailing bits that do not reach a
     * leaf are ignored.
     *
     * @param hafuman encoded text
     * @return the decoded text
     * @throws IllegalStateException if bits are supplied but no tree has been built
     * @throws IllegalArgumentException if a '1' bit is supplied while the tree holds a
     *         single character, whose only code is "0"
     * @throws Exception declared for compatibility with existing callers
     */
    public String deCode(String hafuman) throws Exception {
        StringBuilder decoded = new StringBuilder(100);
        Node2 current = this.root;
        for (int i = 0; i < hafuman.length(); i++) {
            char bit = hafuman.charAt(i);
            if (bit != '0' && bit != '1') {
                continue; // separator or other non-bit character
            }
            if (this.root == null) {
                throw new IllegalStateException("no code table has been created");
            }
            if (this.root.leftNode == null) {
                // Single-character tree: every "0" stands for one occurrence.
                if (bit == '1') {
                    throw new IllegalArgumentException("invalid bit '1' at index " + i);
                }
                decoded.append(this.root.aChar);
                continue;
            }
            current = (bit == '0') ? current.leftNode : current.rightNode;
            if (current.leftNode == null) { // reached a leaf: one character decoded
                decoded.append(current.aChar);
                // The next code starts again from the root.
                current = this.root;
            }
        }
        return decoded.toString();
    }

    /**
     * Demo entry point: builds a table from a sample sentence, encodes it,
     * decodes it again, and prints each stage.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            String str = "这个算法有一点难度,主要是细节的地方。";
            System.out.println("原始要转码的字符串:"+str);
            HaFuManTree haFuManTree = new HaFuManTree();
            haFuManTree.createCodeTable(str);
            String coding = haFuManTree.enCode(str);
            System.out.println("哈夫曼编码:"+coding); // Huffman-encoded form
            coding = haFuManTree.deCode(coding);
            System.out.println("解码后字符串:"+coding); // original string again
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}