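/*
 * Huffman compression in three steps:
 * 1. Build a Huffman tree from the content to be compressed.
 * 2. Derive the Huffman code of each character from that tree.
 * 3. Compress the original text using those codes.
 */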
package tree;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * A node of the Huffman tree.
 *
 * <p>Leaves carry the byte they stand for in {@link #data}; internal nodes
 * are created with {@code data == null}. Nodes are ordered by ascending
 * {@link #weight}.
 */
class HCNode implements Comparable<HCNode> {
    /** The byte this node represents, or {@code null} for an internal node. */
    Byte data;
    /** Occurrence count of the byte (leaf) or sum of the children's weights (internal). */
    int weight;
    /** Left child, or {@code null} if none has been attached. */
    HCNode left;
    /** Right child, or {@code null} if none has been attached. */
    HCNode right;

    /**
     * Creates a node without children.
     *
     * @param data   the byte value, or {@code null} for an internal node
     * @param weight the weight of the node
     */
    public HCNode(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    @Override
    public String toString() {
        return "HCNode [data=" + data + ", weight=" + weight + "]";
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare against
     * @return a negative value, zero, or a positive value as this node's weight
     *         is less than, equal to, or greater than {@code o}'s weight
     */
    @Override
    public int compareTo(HCNode o) {
        // Integer.compare cannot overflow, unlike "this.weight - o.weight".
        return Integer.compare(this.weight, o.weight);
    }
}
/**
 * Huffman encoder for a single string.
 *
 * <p>Construction builds the Huffman tree and the code table for the UTF-8
 * bytes of the content; {@link #zip()} then packs the encoded bit string
 * into bytes.
 *
 * <p>Format note: the last output byte is produced from whatever bits remain
 * (possibly fewer than eight) without padding, so leading zero bits of that
 * final group cannot be told apart from absent bits using the returned array
 * alone. A decoder additionally needs the total bit length.
 */
public class HuffmanCode {
    /** Root of the Huffman tree; {@code null} when the content is empty. */
    HCNode root;
    /** The content to compress. */
    String content;
    /** Code table: maps each distinct content byte to its code, a string of '0' and '1'. */
    Map<Byte, String> HuffmanCodes = new HashMap<Byte, String>();

    /**
     * Builds the Huffman tree and code table for {@code content} and prints
     * the code table to standard output.
     *
     * @param content the text to compress; may be empty
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public HuffmanCode(String content) {
        if (content == null) {
            throw new NullPointerException("content must not be null");
        }
        this.content = content;
        creatHuffmanTree();
        getCodes(root, "", new StringBuilder());
        System.out.println(HuffmanCodes.toString());
    }

    /**
     * Builds the Huffman tree for the bytes of {@link #content} and stores
     * its root in {@link #root}. For empty content the root is set to
     * {@code null}.
     */
    public void creatHuffmanTree() {
        // Count how often each byte occurs.
        Map<Byte, Integer> counts = new HashMap<Byte, Integer>();
        for (byte b : contentBytes()) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }

        List<HCNode> nodes = new ArrayList<HCNode>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new HCNode(entry.getKey(), entry.getValue()));
        }

        // Empty content: there is nothing to build a tree from.
        if (nodes.isEmpty()) {
            this.root = null;
            return;
        }

        while (nodes.size() > 1) {
            // Collections.sort is stable, so nodes of equal weight keep their
            // relative order and the resulting tree shape is reproducible for
            // a given node list.
            Collections.sort(nodes);
            // Merge the two lightest nodes under a new internal node.
            HCNode leftNode = nodes.remove(0);
            HCNode rightNode = nodes.remove(0);
            HCNode parent = new HCNode(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        this.root = nodes.get(0);
    }

    /**
     * Walks the tree below {@code node} and records the code of every leaf in
     * {@link #HuffmanCodes}.
     *
     * @param node the current node; {@code null} is ignored
     * @param code the step taken to reach {@code node}: "0" for left, "1" for
     *             right, "" for the root
     * @param sb   the path accumulated up to the parent of {@code node}; it is
     *             copied, never modified
     */
    public void getCodes(HCNode node, String code, StringBuilder sb) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling branches do not see each other's path.
        StringBuilder path = new StringBuilder(sb);
        path.append(code);
        if (node.data == null) {
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // An empty path means the leaf was reached without taking any step
            // (a tree consisting of a single leaf). Give it a one-bit code so
            // that it still produces output when encoded.
            HuffmanCodes.put(node.data, path.length() == 0 ? "0" : path.toString());
        }
    }

    /**
     * Encodes {@link #content} with the code table and packs the resulting bit
     * string into bytes, eight bits per byte. The final byte is parsed from
     * the remaining bits if fewer than eight are left.
     *
     * @return the packed bytes; an empty array for empty content
     * @throws IllegalStateException if a content byte has no entry in the code table
     */
    public byte[] zip() {
        StringBuilder bits = new StringBuilder();
        for (byte b : contentBytes()) {
            String code = HuffmanCodes.get(b);
            if (code == null) {
                throw new IllegalStateException("no Huffman code for byte " + b);
            }
            bits.append(code);
        }

        byte[] zipBytes = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            int end = Math.min(i + 8, bits.length());
            // parseInt yields 0..255; the cast wraps values above 127 into
            // the negative byte range.
            zipBytes[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return zipBytes;
    }

    /** Returns the content as UTF-8 bytes, independent of the platform default charset. */
    private byte[] contentBytes() {
        return content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /** Demo: compresses a sample sentence and prints the resulting bytes. */
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        HuffmanCode hf = new HuffmanCode(content);
        byte[] bytes = hf.zip();
        System.out.println(Arrays.toString(bytes));
    }
}
// Sample output of main():
// {32=01, 97=100, 100=11000, 117=11001, 101=1110, 118=11011, 105=101, 121=11010, 106=0010, 107=1111, 108=000, 111=0011}
// [-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28]