public class Huffman {
public static void main(String[] args) {
String str = "i like Java but i am very vegetable";
byte[] bytes = huffmanZip(str);
System.out.println("压缩后:" + Arrays.toString(bytes));
String s = decode(bytes, huffmanCode);
System.out.println("解压后:" + s);
}
// 解压
public static String decode(byte[] bytes, Map<Byte, String> huffmanCode) {
StringBuilder sb = new StringBuilder();
String s;
for (int i = 0; i < bytes.length; i++) {
int temp = bytes[i];
if (i != bytes.length - 1) {
temp |= 256; // 补位处理
s = Integer.toBinaryString(temp);
s = s.substring(s.length() - 8);
} else {
s = Integer.toBinaryString(temp);
}
sb.append(s);
}
// 字符对应的编码 -> 字符
Map<String, Byte> map = new HashMap<>();
for (Map.Entry<Byte, String> entry : huffmanCode.entrySet()) {
map.put(entry.getValue(), entry.getKey());
}
List<Byte> list = new ArrayList();
for (int i = 0; i < sb.length();) {
int count = 1;
boolean flag = true;
while (flag) {
String key = sb.substring(i, i + count);
if(map.containsKey(key)){
list.add(map.get(key));
flag = false;
}else{
count++;
}
}
i = i + count;
}
byte[] bytesDecode = new byte[list.size()];
for (int i = 0; i < list.size(); i++) {
bytesDecode[i] = list.get(i);
}
return new String(bytesDecode);
}
// 压缩。将字符串 str 压缩为一个 byte 数组
public static byte[] huffmanZip(String str) {
// 创建哈夫曼树
Node huffmanTree = createHuffmanTree(str);
// 得到哈夫曼树对应的哈夫曼编码表
Map<Byte, String> huffmanCode = createHuffmanCode(huffmanTree);
byte[] bytes = huffmanCodeZip(str, huffmanCode);
return bytes;
}
// 由于每个字符对应的编码是使用字符串来表示的,因此可能会出现将每个字符转换成编码后字符串长度还增加的现象,因此还需要再次压缩
// 比如:"abcde" 会被转换为 "110111000110",字符串的长度反而增加了,因此我们还需要将 "110111000110" 转换成 byte 数组
public static byte[] huffmanCodeZip(String str, Map<Byte, String> huffmanCode) {
// 将字符串(如:"abcde")转成哈夫曼编码对应的二进制的字符串表示("110111000110")
byte[] bytes = str.getBytes();
StringBuilder sb = new StringBuilder();
for (byte b : bytes) {
sb.append(huffmanCode.get(b));
}
// 下面一行代码等价:if(len % 8) == 0 { len = sb.length() / 8} else { len = sb.length() / 8 + 1}
int len = (sb.length() + 7) / 8;
byte[] huffmanCodeBytes = new byte[len];
int index = 0;
for (int i = 0; i < sb.length(); i += 8) { // 每 8 位对应一个 byte
if (i + 8 > sb.length()) {
huffmanCodeBytes[index] = (byte) Integer.parseInt(sb.substring(i), 2);
} else {
huffmanCodeBytes[index] = (byte) Integer.parseInt(sb.substring(i, i + 8), 2);
index++;
}
}
return huffmanCodeBytes;
}
// 根据字符串构造哈夫曼树
public static Node createHuffmanTree(String str) {
byte[] bytes = str.getBytes(); // 得到 str 中每个字符的 byte 字节
// System.out.println(Arrays.toString(content));
Map<Byte, Integer> map = new HashMap<>(); // 字符(的字节表示) -> 字符在 str 中出现的次数
for (Byte b : bytes) {
map.put(b, map.getOrDefault(b, 0) + 1);
}
List<Node> nodes = new ArrayList();
for (Map.Entry<Byte, Integer> entry : map.entrySet()) {
Node node = new Node(entry.getKey(), entry.getValue());
nodes.add(node);
}
while (nodes.size() > 1) {
Collections.sort(nodes, new Comparator<Node>() { // 按照权值(字符出现的次数)由小到大进行排序
@Override
public int compare(Node o1, Node o2) {
return o1.weight - o2.weight;
}
});
// 得到权值最小的两个结点
Node leftNode = nodes.get(0);
Node rightNode = nodes.get(1);
// 上面两个结点的父结点的权值是上面两个结点的权值之和
Node parent = new Node(null, rightNode.weight + leftNode.weight);
parent.left = leftNode;
parent.right = rightNode;
nodes.remove(rightNode);
nodes.remove(leftNode);
nodes.add(parent);
}
return nodes.get(0);
}
// 存放每个字符对应的编码(字符 -> 字符对应的编码)
static Map<Byte, String> huffmanCode = new HashMap<>();
// 生成哈夫曼树对应的哈夫曼编码表,并将其保存在 Map 中。规定左 0 右 1
public static Map<Byte, String> createHuffmanCode(Node root) {
if (root == null) {
return null;
}
getPath(root, "");
return huffmanCode;
}
public static void getPath(Node root, String path) {
if (root == null) {
return;
}
if (root.left == null && root.right == null) {
huffmanCode.put(root.value, path);
return;
}
getPath(root.left, path + "0");
getPath(root.right, path + "1");
}
}
class Node {
Byte value; // 存放字符本身,比如 'a' -> 97 'b' -> 98
int weight; // 字符出现的次数
Node left;
Node right;
public Node(Byte value, int weight) {
this.value = value;
this.weight = weight;
}
}
哈夫曼编码
最新推荐文章于 2024-07-31 19:38:21 发布