哈夫曼编码
1. 步骤
- 传输的字符串:
i like like like java do you like a java
- 计算各个字符对应的个数。
- 按照字符出现的次数构建一棵哈夫曼树,次数作为权值。
- 根据哈夫曼树,为各个字符规定编码(前缀编码):向左的路径为 0,向右的路径为 1。则有:
o: 1000 u: 10010 d: 100110 y: 100111 i: 101 a: 110 k: 1110 e: 1111 j: 0000 v: 0001 l: 001 (空格): 01
- 按照上面的哈夫曼编码,
i like like like java do you like a java
字符串对应的编码为1010100110111101111010011011110111101001101111011110100001100001110011001111000011001111000100100100110111101111011100100001100001110
。
2. 数据压缩
-
定义哈夫曼树节点类
/**
 * A node of the Huffman tree.
 *
 * <p>A node carries a byte value ({@code character}) and a weight ({@code value}),
 * plus optional left and right children. The tree-building code in this file creates
 * internal nodes with a {@code null} character, and the code-assignment step treats a
 * {@code null} character as "not a leaf".
 *
 * <p>Nodes are ordered by weight, ascending.
 */
class HuffmanCodeNode implements Comparable<HuffmanCodeNode> {
    /** The byte this node stands for; {@code null} for nodes created without a symbol. */
    private Byte character;
    /** The weight of this node. */
    private int value;
    private HuffmanCodeNode leftNode;
    private HuffmanCodeNode rightNode;

    /** Prints this node, then its left subtree, then its right subtree (pre-order). */
    public void preOrder() {
        System.out.println(this);
        if (this.leftNode != null) {
            this.leftNode.preOrder();
        }
        if (this.rightNode != null) {
            this.rightNode.preOrder();
        }
    }

    /**
     * Creates a node without children.
     *
     * @param character the byte represented by the node, or {@code null}
     * @param value     the weight of the node
     */
    public HuffmanCodeNode(Byte character, int value) {
        this.character = character;
        this.value = value;
    }

    public Byte getCharacter() {
        return character;
    }

    public void setCharacter(byte character) {
        this.character = character;
    }

    public int getValue() {
        return value;
    }

    public void setValue(int value) {
        this.value = value;
    }

    public HuffmanCodeNode getLeftNode() {
        return leftNode;
    }

    public void setLeftNode(HuffmanCodeNode leftNode) {
        this.leftNode = leftNode;
    }

    public HuffmanCodeNode getRightNode() {
        return rightNode;
    }

    public void setRightNode(HuffmanCodeNode rightNode) {
        this.rightNode = rightNode;
    }

    @Override
    public String toString() {
        return "HuffmanCodeNode{" + "character=" + character + ", value=" + value + '}';
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare against
     * @return a negative number, zero, or a positive number when this node's weight is
     *         less than, equal to, or greater than the weight of {@code o}
     */
    @Override
    public int compareTo(HuffmanCodeNode o) {
        // Integer.compare instead of subtraction: a difference can overflow and flip the sign.
        return Integer.compare(this.value, o.value);
    }
}
-
得到每个节点的数据与权值
public static List<HuffmanCodeNode> getNodes(String str) { // 存放所有的节点 ArrayList<HuffmanCodeNode> nodes = new ArrayList<>(); // 临时存放节点的符号与权值 HashMap<Byte, Integer> nodeMap = new HashMap<>(); byte[] strBytes = str.getBytes(); // 计算字符串中每个字符的权值 for (byte strByte : strBytes) { if (nodeMap.get(strByte) == null) { nodeMap.put(strByte, 1); } else { nodeMap.put(strByte, nodeMap.get(strByte) + 1); } } Set<Map.Entry<Byte, Integer>> entrySet = nodeMap.entrySet(); // 将所有节点加入到列表中 for (Map.Entry<Byte, Integer> byteIntegerEntry : entrySet) { nodes.add(new HuffmanCodeNode(byteIntegerEntry.getKey(), byteIntegerEntry.getValue())); } // 返回所有节点的符号与权值 return nodes; }
-
根据节点构造哈夫曼树
// 构造哈夫曼树 public static HuffmanCodeNode createHuffmanCodeTree(List<HuffmanCodeNode> huffmanCodeNodes) { while (huffmanCodeNodes.size() > 1) { // 根据权值对节点进行升序排序 Collections.sort(huffmanCodeNodes); // 获取最小权值的节点 HuffmanCodeNode leftNode = huffmanCodeNodes.remove(0); // 获取第二小权值的节点 HuffmanCodeNode rightNode = huffmanCodeNodes.remove(0); // 构造父节点 HuffmanCodeNode parent = new HuffmanCodeNode(null, leftNode.getValue() + rightNode.getValue()); parent.setLeftNode(leftNode); parent.setRightNode(rightNode); // 将父节点添加进列表中,继续参与运算 huffmanCodeNodes.add(parent); } // 返回树的根节点 return huffmanCodeNodes.get(0); }
-
得到每个节点(字符)的哈夫曼编码
// Empty prefix handed to the recursive overload as the starting path; it is never modified.
static StringBuilder stringBuilder = new StringBuilder();
// Symbol -> Huffman code for the tree processed most recently.
static HashMap<Byte, String> hashMap = new HashMap<>();

/**
 * Computes the Huffman code of every symbol in the tree rooted at {@code root}.
 *
 * <p>Going to a left child appends {@code "0"} to the code, going to a right child
 * appends {@code "1"}. A tree that consists of a single leaf (only one distinct symbol)
 * has no edges, so that symbol is given the one-bit code {@code "0"}.
 *
 * <p>The static {@code hashMap} is cleared and refilled on every call, so codes of a
 * previously processed tree never leak into the result. The returned map is a snapshot
 * copy and is not affected by later calls.
 *
 * @param root the root of the Huffman tree, may be {@code null}
 * @return a map from symbol to code, or {@code null} when {@code root} is {@code null}
 */
public static Map<Byte, String> getHuffmanCodes(HuffmanCodeNode root) {
    if (root == null) {
        return null;
    }
    // Forget the codes of any tree handled earlier.
    hashMap.clear();
    if (root.getLeftNode() == null && root.getRightNode() == null) {
        // The root itself is a leaf: there is no path to derive a code from.
        if (root.getCharacter() != null) {
            hashMap.put(root.getCharacter(), "0");
        }
    } else {
        getHuffmanCodes(root.getLeftNode(), "0", stringBuilder);
        getHuffmanCodes(root.getRightNode(), "1", stringBuilder);
    }
    return new HashMap<>(hashMap);
}

/**
 * Walks the subtree rooted at {@code node} and records the code of every leaf in
 * {@code hashMap}.
 *
 * @param node the current node; {@code null} is ignored
 * @param code the bit ({@code "0"} or {@code "1"}) of the edge leading to {@code node}
 * @param sb   the path from the root to the parent of {@code node}; not modified
 */
private static void getHuffmanCodes(HuffmanCodeNode node, String code, StringBuilder sb) {
    if (node == null) {
        return;
    }
    // Work on a copy so sibling branches do not see each other's bits.
    StringBuilder path = new StringBuilder(sb);
    path.append(code);
    if (node.getCharacter() == null) {
        // Internal node: descend into both children.
        getHuffmanCodes(node.getLeftNode(), "0", path);
        getHuffmanCodes(node.getRightNode(), "1", path);
    } else {
        // Leaf: the accumulated path is the symbol's code.
        hashMap.put(node.getCharacter(), path.toString());
    }
}
-
压缩字符串
/**
 * Compresses a byte sequence with the given Huffman codes.
 *
 * <p>The codes of all input bytes are concatenated into one bit string, which is then
 * cut into groups of eight bits. Each group is parsed as a binary number and stored as
 * one byte; the final group may be shorter than eight bits.
 *
 * @param strBytes     the bytes of the text to compress
 * @param huffmanCodes the Huffman code of every byte value
 * @return the packed bytes, one per group of up to eight bits
 */
public static byte[] zip(byte[] strBytes, Map<Byte, String> huffmanCodes) {
    // Concatenate the code of every input byte.
    StringBuilder bits = new StringBuilder();
    for (byte symbol : strBytes) {
        bits.append(huffmanCodes.get(symbol));
    }

    int bitCount = bits.length();
    // Ceiling division: a trailing partial group still needs its own byte.
    byte[] packed = new byte[(bitCount + 7) / 8];
    for (int slot = 0; slot < packed.length; slot++) {
        int from = slot * 8;
        int to = Math.min(from + 8, bitCount);
        packed[slot] = (byte) Integer.parseInt(bits.substring(from, to), 2);
    }
    return packed;
}
-
整合代码,方便使用
// 重载 zip() 方法 public static byte[] zip(String str) { byte[] strBytes = str.getBytes(); List<HuffmanCodeNode> nodes = getNodes(str); HuffmanCodeNode root = createHuffmanCodeTree(nodes); Map<Byte, String> huffmanCodes = getHuffmanCodes(root); byte[] huffmanCodesBytes = zip(str.getBytes(), huffmanCodes); return huffmanCodesBytes; }
-
测试
String str = "i like like like java do you like a java"; byte[] zip = HuffmanCodes.zip(str); System.out.println(Arrays.toString(zip));
输出:
[-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28]
3. 数据解压
-
将压缩后的字节转为二进制字符串
// 字节转二进制字符串 public static String byteToBitString(boolean flag, byte b) { int temp = b; if (flag) { temp |= 256; } String str = Integer.toBinaryString(temp); if (flag) { return str.substring(str.length() - 8); } else { return str; } }
-
将字符串的哈夫曼编码转回为字符序列
// 将哈夫曼编码转回字符序列 public static byte[] decode(byte[] huffmanCodesBytes, Map<Byte, String> huffmanCodes) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < huffmanCodesBytes.length; i++) { byte temp = huffmanCodesBytes[i]; boolean flag = (i == huffmanCodesBytes.length - 1); sb.append(byteToBitString(!flag, temp)); } HashMap<String, Byte> map = new HashMap<>(); for (Map.Entry<Byte, String> byteStringEntry : huffmanCodes.entrySet()) { map.put(byteStringEntry.getValue(), byteStringEntry.getKey()); } List<Byte> list = new ArrayList<>(); for (int i = 0; i < sb.length();) { int count = 1; boolean flag = true; Byte b = null; while (flag) { String key = sb.substring(i, i + count); b = map.get(key); if (b == null) { count++; } else { flag = false; } } list.add(b); i += count; } byte[] bytes = new byte[list.size()]; for (int i = 0; i < list.size(); i++) { bytes[i] = list.get(i); } return bytes; }