目标:将字符串“can you can a can as a can canner can a can.”编码再解码
流程:
- 将字符串转成bytes (byte[]格式)(eg.[99,97,110....])
- 霍夫曼编码
- 统计bytes中各字节出现的次数,存于Map中(形如:{“字节”:c,“次数”:11}....),继而创建出对应的List<Node>
- 构建霍夫曼树
- 将List<Node>中的node按weight(出现次数)排序
- 合并最小的两个node(一开始是node,后面是树),构成一棵新树
- 将新树加入List<Node>中,并移除最小的两个node(一开始是node,后面是树)
- 循环至List<Node>中只剩一棵树
- 利用霍夫曼树构建霍夫曼编码表(个人感觉是难点)
- 利用霍夫曼编码表编码bytes获得霍夫曼码(只有0、1)
- 将霍夫曼码转成字节数组(byte[]格式,8个一组,剩余直接转)
- 霍夫曼解码
- 将字节数组转成霍夫曼码(只有0、1)
- 对照霍夫曼编码表去解码霍夫曼码
- 将byte[]格式转字符串输出
代码实现:
霍夫曼编解码代码
package demo10;
import java.nio.file.attribute.AclEntry.Builder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64.Decoder;
import java.util.List;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
/**
 * Huffman coding demo: compresses a byte sequence into a packed bit stream and restores it
 * using the code table that was built during compression.
 *
 * <p>Encoded layout produced by {@code huffmanZip}: the Huffman bits packed eight per byte
 * (the last data byte holds the remaining bits, right-aligned), followed by ONE trailer byte
 * that records how many bits of the last data byte are valid (1..8, or 0 when there are no
 * data bytes). Without that count the decoder cannot restore leading zero bits of the final
 * chunk, which silently corrupts the tail of the message.
 */
public class WangTestHuffmanCode {

    /** Number of Huffman bits packed into one full output byte. */
    private static final int BITS_PER_BYTE = 8;

    /** Explicit charset so the round trip does not depend on the platform default. */
    private static final java.nio.charset.Charset CHARSET = java.nio.charset.StandardCharsets.UTF_8;

    /** Empty path prefix handed to the recursive table builder; it is copied, never mutated. */
    static StringBuilder sb = new StringBuilder();

    /** Huffman code table (byte -> bit string) of the most recent {@code huffmanZip} call. */
    static Map<Byte, String> huffList = new HashMap<>();

    /**
     * Encodes a fixed sample sentence, decodes it again and prints the restored text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String msg = "can you can a can as a can canner can a can.";
        // String -> bytes
        byte[] bytes = msg.getBytes(CHARSET);
        // Huffman encode
        byte[] b = huffmanZip(bytes);
        // Huffman decode with the table left behind by the encoder
        byte[] newBytes = decoder(huffList, b);
        // bytes -> String
        System.out.println(new String(newBytes, CHARSET));
    }

    /**
     * Decodes data produced by {@code zip}.
     *
     * @param huffList code table (byte -> bit string) used for encoding
     * @param b        encoded bytes: packed bits followed by the valid-bit-count trailer byte
     * @return the original bytes; an empty array when {@code b} is null or empty
     * @throws IllegalArgumentException if the trailer is inconsistent or the bit stream ends in
     *                                  the middle of a code
     */
    private static byte[] decoder(Map<Byte, String> huffList, byte[] b) {
        if (b == null || b.length == 0) {
            return new byte[0];
        }
        int dataLength = b.length - 1;
        int lastBits = b[dataLength];
        boolean trailerInRange = lastBits >= 0 && lastBits <= BITS_PER_BYTE;
        // A payload needs 1..8 valid bits in its last byte; no payload means the count is 0.
        if (!trailerInRange || (dataLength == 0) != (lastBits == 0)) {
            throw new IllegalArgumentException("Invalid bit-count trailer: " + lastBits);
        }

        // Rebuild the bit string. Adding 0x100 forces a 9-digit binary string whose last 8
        // digits are the zero-padded byte; Integer.toBinaryString alone drops leading zeros.
        StringBuilder bits = new StringBuilder(dataLength * BITS_PER_BYTE);
        for (int i = 0; i < dataLength; i++) {
            String eight = Integer.toBinaryString((b[i] & 0xFF) + 0x100).substring(1);
            if (i < dataLength - 1) {
                bits.append(eight);
            } else {
                // The last chunk was stored right-aligned, so its bits are the low ones.
                bits.append(eight.substring(BITS_PER_BYTE - lastBits));
            }
        }
        // Trace output kept from the original demo.
        System.out.println(bits);

        // Invert the table: bit string -> byte.
        Map<String, Byte> codeToByte = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffList.entrySet()) {
            codeToByte.put(entry.getValue(), entry.getKey());
        }

        // Huffman codes are prefix-free, so the first match while extending the current
        // candidate is always the right symbol.
        List<Byte> decoded = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        for (int i = 0; i < bits.length(); i++) {
            current.append(bits.charAt(i));
            Byte symbol = codeToByte.get(current.toString());
            if (symbol != null) {
                decoded.add(symbol);
                current.setLength(0);
            }
        }
        if (current.length() > 0) {
            throw new IllegalArgumentException(
                    "Bit stream ends inside a code: " + current);
        }

        byte[] bytes = new byte[decoded.size()];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = decoded.get(i);
        }
        return bytes;
    }

    /**
     * Huffman-encodes {@code bytes}. As a side effect the static {@code huffList} is replaced
     * by the code table needed to decode the result.
     *
     * @param bytes data to compress
     * @return packed Huffman bits followed by the valid-bit-count trailer byte
     */
    private static byte[] huffmanZip(byte[] bytes) {
        // Count occurrences and wrap each distinct byte in a leaf node.
        List<Node> nodes = getNodes(bytes);
        // Build the Huffman tree.
        Node hufTree = createHuffmanTree(nodes);
        // Derive the code table from the tree.
        Map<Byte, String> hufListMap = createHuffmanList(hufTree);
        // Encode.
        return zip(bytes, hufListMap);
    }

    /**
     * Replaces every byte by its Huffman code and packs the resulting bits into bytes.
     *
     * @param bytes      data to encode
     * @param hufListMap code table (byte -> bit string)
     * @return the packed bits (last chunk right-aligned) plus one trailer byte holding the
     *         number of valid bits in the last data byte (0 if there is no data byte)
     * @throws IllegalArgumentException if a byte has no entry in {@code hufListMap}
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> hufListMap) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = hufListMap.get(b);
            if (code == null) {
                throw new IllegalArgumentException("No Huffman code for byte " + b);
            }
            bits.append(code);
        }

        // Round up: a partial trailing group still needs its own byte.
        int dataLength = (bits.length() + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
        byte[] zipbytes = new byte[dataLength + 1];
        int index = 0;
        for (int i = 0; i < bits.length(); i += BITS_PER_BYTE) {
            int end = Math.min(i + BITS_PER_BYTE, bits.length());
            zipbytes[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }

        // Trailer: how many bits of the last data byte are real.
        int remainder = bits.length() % BITS_PER_BYTE;
        int lastBits = dataLength == 0 ? 0 : (remainder == 0 ? BITS_PER_BYTE : remainder);
        zipbytes[dataLength] = (byte) lastBits;
        return zipbytes;
    }

    /**
     * Converts a Huffman tree into a code table: going left appends {@code 0}, going right
     * appends {@code 1}, and each leaf stores the path that leads to it.
     *
     * <p>The static {@code huffList} is cleared first so codes from an earlier run cannot leak
     * into this one.
     *
     * @param hufTree root of the tree, or {@code null} for empty input
     * @return the static {@code huffList}; empty when {@code hufTree} is {@code null}
     */
    private static Map<Byte, String> createHuffmanList(Node hufTree) {
        huffList.clear();
        if (hufTree == null) {
            return huffList;
        }
        if (hufTree.data != null) {
            // Only one distinct byte: the root is a leaf and still needs a one-bit code.
            huffList.put(hufTree.data, "0");
            return huffList;
        }
        transferToList(hufTree.left, "0", sb);
        transferToList(hufTree.right, "1", sb);
        return huffList;
    }

    /**
     * Recursive step of the table construction.
     *
     * @param node current node
     * @param code bit ("0" or "1") for the edge leading to {@code node}
     * @param sb   path from the root to the parent of {@code node}; not modified
     */
    private static void transferToList(Node node, String code, StringBuilder sb) {
        // Copy so that sibling branches do not see each other's path.
        StringBuilder path = new StringBuilder(sb);
        path.append(code);
        if (node.data == null) { // internal node
            transferToList(node.left, "0", path);
            transferToList(node.right, "1", path);
        } else {
            huffList.put(node.data, path.toString());
        }
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest trees.
     *
     * @param nodes leaf nodes; the list is consumed by this method
     * @return the root of the tree, or {@code null} if {@code nodes} is null or empty
     */
    private static Node createHuffmanTree(List<Node> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Node orders by descending weight, so the two lightest trees end up last.
            Collections.sort(nodes);
            Node little1 = nodes.remove(nodes.size() - 1);
            Node little2 = nodes.remove(nodes.size() - 1);
            Node newNode = new Node(null, little1.weight + little2.weight);
            newNode.left = little1;
            newNode.right = little2;
            nodes.add(newNode);
        }
        return nodes.get(0);
    }

    /**
     * Counts how often each byte occurs and creates one leaf node per distinct byte.
     *
     * @param bytes input data
     * @return list of leaf nodes weighted by occurrence count
     */
    private static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer seen = counts.get(b);
            counts.put(b, seen == null ? 1 : seen + 1);
        }
        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }
}
树节点创建代码:
package demo10;
/**
 * Node of a Huffman tree.
 *
 * <p>The natural ordering is by DESCENDING weight, so after {@code Collections.sort} the
 * lightest nodes are at the end of the list.
 */
public class Node implements Comparable<Node> {
    /** Byte stored in this node; {@code null} is passed for merged (internal) nodes. */
    Byte data;
    /** Weight of the node: an occurrence count, or the sum of the children's weights. */
    int weight;
    /** Left child, or {@code null} if none has been attached. */
    Node left;
    /** Right child, or {@code null} if none has been attached. */
    Node right;

    /**
     * @param data   byte represented by the node, or {@code null} for an internal node
     * @param weight weight of the node
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    @Override
    public String toString() {
        return "Node [data=" + data + ", weight=" + weight + "]";
    }

    /**
     * Orders nodes by descending weight.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction, because
     * {@code o.weight - this.weight} can overflow and report the wrong order.
     */
    @Override
    public int compareTo(Node o) {
        return Integer.compare(o.weight, this.weight);
    }
}