压缩:
1.将文件中读到的字节进行整理,统计各个字节出现次数
2.将次数与值添加到List中,并进行排序
3.根据List生成哈夫曼树
4.根据哈夫曼树生成哈夫曼编码
5.对应哈夫曼编码生成新的bytes,并将bytes和哈夫曼编码一同输出到压缩文件中
解压:
1.先读取数据和哈夫曼编码
2.反转哈夫曼编码(key,value互换)
3.对应反转后的哈夫曼编码恢复原来文件
4.输出恢复好的文件
注意:当压缩后编码的最后一个字节以0开头时,转换为byte会丢失这些前导0,所以需要特殊处理:用bytes的第一个字节存储丢失的0的个数
package com.wangyq.datastructrue.huffman;
import java.io.*;
import java.util.*;
/**
 * Demo program that compresses a file with Huffman coding and restores it again.
 *
 * <p>Archive layout (written with Java object serialization): first a {@code byte[]} payload,
 * then the {@code Map<Byte, String>} code table. Payload byte 0 stores how many leading zero
 * bits were dropped when the final (possibly partial) chunk of bits was converted to a byte;
 * the remaining payload bytes are the packed code bits.
 */
public class Huffman {

    /**
     * Compresses a file and then decompresses the result.
     *
     * @param args optional: source file, archive file, restored file; any argument that is
     *             missing falls back to the built-in default path
     */
    public static void main(String[] args) {
        String zipFile = args != null && args.length > 0 ? args[0] : "D:\\123.jpg";
        String dstFile = args != null && args.length > 1 ? args[1] : "D:\\123.huZip";
        String newFile = args != null && args.length > 2 ? args[2] : "D:\\new123.jpg";
        // Only decompress when compression succeeded, so a stale archive is never restored.
        if (zipFile(zipFile, dstFile)) {
            unZipFile(dstFile, newFile);
        }
    }

    /**
     * Compresses a file.
     *
     * @param zipFile path of the file to compress
     * @param dstFile path the archive is written to
     * @return {@code true} when the archive was written, {@code false} when an error was reported
     */
    private static boolean zipFile(String zipFile, String dstFile) {
        try {
            byte[] bytes = readFully(zipFile);
            Map<Byte, String> huffmanCode = getHuffmanCode(bytes);
            System.out.println("哈夫曼编码:" + Arrays.toString(huffmanCode.entrySet().toArray()));
            byte[] zipByte = zip(bytes, huffmanCode);
            // try-with-resources closes the object stream before the file stream it wraps.
            try (FileOutputStream outputStream = new FileOutputStream(dstFile);
                 ObjectOutputStream objectOutputStream = new ObjectOutputStream(outputStream)) {
                objectOutputStream.writeObject(zipByte);
                objectOutputStream.writeObject(huffmanCode);
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("压缩失败:" + e);
            return false;
        }
        System.out.println("压缩成功!!");
        return true;
    }

    /**
     * Decompresses an archive produced by {@link #zipFile(String, String)}.
     *
     * <p>NOTE(review): the archive is read with Java native deserialization; only open archives
     * from a trusted source.
     *
     * @param dstFile path of the archive to read
     * @param newFile path the restored file is written to
     * @return {@code true} when the file was restored, {@code false} when an error was reported
     */
    private static boolean unZipFile(String dstFile, String newFile) {
        try {
            byte[] unZipByte;
            try (FileInputStream inputStream = new FileInputStream(dstFile);
                 ObjectInputStream objectInputStream = new ObjectInputStream(inputStream)) {
                byte[] bytes = (byte[]) objectInputStream.readObject();
                @SuppressWarnings("unchecked") // zipFile writes the table as Map<Byte, String>
                Map<Byte, String> huffmanCode = (Map<Byte, String>) objectInputStream.readObject();
                unZipByte = unZip(bytes, huffmanCode);
            }
            // The output file is created only after decoding succeeded.
            try (FileOutputStream outputStream = new FileOutputStream(newFile)) {
                outputStream.write(unZipByte);
            }
        } catch (IOException | ClassNotFoundException | RuntimeException e) {
            System.err.println("解压失败:" + e);
            return false;
        }
        System.out.println("解压成功!!");
        return true;
    }

    /**
     * Reads a whole file into memory.
     *
     * <p>A single {@code read} call may return fewer bytes than requested, so the file is
     * consumed in a loop until end of stream.
     *
     * @param path file to read
     * @return the complete file content
     * @throws IOException if the file cannot be opened or read
     */
    private static byte[] readFully(String path) throws IOException {
        try (FileInputStream inputStream = new FileInputStream(path);
             ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[8192];
            int read;
            while ((read = inputStream.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    /**
     * Builds the Huffman code table for the given data.
     *
     * @param bytes data to analyse
     * @return byte value to bit-string code; empty (never {@code null}) when {@code bytes} is empty
     */
    private static Map<Byte, String> getHuffmanCode(byte[] bytes) {
        Map<Byte, Integer> count = countNums(bytes);
        List<Node> nodeList = new ArrayList<>();
        for (Map.Entry<Byte, Integer> nodeEntry : count.entrySet()) {
            nodeList.add(new Node(nodeEntry.getValue(), nodeEntry.getKey()));
        }
        // The tree is built from the lightest nodes first, so hand it an ascending list.
        nodeList.sort(Comparator.comparingInt(Node::getWeight));
        HuffmanTree huffmanTree = new HuffmanTree(nodeList);
        @SuppressWarnings("unchecked") // the tree fills the table with Byte keys and String codes
        Map<Byte, String> generated = huffmanTree.getHuffmanCode();
        if (generated == null) {
            return new HashMap<>();
        }
        Map<Byte, String> huffmanCode = new HashMap<>(generated);
        // A single distinct byte yields a one-node tree whose code would be empty; an empty
        // code encodes nothing, so give that byte a one-bit code instead.
        if (huffmanCode.size() == 1) {
            for (Map.Entry<Byte, String> entry : huffmanCode.entrySet()) {
                if (entry.getValue() == null || entry.getValue().isEmpty()) {
                    entry.setValue("0");
                }
            }
        }
        return huffmanCode;
    }

    /**
     * Decodes a payload produced by {@link #zip(byte[], Map)}.
     *
     * @param zipByte     payload; byte 0 is the count of leading zeros lost from the last byte
     * @param huffmanCode code table used for compression
     * @return the decoded bytes; empty when there is nothing to decode
     */
    private static byte[] unZip(byte[] zipByte, Map<Byte, String> huffmanCode) {
        if (zipByte == null || zipByte.length <= 1 || huffmanCode == null || huffmanCode.isEmpty()) {
            return new byte[0];
        }
        // Invert the table: bit string -> byte value.
        Map<String, Byte> decodeTable = new HashMap<>();
        for (Map.Entry<Byte, String> huffmanEntry : huffmanCode.entrySet()) {
            decodeTable.put(huffmanEntry.getValue(), huffmanEntry.getKey());
        }
        int lostZeros = zipByte[0];
        int last = zipByte.length - 1;
        StringBuilder bits = new StringBuilder(last * 8);
        for (int i = 1; i <= last; i++) {
            if (i == last) {
                // Restore the leading zeros dropped from the final chunk.
                for (int j = 0; j < lostZeros; j++) {
                    bits.append('0');
                }
            }
            bits.append(byteToBitString(i != last, zipByte[i]));
        }
        // Walk the bit string, emitting a byte whenever the collected prefix is a known code.
        ByteArrayOutputStream decoded = new ByteArrayOutputStream();
        StringBuilder key = new StringBuilder();
        for (int i = 0; i < bits.length(); i++) {
            key.append(bits.charAt(i));
            Byte value = decodeTable.get(key.toString());
            if (value != null) {
                decoded.write(value);
                key.setLength(0);
            }
        }
        return decoded.toByteArray();
    }

    /**
     * Converts a byte to its binary digits.
     *
     * @param flag {@code true} to always return exactly 8 digits; {@code false} to return the
     *             shortest form for non-negative values (negative values still yield 8 digits)
     * @param b    byte to convert
     * @return the binary digit string
     */
    private static String byteToBitString(boolean flag, byte b) {
        if (flag || b < 0) {
            // Setting bit 8 guarantees nine digits, so the low eight keep their leading zeros.
            String padded = Integer.toBinaryString((b & 0xFF) | 0x100);
            return padded.substring(padded.length() - 8);
        }
        return Integer.toBinaryString(b);
    }

    /**
     * Encodes data with the given code table and packs the bits into bytes.
     *
     * @param strBytes    data to encode
     * @param huffmanCode code table covering every byte value in {@code strBytes}
     * @return payload whose byte 0 is the number of leading zeros lost from the last byte
     * @throws IllegalArgumentException if a byte has no code in the table
     */
    private static byte[] zip(byte[] strBytes, Map<Byte, String> huffmanCode) {
        StringBuilder bits = new StringBuilder();
        for (byte b : strBytes) {
            String code = huffmanCode.get(b);
            if (code == null) {
                throw new IllegalArgumentException("no Huffman code for byte " + b);
            }
            bits.append(code);
        }
        int bitCount = bits.length();
        byte[] bytes = new byte[((bitCount + 7) / 8) + 1];
        int index = 1;
        for (int i = 0; i < bitCount; i += 8) {
            int end = Math.min(i + 8, bitCount);
            String chunk = bits.substring(i, end);
            int value = Integer.parseInt(chunk, 2);
            bytes[index++] = (byte) value;
            if (end == bitCount) {
                // The decoder rebuilds the last chunk from the shortest binary form of the
                // byte, so record how many leading zeros that form is missing.
                bytes[0] = (byte) (chunk.length() - Integer.toBinaryString(value).length());
            }
        }
        return bytes;
    }

    /**
     * Counts how often each byte value occurs.
     *
     * @param bytes data to count
     * @return byte value to number of occurrences
     */
    private static Map<Byte, Integer> countNums(byte[] bytes) {
        Map<Byte, Integer> count = new HashMap<>();
        for (byte b : bytes) {
            // The first occurrence counts as 1.
            count.merge(b, 1, Integer::sum);
        }
        return count;
    }
}
/**
 * Huffman tree built from a list of weighted leaf nodes.
 */
class HuffmanTree {
    private Node root = null;

    /**
     * Builds the tree by repeatedly merging the two lightest nodes.
     *
     * <p>The supplied list is copied and sorted here, so it does not have to be pre-sorted and
     * is left unmodified.
     *
     * @param nodeList leaf nodes; {@code null} or empty produces an empty tree
     */
    HuffmanTree(List<Node> nodeList) {
        if (null == nodeList || nodeList.isEmpty()) {
            return;
        }
        List<Node> pending = new ArrayList<>(nodeList);
        pending.sort(Comparator.comparingInt(Node::getWeight));
        while (pending.size() > 1) {
            // The two lightest nodes sit at the front of the ascending list.
            Node first = pending.remove(0);
            Node second = pending.remove(0);
            Node parent = new Node(first.getWeight() + second.getWeight(), null);
            parent.setLeft(first);
            parent.setRight(second);
            // Insert behind every node of equal or smaller weight; this keeps the list sorted
            // and matches what appending followed by a stable sort would produce.
            int position = 0;
            while (position < pending.size()
                    && pending.get(position).getWeight() <= parent.getWeight()) {
                position++;
            }
            pending.add(position, parent);
        }
        root = pending.get(0);
    }

    /**
     * Prints every node of the tree in post-order; does nothing for an empty tree.
     */
    public void postorder() {
        if (null == root) {
            return;
        }
        root.postorder();
    }

    /**
     * Derives the code table from the tree (left edge = 0, right edge = 1).
     *
     * @return byte value to bit-string code; empty for an empty tree. A tree consisting of a
     *         single leaf assigns that leaf the one-bit code {@code "0"}, because an empty code
     *         could not encode anything.
     */
    public Map<Byte, String> getHuffmanCode() {
        Map<Byte, String> huffmanCode = new HashMap<>();
        if (root == null) {
            return huffmanCode;
        }
        if (root.getLeft() == null && root.getRight() == null) {
            if (root.getValue() != null) {
                huffmanCode.put(root.getValue(), "0");
            }
            return huffmanCode;
        }
        root.getHuffmanCode(huffmanCode, new StringBuilder());
        return huffmanCode;
    }
}
class Node implements Comparator<Node> {
private int weight;
private Byte value;
private Node left;
private Node right;
public Node() {
}
@Override
public String toString() {
return "Node{" +
"weight=" + weight +
", value=" + value +
'}';
}
public Node(int weight, Byte value) {
this.weight = weight;
this.value = value;
}
public Byte getValue() {
return value;
}
public void setValue(Byte value) {
this.value = value;
}
public int getWeight() {
return weight;
}
public void setWeight(int weight) {
this.weight = weight;
}
public Node getLeft() {
return left;
}
public void setLeft(Node left) {
this.left = left;
}
public Node getRight() {
return right;
}
public void setRight(Node right) {
this.right = right;
}
@Override
public int compare(Node o1, Node o2) {
Integer weight1 = o1.getWeight();
Integer weight2 = o2.getWeight();
return weight1.compareTo(weight2);
}
/**
* 后序遍历
*/
public void postorder() {
if (left != null) {
left.postorder();
}
if (right != null) {
right.postorder();
}
System.out.println(this);
}
public void getHuffmanCode(Map huffmanCode, StringBuilder huffmanCodeString) {
if (this.left != null) {
this.left.getHuffmanCode(huffmanCode, new StringBuilder(huffmanCodeString).append(0));
}
if (this.right != null) {
this.right.getHuffmanCode(huffmanCode, new StringBuilder(huffmanCodeString).append(1));
}
if (this.value != null) {
huffmanCode.put(this.value, huffmanCodeString.toString());
}
}
}