package com.monster.huffmancode;
import java.io.*;
import java.util.*;
/**
 * Huffman coding: compresses byte arrays and files with a Huffman code built from the
 * byte frequencies of the input, and restores them again.
 *
 * Compressed file layout (a Java object stream), in this order:
 *   1. {@code byte[]}  - the packed code bits, 8 per byte; a shorter final group is stored
 *                        right-aligned in the last byte;
 *   2. {@code Map}     - the code table, byte value to its '0'/'1' code string;
 *   3. {@code Integer} - number of meaningful bits in the last packed byte (0 for empty input).
 * Files written by earlier versions hold only the first two objects. They are still accepted,
 * but the width of their final bit group has to be guessed (see the two-argument decode).
 *
 * Created 04-15-2021 18:50:57.
 *
 * @author Monster
 * @version v1.0
 */
public class HuffmanCode {

    /** Code table produced by the most recent {@link #getCode(Node)} call. */
    private static Map<Byte, String> codes = new HashMap<>();

    /**
     * Entry point.
     *
     * With exactly three arguments {@code zip SRC DST} or {@code unzip SRC DST} the given files
     * are compressed / decompressed. With any other arguments the built-in sample archive is
     * decompressed, as before.
     *
     * @param args optional mode and file paths as described above
     */
    public static void main(String[] args) {
        String zipPath = "C:\\Users\\Monster\\Desktop\\test\\image.zip";
        String dstPath = "C:\\Users\\Monster\\Desktop\\test\\image1111.png";
        try {
            if (args.length == 3 && "zip".equals(args[0])) {
                zipFile(args[1], args[2]);
                System.out.println("压缩文件完成!!!");
                return;
            }
            if (args.length == 3 && "unzip".equals(args[0])) {
                zipPath = args[1];
                dstPath = args[2];
            }
            deZipFile(zipPath, dstPath);
            // Only report success when no exception was raised.
            System.out.println("解压文件完成!!!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Decompresses a file written by {@link #zipFile(String, String)}.
     *
     * @param zipFile path of the compressed file
     * @param dstFile path the restored bytes are written to
     * @throws IOException if a file cannot be read or written, or the archive content is invalid
     */
    private static void deZipFile(String zipFile, String dstFile) throws IOException {
        // try-with-resources closes the object stream before the file stream, also on failure.
        // NOTE(review): Java deserialization executes code paths chosen by the file content;
        // only open archives from trusted sources.
        try (FileInputStream fis = new FileInputStream(zipFile);
             ObjectInputStream ois = new ObjectInputStream(fis)) {
            byte[] huffmanZip = (byte[]) ois.readObject();
            // Element types cannot be checked at cast time; a wrong table surfaces as
            // ClassCastException while decoding and is reported below.
            @SuppressWarnings("unchecked")
            Map<Byte, String> table = (Map<Byte, String>) ois.readObject();
            if (huffmanZip == null || table == null) {
                throw new IOException("Not a valid Huffman archive: " + zipFile);
            }
            Integer lastByteBits = null;
            try {
                lastByteBits = (Integer) ois.readObject();
            } catch (EOFException ignored) {
                // Legacy archive: it ends after the code table and carries no bit count.
            }
            byte[] decoded = (lastByteBits == null)
                    ? decode(table, huffmanZip)
                    : decode(table, huffmanZip, lastByteBits);
            // The destination is opened only after decoding succeeded.
            try (FileOutputStream fos = new FileOutputStream(dstFile)) {
                fos.write(decoded);
            }
        } catch (ClassNotFoundException | ClassCastException | IllegalArgumentException e) {
            throw new IOException("Not a valid Huffman archive: " + zipFile, e);
        }
    }

    /**
     * Compresses a file with Huffman coding and writes the archive described in the class comment.
     *
     * @param srcFile path of the file to compress
     * @param dstFile path of the archive to create
     * @throws IOException if a file cannot be read or written
     */
    private static void zipFile(String srcFile, String dstFile) throws IOException {
        byte[] bytes = readFully(srcFile);
        System.out.println("压缩前:" + bytes.length);
        byte[] huffmanZip = huffmanZip(bytes);
        System.out.println("压缩后:" + huffmanZip.length);
        // huffmanZip() has just rebuilt the shared table for exactly this input.
        Map<Byte, String> table = codes;
        long totalBits = encodedBitLength(bytes, table);
        int lastByteBits = (totalBits == 0) ? 0 : (int) ((totalBits - 1) % 8) + 1;
        // Resources are closed in reverse order, so the object stream flushes its buffer
        // before the underlying file stream is closed.
        try (FileOutputStream fos = new FileOutputStream(dstFile);
             ObjectOutputStream oos = new ObjectOutputStream(fos)) {
            oos.writeObject(huffmanZip);
            oos.writeObject(table);
            oos.writeObject(Integer.valueOf(lastByteBits));
        }
    }

    /**
     * Reads a whole file into memory.
     *
     * @param path file to read
     * @return all bytes of the file
     * @throws IOException if the file cannot be read
     */
    private static byte[] readFully(String path) throws IOException {
        try (FileInputStream fis = new FileInputStream(path)) {
            // available() is only a sizing hint; the loop below reads until end of stream.
            ByteArrayOutputStream buffer = new ByteArrayOutputStream(Math.max(32, fis.available()));
            byte[] chunk = new byte[8192];
            int read;
            while ((read = fis.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    /**
     * Convenience wrapper: counts byte frequencies, builds the Huffman tree, derives the code
     * table (also stored in {@link #codes}) and packs the input with it.
     *
     * @param bytes data to compress
     * @return the packed code bits; empty for empty input
     */
    private static byte[] huffmanZip(byte[] bytes) {
        List<Node> nodes = getNodes(bytes);
        Node root = getHuffmanTree(nodes);
        Map<Byte, String> table = getCode(root);
        return zip(bytes, table);
    }

    /**
     * Decodes packed bits whose final-group width is unknown (legacy archives).
     *
     * The last byte is read without leading zero bits. Leading zeros of the final group
     * were not recorded by the old format and cannot be recovered here.
     *
     * @param codes      code table, byte value to code string
     * @param huffmanZip packed code bits
     * @return the decoded bytes
     * @throws IllegalArgumentException if the bits do not form a sequence of complete codes
     */
    private static byte[] decode(Map<Byte, String> codes, byte[] huffmanZip) {
        if (huffmanZip.length == 0) {
            return new byte[0];
        }
        // Mask to 0..255 so a negative byte counts as 8 bits instead of a sign-extended int.
        int last = huffmanZip[huffmanZip.length - 1] & 0xFF;
        int width = Math.max(1, 32 - Integer.numberOfLeadingZeros(last));
        return decode(codes, huffmanZip, width);
    }

    /**
     * Decodes packed bits produced by {@link #zip(byte[], Map)}.
     *
     * @param codes        code table, byte value to code string
     * @param huffmanZip   packed code bits
     * @param lastByteBits number of meaningful (low-order) bits in the last byte, 1 to 8;
     *                     ignored when {@code huffmanZip} is empty
     * @return the decoded bytes
     * @throws IllegalArgumentException if {@code lastByteBits} is out of range or the bits do not
     *                                  form a sequence of complete codes
     */
    private static byte[] decode(Map<Byte, String> codes, byte[] huffmanZip, int lastByteBits) {
        if (huffmanZip.length == 0) {
            return new byte[0];
        }
        if (lastByteBits < 1 || lastByteBits > 8) {
            throw new IllegalArgumentException("lastByteBits must be 1..8 but was " + lastByteBits);
        }
        // Invert the table so a bit string can be looked up directly.
        Map<String, Byte> decodes = new HashMap<>();
        int longestCode = 0;
        for (Map.Entry<Byte, String> entry : codes.entrySet()) {
            String code = entry.getValue();
            if (code == null) {
                throw new IllegalArgumentException("Code table contains a null code");
            }
            decodes.put(code, entry.getKey());
            longestCode = Math.max(longestCode, code.length());
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        StringBuilder pending = new StringBuilder();
        int lastIndex = huffmanZip.length - 1;
        for (int i = 0; i <= lastIndex; i++) {
            int width = (i == lastIndex) ? lastByteBits : 8;
            String bits = byteToBitString(huffmanZip[i], width);
            for (int k = 0; k < bits.length(); k++) {
                pending.append(bits.charAt(k));
                if (pending.length() > longestCode) {
                    // No code is this long, so the data does not match the table.
                    throw new IllegalArgumentException("Packed bits do not match the code table");
                }
                Byte symbol = decodes.get(pending.toString());
                if (symbol != null) {
                    out.write(symbol.byteValue());
                    pending.setLength(0);
                }
            }
        }
        if (pending.length() != 0) {
            throw new IllegalArgumentException("Packed bits end inside a code");
        }
        return out.toByteArray();
    }

    /**
     * Renders the low-order bits of a byte as a '0'/'1' string, keeping leading zeros.
     *
     * @param b     the byte to render
     * @param width number of low-order bits to return, 0 to 8
     * @return exactly {@code width} characters
     */
    private static String byteToBitString(byte b, int width) {
        // Setting bit 8 guarantees a 9-character string, so leading zeros are not dropped.
        String binary = Integer.toBinaryString((b & 0xFF) | 0x100);
        return binary.substring(binary.length() - width);
    }

    /**
     * Total number of code bits needed to encode the input with the given table.
     *
     * @param bytes data to encode
     * @param codes code table, byte value to code string
     * @return sum of the code lengths of all input bytes
     * @throws IllegalArgumentException if an input byte has no (or an empty) code
     */
    private static long encodedBitLength(byte[] bytes, Map<Byte, String> codes) {
        long total = 0;
        for (byte b : bytes) {
            String code = codes.get(b);
            if (code == null || code.isEmpty()) {
                throw new IllegalArgumentException("No Huffman code for byte value " + b);
            }
            total += code.length();
        }
        return total;
    }

    /**
     * Replaces every input byte by its code and packs the resulting bits 8 per byte.
     * A final group shorter than 8 bits is stored right-aligned in the last byte.
     *
     * @param bytes data to encode
     * @param codes code table, byte value to code string of '0'/'1' characters
     * @return the packed code bits
     * @throws IllegalArgumentException if an input byte has no code or the result is too large
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> codes) {
        long byteCount = (encodedBitLength(bytes, codes) + 7) / 8;
        if (byteCount > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Encoded data does not fit into a byte array");
        }
        byte[] huffmanCodeBytes = new byte[(int) byteCount];
        int index = 0;
        int current = 0; // bits collected for the byte being built
        int filled = 0;  // how many bits of 'current' are in use
        for (byte b : bytes) {
            String code = codes.get(b);
            for (int k = 0; k < code.length(); k++) {
                current = (current << 1) | (code.charAt(k) - '0');
                filled++;
                if (filled == 8) {
                    huffmanCodeBytes[index++] = (byte) current;
                    current = 0;
                    filled = 0;
                }
            }
        }
        if (filled > 0) {
            huffmanCodeBytes[index] = (byte) current;
        }
        return huffmanCodeBytes;
    }

    /**
     * Derives the code table from a Huffman tree and remembers it in {@link #codes}.
     * Each call builds a fresh map, so entries of earlier inputs never leak into the result.
     * A tree consisting of a single leaf gets the one-bit code "0".
     *
     * @param root root of the Huffman tree, or {@code null} for empty input
     * @return the code table; empty when {@code root} is {@code null}
     */
    private static Map<Byte, String> getCode(Node root) {
        Map<Byte, String> table = new HashMap<>();
        if (root != null) {
            if (root.data != null) {
                // Only one distinct byte value: it still needs a non-empty code.
                table.put(root.data, "0");
            } else {
                getCode(root.left, "0", table);
                getCode(root.right, "1", table);
            }
        }
        codes = table;
        return table;
    }

    /**
     * Walks the subtree and records the path to every leaf.
     *
     * @param node  current node, may be {@code null}
     * @param path  path from the root to {@code node}; '0' = left child, '1' = right child
     * @param table receives one entry per leaf
     */
    private static void getCode(Node node, String path, Map<Byte, String> table) {
        if (node == null) {
            return;
        }
        if (node.data == null) {
            getCode(node.left, path + "0", table);
            getCode(node.right, path + "1", table);
        } else {
            table.put(node.data, path);
        }
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest nodes.
     * The given list is consumed in the process.
     *
     * @param nodes one leaf per distinct byte value
     * @return the root of the tree, or {@code null} if {@code nodes} is empty
     */
    private static Node getHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Prints the tree in pre-order, or a notice when there is no tree.
     *
     * @param root root of the tree, may be {@code null}
     */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("根節點為空!");
        }
    }

    /**
     * Counts how often each byte value occurs and wraps every count in a leaf node.
     *
     * @param bytes input data
     * @return one node per distinct byte value, weighted by its number of occurrences
     */
    private static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte data : bytes) {
            Integer seen = counts.get(data);
            counts.put(data, seen == null ? 1 : seen + 1);
        }
        List<Node> list = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            list.add(new Node(entry.getKey(), entry.getValue()));
        }
        return list;
    }
}
/**
 * Node of a Huffman tree, ordered by weight.
 */
class Node implements Comparable<Node> {
    /** Byte value carried by the node; the tree builder passes {@code null} for merged parents. */
    Byte data;
    /** Weight of the node (occurrence count, or the sum of both children for a parent). */
    int weight;
    /** Left child, or {@code null}. */
    Node left;
    /** Right child, or {@code null}. */
    Node right;

    /**
     * @param data   byte value of the node, may be {@code null}
     * @param weight weight of the node
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /**
     * Orders nodes by ascending weight.
     * Uses {@link Integer#compare(int, int)} because subtracting the weights can overflow
     * and report the wrong order for values far apart.
     */
    @Override
    public int compareTo(Node o) {
        return Integer.compare(this.weight, o.weight);
    }
}
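// NOTE: the following four values are non-code residue left over from page extraction;
// they are kept as comments so the file remains valid Java.
// 03-18
// 632
// 10-11
// 3659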