1.压缩:使用赫夫曼编码进行压缩
题目
构建赫夫曼树
package tree.huffmantree;
import java.util.*;
/**
 * First step of the Huffman-compression walkthrough: counts how often each byte of a sample
 * sentence occurs, builds the Huffman tree from those counts and prints it in pre-order.
 */
public class HuffmanCode {

    /** Builds the Huffman tree for a fixed sample sentence and prints its nodes in pre-order. */
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        // Name the charset explicitly so the bytes do not depend on the platform default.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(contentBytes.length);

        List<Node1> nodes = getNodes(contentBytes);
        Node1 huffmanTree = createHuffmanTree(nodes);
        preOrder(huffmanTree);
    }

    /**
     * Prints the tree rooted at {@code root} in pre-order, or a notice when there is no tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     */
    public static void preOrder(Node1 root) {
        if (root == null) {
            System.out.println("赫夫曼树为空");
            return;
        }
        root.preOrder();
    }

    /**
     * Creates one leaf node per distinct byte value, weighted by its number of occurrences.
     *
     * @param bytes the data to analyse
     * @return a new mutable list of leaves; empty when {@code bytes} is empty
     */
    private static List<Node1> getNodes(byte[] bytes) {
        // Count the occurrences of every byte value.
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }

        // Turn every (byte, count) pair into a leaf node.
        List<Node1> leaves = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            leaves.add(new Node1(entry.getKey(), entry.getValue()));
        }
        return leaves;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * <p>The list is consumed: on return it holds only the root (or stays empty).
     *
     * @param nodes leaf nodes to merge
     * @return the root of the tree, or {@code null} when there are no nodes
     */
    private static Node1 createHuffmanTree(List<Node1> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            // Nothing to build; preOrder() already treats null as "empty tree".
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight, so the two lightest trees sit at the front.
            Collections.sort(nodes);
            Node1 leftNode = nodes.remove(0);
            Node1 rightNode = nodes.remove(0);

            // An internal node carries no data, only the combined weight.
            Node1 parent = new Node1(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        // The single remaining node is the root.
        return nodes.get(0);
    }
}
class Node1 implements Comparable<Node1>{
Byte data; //存放数据 按照ascii
int weight; //权值,表示字符出现的次数
Node1 left;
Node1 right;
//前序遍历
public void preOrder(){
System.out.println(this);
if (this.left != null){
this.left.preOrder();
}
if (this.right != null){
this.right.preOrder();
}
}
@Override
public int compareTo(Node1 o) {
return this.weight - o.weight;
}
public Node1(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
@Override
public String toString() {
return "Node1{" +
"data=" + data +
", weight=" + weight +
'}';
}
}
// Generating the Huffman codes that correspond to a Huffman tree.
// Approach:
// 1. Keep the code table as a Map<Byte,String>,
//    e.g. 32 -> 01, 97 -> 100, 100 -> 11000 and so on.
static Map<Byte,String> huffmanCodes = new HashMap<>();
// 2. Building the table means concatenating the path to each leaf; this builder is the
//    prefix that a path starts from.
static StringBuilder stringBuilder = new StringBuilder();

/**
 * Walks the subtree rooted at {@code node} and stores the code of every leaf it reaches in
 * {@link #huffmanCodes}.
 *
 * @param node          subtree to process; a {@code null} node is ignored
 * @param code          step taken to reach {@code node}: "0" for a left child, "1" for a right child
 * @param stringBuilder path accumulated before this step; it is copied, not modified
 */
private static void getCondes(Node1 node, String code, StringBuilder stringBuilder) {
    // Work on a private copy so sibling branches do not see each other's suffix.
    StringBuilder path = new StringBuilder(stringBuilder).append(code);
    if (node == null) {
        return;
    }
    if (node.data != null) {
        // A node that carries data is a leaf: the path so far is its code.
        huffmanCodes.put(node.data, path.toString());
        return;
    }
    // A node without data is internal: descend left with "0", then right with "1".
    getCondes(node.left, "0", path);
    getCondes(node.right, "1", path);
}
// Turns the byte[] of the original text into the Huffman-compressed byte[] by means of the
// generated code table.
/**
 * Encodes {@code bytes} with the given code table and packs the resulting bit string into
 * bytes, eight bits per byte.
 *
 * <p>Example: a bit string starting with "10101000" yields a first byte of -88, because
 * binary 10101000 is 168 and narrowing 168 to a byte gives -88.
 *
 * <p>The last chunk may be shorter than eight bits. It is parsed as a plain binary number,
 * so the result does not record how many bits that chunk had.
 *
 * @param bytes        the original data
 * @param huffmanCodes code table mapping each byte value to its bit string
 * @return the packed Huffman-encoded bytes
 */
private static byte[] zip(byte[] bytes, Map<Byte,String> huffmanCodes) {
    // 1. Concatenate the code of every input byte into one long bit string.
    StringBuilder bits = new StringBuilder();
    for (int k = 0; k < bytes.length; k++) {
        bits.append(huffmanCodes.get(bytes[k]));
    }
    // The bit string is longer than the input, so it is printed only for inspection.
    System.out.println(bits.toString());

    // 2. One output byte per started group of eight bits.
    int total = bits.length();
    byte[] packed = new byte[total / 8 + (total % 8 == 0 ? 0 : 1)];
    for (int slot = 0; slot < packed.length; slot++) {
        int from = slot * 8;
        int to = Math.min(from + 8, total);
        // Parse the chunk as an unsigned binary number and keep its low eight bits.
        packed[slot] = (byte) Integer.parseInt(bits.substring(from, to), 2);
    }
    return packed;
}
完整代码
package tree.huffmantree;
import java.util.*;
/**
 * Huffman compression of an in-memory byte array: counts byte frequencies, builds the
 * Huffman tree, derives the code table from it and packs the encoded bits into bytes.
 */
public class HuffmanCode {

    /** Code table produced by the most recent {@link #getCondes(Node1)} call, e.g. 32 -> "01". */
    static Map<Byte,String> huffmanCodes = new HashMap<>();

    /** Empty prefix handed to the recursive code generator; it is copied, never modified. */
    static StringBuilder stringBuilder = new StringBuilder();

    /** Compresses a fixed sample sentence and prints the resulting bytes. */
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        // Name the charset explicitly so the bytes do not depend on the platform default.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] bytes = huffmanZip(contentBytes);
        System.out.println("压缩后的结果: " + Arrays.toString(bytes));
    }

    /**
     * Convenience wrapper that runs the whole pipeline.
     *
     * @param bytes the original data
     * @return the Huffman-encoded, bit-packed data
     */
    private static byte[] huffmanZip(byte[] bytes) {
        // Step 1: one leaf per distinct byte value.
        List<Node1> nodes = getNodes(bytes);
        // Step 2: merge the leaves into a Huffman tree.
        Node1 huffmanTree = createHuffmanTree(nodes);
        // Step 3: derive the code table from the tree.
        Map<Byte, String> codes = getCondes(huffmanTree);
        // Step 4: encode and pack.
        return zip(bytes, codes);
    }

    /**
     * Encodes {@code bytes} with the given code table and packs the bit string into bytes,
     * eight bits per byte.
     *
     * <p>Example: a bit string starting with "10101000" yields a first byte of -88, because
     * binary 10101000 is 168 and narrowing 168 to a byte gives -88.
     *
     * <p>The last chunk may be shorter than eight bits. It is parsed as a plain binary
     * number, so the result does not record how many bits that chunk had.
     *
     * @param bytes        the original data
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @return the packed Huffman-encoded bytes
     * @throws IllegalArgumentException if a byte of the input has no code in the table
     */
    private static byte[] zip(byte[] bytes, Map<Byte,String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                // Appending null would silently insert the text "null" into the bit string.
                throw new IllegalArgumentException("No Huffman code for byte value " + b);
            }
            bits.append(code);
        }
        // Printed for inspection only; the bit string itself is larger than the input.
        System.out.println(bits.toString());

        int total = bits.length();
        byte[] huffmanCodeBytes = new byte[total / 8 + (total % 8 == 0 ? 0 : 1)];
        int index = 0;
        for (int i = 0; i < total; i += 8) {
            String strByte = bits.substring(i, Math.min(i + 8, total));
            huffmanCodeBytes[index++] = (byte) Integer.parseInt(strByte, 2);
        }
        return huffmanCodeBytes;
    }

    /**
     * Builds the code table for the tree rooted at {@code root}.
     *
     * <p>The table is rebuilt from scratch on every call, so codes from an earlier tree
     * never leak into the result.
     *
     * @param root root of the Huffman tree; may be {@code null}
     * @return the code table, which is also stored in {@link #huffmanCodes}; empty when
     *         {@code root} is {@code null}
     */
    private static Map<Byte,String> getCondes(Node1 root) {
        huffmanCodes = new HashMap<>();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // A tree with a single leaf has no edges; give the only symbol a one-bit code
            // so that encoding it produces at least one bit per occurrence.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        getCondes(root, "", stringBuilder);
        return huffmanCodes;
    }

    /**
     * Walks the subtree rooted at {@code node} and stores the code of every leaf it
     * reaches in {@link #huffmanCodes}.
     *
     * @param node          subtree to process; a {@code null} node is ignored
     * @param code          step taken to reach {@code node}: "0" for left, "1" for right
     * @param stringBuilder path accumulated before this step; it is copied, not modified
     */
    private static void getCondes(Node1 node, String code, StringBuilder stringBuilder) {
        StringBuilder path = new StringBuilder(stringBuilder);
        path.append(code);
        if (node == null) {
            return;
        }
        if (node.data == null) {
            // Internal node: descend into both children.
            getCondes(node.left, "0", path);
            getCondes(node.right, "1", path);
        } else {
            // Leaf: the accumulated path is its code.
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Prints the tree rooted at {@code root} in pre-order, or a notice when there is no tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     */
    public static void preOrder(Node1 root) {
        if (root == null) {
            System.out.println("赫夫曼树为空");
            return;
        }
        root.preOrder();
    }

    /**
     * Creates one leaf node per distinct byte value, weighted by its number of occurrences.
     *
     * @param bytes the data to analyse
     * @return a new mutable list of leaves; empty when {@code bytes} is empty
     */
    private static List<Node1> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }

        List<Node1> leaves = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            leaves.add(new Node1(entry.getKey(), entry.getValue()));
        }
        return leaves;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * <p>The list is consumed: on return it holds only the root (or stays empty).
     *
     * @param nodes leaf nodes to merge
     * @return the root of the tree, or {@code null} when there are no nodes
     */
    private static Node1 createHuffmanTree(List<Node1> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight, so the two lightest trees sit at the front.
            Collections.sort(nodes);
            Node1 leftNode = nodes.remove(0);
            Node1 rightNode = nodes.remove(0);

            // An internal node carries no data, only the combined weight.
            Node1 parent = new Node1(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }
}
class Node1 implements Comparable<Node1>{
Byte data; //存放数据 按照ascii
int weight; //权值,表示字符出现的次数
Node1 left;
Node1 right;
//前序遍历
public void preOrder(){
System.out.println(this);
if (this.left != null){
this.left.preOrder();
}
if (this.right != null){
this.right.preOrder();
}
}
@Override
public int compareTo(Node1 o) {
return this.weight - o.weight;
}
public Node1(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
@Override
public String toString() {
return "Node1{" +
"data=" + data +
", weight=" + weight +
'}';
}
}
2.解压(解码)
// Decompressing the data.
// Approach:
// 1. Turn huffmanCodeBytes [-88, -65, ...] back into the binary string of Huffman codes.
// 2. Translate that binary string back into the original bytes using the code table.
/**
 * Decodes Huffman-compressed data.
 *
 * @param huffmanCodes code table mapping each byte value to its bit string
 * @param huffmanBytes the compressed bytes to decode
 * @return the bytes of the original data
 */
private static byte[] decode(Map<Byte,String> huffmanCodes, byte[] huffmanBytes) {
    // 1. Rebuild the bit string; every byte except the last is rendered with eight bits.
    StringBuilder bits = new StringBuilder();
    int lastIndex = huffmanBytes.length - 1;
    for (int k = 0; k <= lastIndex; k++) {
        bits.append(byteToBitString(k != lastIndex, huffmanBytes[k]));
    }
    System.out.println("赫夫曼 解码后 对应的二进制字符串:" + bits.toString());

    // 2. Invert the code table, because decoding looks up a byte by its code.
    Map<String,Byte> symbolByCode = new HashMap<>();
    for (Map.Entry<Byte,String> pair : huffmanCodes.entrySet()) {
        symbolByCode.put(pair.getValue(), pair.getKey());
    }

    // 3. Scan the bit string: grow the window from 'start' until it matches a code.
    List<Byte> decoded = new ArrayList<>();
    int start = 0;
    while (start < bits.length()) {
        int end = start + 1;
        Byte symbol = symbolByCode.get(bits.substring(start, end));
        while (symbol == null) {
            end++;
            symbol = symbolByCode.get(bits.substring(start, end));
        }
        decoded.add(symbol);
        // Continue right after the code that was just matched.
        start = end;
    }

    // 4. Unbox the collected bytes into the result array.
    byte[] result = new byte[decoded.size()];
    int pos = 0;
    for (Byte symbol : decoded) {
        result[pos++] = symbol;
    }
    return result;
}
/**
 * Converts one byte into its binary string.
 *
 * @param flag {@code true} to left-pad the result to exactly eight bits; {@code false}
 *             (used for the last byte) to return the shortest representation
 * @param b    the byte to convert, interpreted as an unsigned 8-bit value
 * @return the bits of {@code b}; eight characters when {@code flag} is {@code true},
 *         otherwise between one and eight characters
 */
private static String byteToBitString(boolean flag, byte b) {
    // Keep only the low eight bits. Without the mask a negative byte is sign-extended and
    // the unpadded branch would return a 32-character string.
    int temp = b & 0xFF;
    if (!flag) {
        return Integer.toBinaryString(temp);
    }
    // Bitwise OR with 1 0000 0000 forces a ninth bit, e.g. 0000 0001 -> 1 0000 0001, so the
    // binary string always has nine characters and the last eight are the padded byte.
    temp |= 256;
    String str = Integer.toBinaryString(temp);
    return str.substring(str.length() - 8);
}
3.对文件进行压缩(加入IO,通过对象流把赫夫曼编码表一并写入压缩文件,解压时需要用到)
/**
 * Compresses a file with Huffman coding.
 *
 * <p>The destination receives two serialized objects: the compressed byte array and the
 * code table, which is required to restore the file later. Failures are reported on
 * standard output and not rethrown.
 *
 * @param srcFile path of the file to compress
 * @param dstFile path of the compressed file to create
 */
public static void zipFile(String srcFile, String dstFile) {
    try {
        // Read the whole source. A single read() is not guaranteed to fill the buffer, and
        // available() is not guaranteed to be the file size, so loop until end of stream.
        byte[] b;
        try (InputStream is = new FileInputStream(srcFile)) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = is.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            b = buffer.toByteArray();
        }

        byte[] huffmanBytes = huffmanZip(b);

        // try-with-resources closes the object stream first, which flushes its buffer
        // before the underlying file stream is closed, and copes with a failed open.
        try (OutputStream os = new FileOutputStream(dstFile);
             ObjectOutputStream oos = new ObjectOutputStream(os)) {
            oos.writeObject(huffmanBytes);
            // The code table must be stored too, otherwise the file cannot be restored.
            oos.writeObject(huffmanCodes);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
4.对文件进行解压
/**
 * Restores a file that was written as two serialized objects: the compressed byte array
 * followed by the Huffman code table.
 *
 * <p>Failures are reported on standard output and not rethrown.
 *
 * @param zipFile path of the compressed file
 * @param dstFile path of the restored file to create
 */
public static void unzipFile(String zipFile, String dstFile) {
    try {
        byte[] huffmanBytes;
        Map<Byte,String> huffmanCode;
        // NOTE(review): this uses Java-native deserialization; only open archives that come
        // from a trusted source.
        try (InputStream is = new FileInputStream(zipFile);
             ObjectInputStream ois = new ObjectInputStream(is)) {
            huffmanBytes = (byte[]) ois.readObject();
            @SuppressWarnings("unchecked") // the second object is written as a Map<Byte,String>
            Map<Byte,String> table = (Map<Byte,String>) ois.readObject();
            huffmanCode = table;
        }

        byte[] bytes = decode(huffmanCode, huffmanBytes);

        // The destination is opened only after decoding succeeded; try-with-resources also
        // avoids calling close() on a stream that was never opened.
        try (OutputStream os = new FileOutputStream(dstFile)) {
            os.write(bytes);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
赫夫曼编码可以压缩所有类型的文件,因为它是以字节为单位进行压缩的,与文件内容的格式无关。
完整代码
package tree.huffmantree.ZipFile;
import java.io.*;
import java.util.*;
/**
 * Compresses and restores files with Huffman coding of individual bytes.
 *
 * <p>Archive layout (Java object stream): the packed byte array, the code table, and then an
 * {@code int} holding the number of encoded bits. The trailing {@code int} lets the decoder
 * restore leading zero bits of the last byte; archives that lack it are still read.
 */
public class HuffmanZipFile {

    /** Code table produced by the most recent {@link #getCondes(Node1)} call, e.g. 32 -> "01". */
    static Map<Byte,String> huffmanCodes = new HashMap<>();

    /** Empty prefix handed to the recursive code generator; it is copied, never modified. */
    static StringBuilder stringBuilder = new StringBuilder();

    public static void main(String[] args) {
        // Try out file compression.
        String srcFile = "D:\\薛艳春\\桌面\\新建文件夹 (3)\\薛艳春2.pdf";
        String dstFile = "D:\\薛艳春\\桌面\\新建文件夹 (3)\\薛艳春2.zip";
        zipFile(srcFile,dstFile);
        System.out.println("压缩成功~~");

        String zipFile = "D:\\薛艳春\\桌面\\新建文件夹 (3)\\dst.zip";
        String dstFile2 = "D:\\薛艳春\\桌面\\新建文件夹 (3)\\src2.png";
        //unzipFile(zipFile,dstFile2);
    }

    /**
     * Restores a file from an archive written by {@link #zipFile(String, String)}.
     *
     * <p>Failures are reported on standard output and not rethrown.
     *
     * @param zipFile path of the compressed file
     * @param dstFile path of the restored file to create
     */
    public static void unzipFile(String zipFile, String dstFile) {
        try {
            byte[] huffmanBytes;
            Map<Byte,String> huffmanCode;
            int bitLength;
            // NOTE(review): this uses Java-native deserialization; only open archives that
            // come from a trusted source.
            try (InputStream is = new FileInputStream(zipFile);
                 ObjectInputStream ois = new ObjectInputStream(is)) {
                huffmanBytes = (byte[]) ois.readObject();
                @SuppressWarnings("unchecked") // written by zipFile as a Map<Byte,String>
                Map<Byte,String> table = (Map<Byte,String>) ois.readObject();
                huffmanCode = table;
                bitLength = readBitLength(ois);
            }

            byte[] bytes = bitLength < 0
                    ? decode(huffmanCode, huffmanBytes)
                    : decode(huffmanCode, huffmanBytes, bitLength);

            // The destination is opened only after decoding succeeded.
            try (OutputStream os = new FileOutputStream(dstFile)) {
                os.write(bytes);
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    /** Reads the trailing bit count, or returns -1 for an archive that ends without one. */
    private static int readBitLength(ObjectInputStream ois) throws IOException {
        try {
            return ois.readInt();
        } catch (EOFException legacyArchive) {
            // Archive written before the bit count was added to the layout.
            return -1;
        }
    }

    /**
     * Compresses a file with Huffman coding.
     *
     * <p>Failures are reported on standard output and not rethrown.
     *
     * @param srcFile path of the file to compress
     * @param dstFile path of the compressed file to create
     */
    public static void zipFile(String srcFile, String dstFile) {
        try {
            byte[] b = readAll(srcFile);
            byte[] huffmanBytes = huffmanZip(b);
            int bitLength = encodedBitLength(b, huffmanCodes);

            // try-with-resources closes the object stream first, which flushes its buffer
            // before the underlying file stream is closed.
            try (OutputStream os = new FileOutputStream(dstFile);
                 ObjectOutputStream oos = new ObjectOutputStream(os)) {
                oos.writeObject(huffmanBytes);
                // The code table must be stored too, otherwise the file cannot be restored.
                oos.writeObject(huffmanCodes);
                // Appended after the two original records so that a reader which only
                // expects those two records can still open the archive.
                oos.writeInt(bitLength);
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    /** Reads a file completely; a single read() call is not guaranteed to return all of it. */
    private static byte[] readAll(String path) throws IOException {
        try (InputStream is = new FileInputStream(path)) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = is.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    /** Returns the total number of bits that encoding {@code bytes} with {@code codes} yields. */
    private static int encodedBitLength(byte[] bytes, Map<Byte,String> codes) {
        int bits = 0;
        for (byte b : bytes) {
            bits += codes.get(b).length();
        }
        return bits;
    }

    /**
     * Decodes compressed data whose exact bit length is unknown (archives without a stored
     * bit count).
     *
     * <p>Every byte but the last is rendered with eight bits; the last byte is rendered
     * without padding, so leading zero bits of the final chunk cannot be recovered here.
     *
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @param huffmanBytes the compressed bytes to decode
     * @return the bytes of the original data
     * @throws IllegalArgumentException if the bits do not end on a complete code
     */
    private static byte[] decode(Map<Byte,String> huffmanCodes, byte[] huffmanBytes) {
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < huffmanBytes.length; i++) {
            boolean last = (i == huffmanBytes.length - 1);
            bits.append(byteToBitString(!last, huffmanBytes[i]));
        }
        return decodeBits(huffmanCodes, bits);
    }

    /**
     * Decodes compressed data whose exact bit length is known.
     *
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @param huffmanBytes the compressed bytes to decode
     * @param bitLength    number of encoded bits packed into {@code huffmanBytes}
     * @return the bytes of the original data
     * @throws IllegalArgumentException if {@code bitLength} does not fit the array length
     *                                  or the bits do not end on a complete code
     */
    private static byte[] decode(Map<Byte,String> huffmanCodes, byte[] huffmanBytes, int bitLength) {
        int expectedBytes = bitLength < 0 ? -1 : bitLength / 8 + (bitLength % 8 == 0 ? 0 : 1);
        if (expectedBytes != huffmanBytes.length) {
            throw new IllegalArgumentException(
                    "Bit length " + bitLength + " does not match " + huffmanBytes.length + " data bytes");
        }
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < huffmanBytes.length; i++) {
            String eight = byteToBitString(true, huffmanBytes[i]);
            if (i == huffmanBytes.length - 1) {
                // The final chunk holds 1..8 bits, stored in the low bits of the byte.
                int width = bitLength - 8 * i;
                eight = eight.substring(8 - width);
            }
            bits.append(eight);
        }
        return decodeBits(huffmanCodes, bits);
    }

    /** Translates a string of '0'/'1' characters back into bytes using the code table. */
    private static byte[] decodeBits(Map<Byte,String> huffmanCodes, StringBuilder bits) {
        // Invert the table, because decoding looks up a byte by its code.
        Map<String,Byte> symbolByCode = new HashMap<>();
        for (Map.Entry<Byte,String> entry : huffmanCodes.entrySet()) {
            symbolByCode.put(entry.getValue(), entry.getKey());
        }

        List<Byte> decoded = new ArrayList<>();
        int total = bits.length();
        int start = 0;
        while (start < total) {
            // Grow the window from 'start' until it matches a code.
            Byte symbol = null;
            int end = start;
            while (symbol == null && end < total) {
                end++;
                symbol = symbolByCode.get(bits.substring(start, end));
            }
            if (symbol == null) {
                throw new IllegalArgumentException(
                        "Bits from offset " + start + " do not match any Huffman code");
            }
            decoded.add(symbol);
            start = end;
        }

        byte[] result = new byte[decoded.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = decoded.get(i);
        }
        return result;
    }

    /**
     * Converts one byte into its binary string.
     *
     * @param flag {@code true} to left-pad the result to exactly eight bits; {@code false}
     *             to return the shortest representation
     * @param b    the byte to convert, interpreted as an unsigned 8-bit value
     * @return the bits of {@code b}
     */
    private static String byteToBitString(boolean flag, byte b) {
        // Keep only the low eight bits so a negative byte is not sign-extended to 32 bits.
        int temp = b & 0xFF;
        if (!flag) {
            return Integer.toBinaryString(temp);
        }
        // Bitwise OR with 1 0000 0000 forces a ninth bit, so the last eight characters of
        // the binary string are the zero-padded byte.
        temp |= 256;
        String str = Integer.toBinaryString(temp);
        return str.substring(str.length() - 8);
    }

    /**
     * Convenience wrapper that runs the whole compression pipeline.
     *
     * @param bytes the original data
     * @return the Huffman-encoded, bit-packed data
     */
    private static byte[] huffmanZip(byte[] bytes) {
        // Step 1: one leaf per distinct byte value.
        List<Node1> nodes = getNodes(bytes);
        // Step 2: merge the leaves into a Huffman tree.
        Node1 huffmanTree = createHuffmanTree(nodes);
        // Step 3: derive the code table from the tree.
        Map<Byte, String> codes = getCondes(huffmanTree);
        // Step 4: encode and pack.
        return zip(bytes, codes);
    }

    /**
     * Encodes {@code bytes} with the given code table and packs the bit string into bytes,
     * eight bits per byte. A final chunk shorter than eight bits is stored in the low bits
     * of the last byte.
     *
     * @param bytes        the original data
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @return the packed Huffman-encoded bytes
     * @throws IllegalArgumentException if a byte of the input has no code in the table
     */
    private static byte[] zip(byte[] bytes, Map<Byte,String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                // Appending null would silently insert the text "null" into the bit string.
                throw new IllegalArgumentException("No Huffman code for byte value " + b);
            }
            bits.append(code);
        }

        int total = bits.length();
        byte[] huffmanCodeBytes = new byte[total / 8 + (total % 8 == 0 ? 0 : 1)];
        int index = 0;
        for (int i = 0; i < total; i += 8) {
            String strByte = bits.substring(i, Math.min(i + 8, total));
            huffmanCodeBytes[index++] = (byte) Integer.parseInt(strByte, 2);
        }
        return huffmanCodeBytes;
    }

    /**
     * Builds the code table for the tree rooted at {@code root}.
     *
     * <p>The table is rebuilt from scratch on every call, so codes from an earlier tree
     * never leak into the result.
     *
     * @param root root of the Huffman tree; may be {@code null}
     * @return the code table, which is also stored in {@link #huffmanCodes}; empty when
     *         {@code root} is {@code null}
     */
    private static Map<Byte,String> getCondes(Node1 root) {
        huffmanCodes = new HashMap<>();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // A tree with a single leaf has no edges; give the only symbol a one-bit code
            // so that encoding it produces at least one bit per occurrence.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        getCondes(root, "", stringBuilder);
        return huffmanCodes;
    }

    /**
     * Walks the subtree rooted at {@code node} and stores the code of every leaf it
     * reaches in {@link #huffmanCodes}.
     *
     * @param node          subtree to process; a {@code null} node is ignored
     * @param code          step taken to reach {@code node}: "0" for left, "1" for right
     * @param stringBuilder path accumulated before this step; it is copied, not modified
     */
    private static void getCondes(Node1 node, String code, StringBuilder stringBuilder) {
        StringBuilder path = new StringBuilder(stringBuilder);
        path.append(code);
        if (node == null) {
            return;
        }
        if (node.data == null) {
            // Internal node: descend into both children.
            getCondes(node.left, "0", path);
            getCondes(node.right, "1", path);
        } else {
            // Leaf: the accumulated path is its code.
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Prints the tree rooted at {@code root} in pre-order, or a notice when there is no tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     */
    public static void preOrder(Node1 root) {
        if (root == null) {
            System.out.println("赫夫曼树为空");
            return;
        }
        root.preOrder();
    }

    /**
     * Creates one leaf node per distinct byte value, weighted by its number of occurrences.
     *
     * @param bytes the data to analyse
     * @return a new mutable list of leaves; empty when {@code bytes} is empty
     */
    private static List<Node1> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }

        List<Node1> leaves = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            leaves.add(new Node1(entry.getKey(), entry.getValue()));
        }
        return leaves;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * <p>The list is consumed: on return it holds only the root (or stays empty).
     *
     * @param nodes leaf nodes to merge
     * @return the root of the tree, or {@code null} when there are no nodes
     */
    private static Node1 createHuffmanTree(List<Node1> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight, so the two lightest trees sit at the front.
            Collections.sort(nodes);
            Node1 leftNode = nodes.remove(0);
            Node1 rightNode = nodes.remove(0);

            // An internal node carries no data, only the combined weight.
            Node1 parent = new Node1(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }
}
class Node1 implements Comparable<Node1>{
Byte data; //存放数据 按照ascii
int weight; //权值,表示字符出现的次数
Node1 left;
Node1 right;
//前序遍历
public void preOrder(){
System.out.println(this);
if (this.left != null){
this.left.preOrder();
}
if (this.right != null){
this.right.preOrder();
}
}
@Override
public int compareTo(Node1 o) {
return this.weight - o.weight;
}
public Node1(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
@Override
public String toString() {
return "Node1{" +
"data=" + data +
", weight=" + weight +
'}';
}
}