package com.gxmedu.huffman_code;
import java.io.*;
import java.util.*;
/**
 * Huffman-coding compressor and decompressor for byte arrays and files.
 *
 * <p>Compression counts byte frequencies, builds a Huffman tree, derives a
 * bit-string code for every distinct byte and packs the encoded bits into
 * bytes. The packed array carries one extra trailing byte that records how
 * many bits of the last data byte are significant, so leading zeros of the
 * final chunk survive the round trip.
 *
 * <p>Instances are stateful: {@link #zip(byte[])} keeps the code table of the
 * most recent call in a private field, and {@link #compressionFile} writes
 * that table next to the payload. The table is a plain {@link HashMap} with no
 * synchronization, so an instance must not be shared between threads.
 *
 * @author 郭学明
 * @version 1.0
 */
public class HuffmanCode {
    /** Chunk size used when reading a whole file into memory. */
    private static final int READ_BUFFER_SIZE = 8192;

    /** Empty prefix handed to {@link #getHuffmanCode} for the root; it is never mutated. */
    private StringBuilder huffmanCodeBuilder = new StringBuilder();

    /** Code table (byte -> bit string) produced by the most recent {@link #zip(byte[])} call. */
    private HashMap<Byte, String> huffmanCode = new HashMap<>();

    /**
     * Demo: compresses a sample sentence, prints the packed bytes, then
     * decompresses them and prints the restored text.
     *
     * <p>File-based usage goes through
     * {@link #compressionFile(String, String)} and
     * {@link #decompressionFile(String, String)}.
     */
    public static void main(String[] args) {
        HuffmanCode huffmanCode1 = new HuffmanCode();
        String s1 = "i like like like java do you like a java";
        byte[] result = huffmanCode1.zip(s1.getBytes());
        System.out.println(Arrays.toString(result));
        byte[] decompression = huffmanCode1.decompression(result, huffmanCode1.huffmanCode);
        System.out.println(new String(decompression));
    }

    /**
     * Huffman tree node. A leaf carries a byte value in {@code data}; an
     * internal node has {@code data == null} and its weight is the sum of its
     * children's weights. Nodes order by ascending weight.
     *
     * <p>NOTE(review): this is a non-static inner class and does not use its
     * enclosing instance; it is left non-static so existing
     * {@code outer.new Node(...)} call sites keep compiling.
     */
    public class Node implements Comparable<Node> {
        private Byte data;
        private int weigh;
        private Node left;
        private Node right;

        /**
         * @param data  byte value for a leaf, or {@code null} for an internal node
         * @param weigh occurrence count (leaf) or combined count (internal node)
         */
        public Node(Byte data, int weigh) {
            this.data = data;
            this.weigh = weigh;
        }

        public Byte getData() {
            return data;
        }

        public void setData(byte data) {
            this.data = data;
        }

        public int getWeigh() {
            return weigh;
        }

        public void setWeigh(int weigh) {
            this.weigh = weigh;
        }

        public Node getLeft() {
            return left;
        }

        public void setLeft(Node left) {
            this.left = left;
        }

        public Node getRight() {
            return right;
        }

        public void setRight(Node right) {
            this.right = right;
        }

        @Override
        public String toString() {
            return "Node{" +
                    "data=" + data +
                    ", weigh=" + weigh +
                    '}';
        }

        /** Orders nodes by ascending weight. */
        @Override
        public int compareTo(Node o) {
            // Integer.compare cannot overflow the way "this.weigh - o.weigh" can.
            return Integer.compare(this.weigh, o.weigh);
        }
    }

    /**
     * Compression step 1: counts how often each byte value occurs and wraps
     * every distinct value in a leaf node weighted by its count.
     *
     * @param bytes data to analyse
     * @return one leaf per distinct byte; empty when {@code bytes} is empty
     */
    public List<Node> getNodes(byte[] bytes) {
        HashMap<Byte, Integer> dataCount = new HashMap<>();
        for (byte b : bytes) {
            Integer count = dataCount.get(b);
            dataCount.put(b, count == null ? 1 : count + 1);
        }
        ArrayList<Node> nodes = new ArrayList<>(dataCount.size());
        for (Map.Entry<Byte, Integer> entry : dataCount.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Compression step 2: builds the Huffman tree by repeatedly merging the
     * two lightest nodes under a new parent.
     *
     * <p>The list is modified in place; when this method returns it holds
     * only the root (or is still empty).
     *
     * @param nodes leaves to combine
     * @return the root of the tree, or {@code null} when {@code nodes} is empty
     */
    public Node createHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            // Nothing to encode; callers treat a null root as "no codes".
            return null;
        }
        while (nodes.size() > 1) {
            // Re-sorting keeps the tie-breaking (and therefore the generated
            // codes) identical to earlier versions of this class.
            Collections.sort(nodes);
            Node left = nodes.remove(0);
            Node right = nodes.remove(0);
            Node parent = new Node(null, left.weigh + right.weigh);
            parent.setLeft(left);
            parent.setRight(right);
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Compression step 3: walks the tree and records the bit string of every
     * leaf in this instance's code table ("0" for a left edge, "1" for a
     * right edge).
     *
     * <p>A leaf reached with an empty path (a tree that consists of a single
     * leaf) is given the code "0"; an empty code would encode no bits at all
     * and the data could not be restored.
     *
     * @param node subtree to visit; {@code null} is ignored
     * @param code edge label leading to {@code node} ("" for the root)
     * @param sb   path accumulated above {@code node}; it is copied, never modified
     */
    public void getHuffmanCode(Node node, String code, StringBuilder sb) {
        if (node == null) {
            return;
        }
        StringBuilder path = new StringBuilder(sb).append(code);
        if (node.data == null) {
            getHuffmanCode(node.left, "0", path);
            getHuffmanCode(node.right, "1", path);
        } else {
            huffmanCode.put(node.data, path.length() == 0 ? "0" : path.toString());
        }
    }

    /**
     * Compression step 4: encodes {@code bytes} with the given code table and
     * packs the resulting bit string eight bits per byte.
     *
     * <p>The returned array has one extra trailing byte holding the number of
     * bits in the last chunk. Without it a final chunk such as "0001" or
     * "001" would be stored as the same byte value 1 and could not be told
     * apart when decoding.
     *
     * @param bytes       data to encode
     * @param huffmanCode code table mapping each byte to its bit string
     * @return packed bits followed by the length of the last chunk
     * @throws IllegalArgumentException if a byte has no entry in the table
     */
    public byte[] huffmanCodeBytes(byte[] bytes, HashMap<Byte, String> huffmanCode) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCode.get(b);
            if (code == null) {
                throw new IllegalArgumentException("No Huffman code for byte value " + b);
            }
            bits.append(code);
        }
        int bitCount = bits.length();
        byte[] packed = new byte[(bitCount + 7) / 8 + 1];
        int lastChunkBits = 0;
        int outIndex = 0;
        for (int start = 0; start < bitCount; start += 8) {
            int end = Math.min(start + 8, bitCount);
            lastChunkBits = end - start;
            packed[outIndex++] = (byte) Integer.parseInt(bits.substring(start, end), 2);
        }
        packed[packed.length - 1] = (byte) lastChunkBits;
        return packed;
    }

    /**
     * Compression step 5: runs steps 1-4 on {@code bytes}.
     *
     * <p>The code table of this instance is rebuilt from scratch on every
     * call so entries from an earlier input cannot leak into the result.
     *
     * @param bytes data to compress
     * @return packed representation, see {@link #huffmanCodeBytes}
     */
    public byte[] zip(byte[] bytes) {
        huffmanCode.clear();
        Node root = createHuffmanTree(getNodes(bytes));
        getHuffmanCode(root, "", huffmanCodeBuilder);
        return huffmanCodeBytes(bytes, huffmanCode);
    }

    /**
     * Decompression step 1: expands a packed array back into its bit string.
     *
     * <p>Every data byte yields eight bits except the last one, which yields
     * only as many low-order bits as the trailing length byte states.
     *
     * @param bytes packed array as produced by {@link #huffmanCodeBytes}
     * @return the bit string; empty when there are no data bytes
     * @throws IllegalArgumentException if the trailing length byte is outside 0..8
     */
    public String getBinaryHuffmanCode(byte[] bytes) {
        int payloadLength = bytes.length - 1;
        if (payloadLength <= 0) {
            return "";
        }
        int lastChunkBits = bytes[payloadLength];
        if (lastChunkBits < 0 || lastChunkBits > 8) {
            throw new IllegalArgumentException("Invalid last-chunk bit count: " + lastChunkBits);
        }
        StringBuilder bits = new StringBuilder(payloadLength * 8);
        for (int i = 0; i < payloadLength; i++) {
            // Masking to 0..255 and setting bit 8 guarantees a 9-character
            // string; dropping the leading '1' leaves exactly eight bits,
            // including leading zeros, for positive and negative bytes alike.
            String eightBits = Integer.toBinaryString((bytes[i] & 0xFF) | 0x100).substring(1);
            if (i == payloadLength - 1) {
                bits.append(eightBits.substring(8 - lastChunkBits));
            } else {
                bits.append(eightBits);
            }
        }
        return bits.toString();
    }

    /**
     * Decompression step 2: turns a bit string back into the original bytes
     * by matching successive code words against the reversed code table.
     *
     * @param huffmanCode   code table mapping each byte to its bit string
     * @param huffmanString bit string to decode
     * @return the decoded bytes
     * @throws IllegalArgumentException if the bit string ends in bits that
     *                                  match no code of the table
     */
    public byte[] decode(HashMap<Byte, String> huffmanCode, String huffmanString) {
        HashMap<String, Byte> byteByCode = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCode.entrySet()) {
            byteByCode.put(entry.getValue(), entry.getKey());
        }
        ByteArrayOutputStream decoded = new ByteArrayOutputStream();
        int length = huffmanString.length();
        int start = 0;
        // Grow the candidate [start, end) one bit at a time until it is a code word.
        for (int end = 1; end <= length; end++) {
            Byte symbol = byteByCode.get(huffmanString.substring(start, end));
            if (symbol != null) {
                decoded.write(symbol);
                start = end;
            }
        }
        if (start != length) {
            throw new IllegalArgumentException(
                    "Bit string ends with " + (length - start) + " bit(s) that match no Huffman code");
        }
        return decoded.toByteArray();
    }

    /**
     * Decompression step 3: combines {@link #getBinaryHuffmanCode} and
     * {@link #decode}.
     *
     * @param bytes       packed array as produced by {@link #zip(byte[])}
     * @param huffmanCode code table that was used to produce {@code bytes}
     * @return the restored bytes
     */
    public byte[] decompression(byte[] bytes, HashMap<Byte, String> huffmanCode) {
        return decode(huffmanCode, getBinaryHuffmanCode(bytes));
    }

    /**
     * Compresses a file. The destination receives two serialized objects:
     * the packed byte array followed by the code table.
     *
     * @param srcPath  file to compress
     * @param destPath file to create or overwrite with the compressed form
     * @throws RuntimeException wrapping the {@link IOException} if reading or
     *                          writing fails
     */
    public void compressionFile(String srcPath, String destPath) {
        byte[] content;
        try (FileInputStream fis = new FileInputStream(srcPath)) {
            // available() plus one read() is not guaranteed to deliver the
            // whole file, so read until end of stream.
            content = readFully(fis);
        } catch (IOException e) {
            throw new RuntimeException("Failed to read " + srcPath, e);
        }
        byte[] zipped = zip(content);
        try (FileOutputStream fos = new FileOutputStream(destPath);
             ObjectOutputStream oos = new ObjectOutputStream(fos)) {
            oos.writeObject(zipped);
            oos.writeObject(huffmanCode);
        } catch (IOException e) {
            throw new RuntimeException("Failed to write " + destPath, e);
        }
    }

    /**
     * Restores a file written by {@link #compressionFile(String, String)}.
     *
     * <p>NOTE(review): the input is read with Java native deserialization,
     * which is unsafe for files from untrusted sources. It is kept because it
     * is the existing on-disk format.
     *
     * @param srcPath  compressed file
     * @param destPath file to create or overwrite with the restored content
     * @throws RuntimeException wrapping the {@link IOException} or
     *                          {@link ClassNotFoundException} if reading or
     *                          writing fails
     */
    public void decompressionFile(String srcPath, String destPath) {
        byte[] restored;
        try (FileInputStream fis = new FileInputStream(srcPath);
             ObjectInputStream ois = new ObjectInputStream(fis)) {
            byte[] packed = (byte[]) ois.readObject();
            // compressionFile writes a HashMap<Byte, String> as the second object.
            @SuppressWarnings("unchecked")
            HashMap<Byte, String> codes = (HashMap<Byte, String>) ois.readObject();
            restored = decompression(packed, codes);
        } catch (IOException | ClassNotFoundException e) {
            throw new RuntimeException("Failed to read " + srcPath, e);
        }
        try (FileOutputStream fos = new FileOutputStream(destPath)) {
            fos.write(restored);
        } catch (IOException e) {
            throw new RuntimeException("Failed to write " + destPath, e);
        }
    }

    /** Reads the stream until end of input and returns everything that was read. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[READ_BUFFER_SIZE];
        int read;
        while ((read = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, read);
        }
        return buffer.toByteArray();
    }
}
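// Page residue from the source article (not part of the program):
// title "java huffman code 解压缩文件" (Java Huffman code: compressing and decompressing files);
// footer "最新推荐文章于 2024-10-06 20:16:20 发布" (latest recommended article published 2024-10-06 20:16:20).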