哈夫曼编码
1、基本概念
1)一种可变长的前缀编码,常用于数据压缩
2、实现步骤
1)把字符串转换为字节数组,统计出每个字节出现的次数;
2)把每个字节出现的次数作为该字节的权值,字节的值(对英文字符即 ASCII 码)作为节点数据;
3)利用每个字符的权值,构建哈夫曼树;
4)根据哈夫曼树,给每个字符编码,向左的路径编码为 0,向右的路径编码为 1;
// 构建哈夫曼树
package com.datastructure.huffmancode;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Huffman-coding based compression of byte arrays and files.
 *
 * <p>Compressed layout produced by {@link #zip} and consumed by {@link #decode}:
 * {@code ceil(bitCount / 8)} data bytes followed by exactly one trailer byte that
 * stores how many bits (1..8) of the last data byte are significant (0 when there
 * are no data bytes). The trailer is what allows leading zero bits of the final,
 * partially filled byte to be restored on decode.
 *
 * <p>The class keeps the most recently built code table in the static field
 * {@link #huffmanCodes}; it is therefore not safe for concurrent use.
 */
public class HuffmanCode {

    /** Code table (byte value -> bit string of '0'/'1') filled by {@link #getCodes}. */
    static Map<Byte, String> huffmanCodes = new HashMap<>();

    /** Empty path prefix passed to {@link #getCodes} for the root; never appended to. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Demo entry point: compresses a fixed sample sentence. Nothing is printed.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "i like like like java do you like a java";
        byte[] strBytes = str.getBytes(StandardCharsets.UTF_8);
        huffmanZip(strBytes);
    }

    /**
     * Compresses the file at {@code oldPath} into {@code newPath}.
     *
     * <p>The output is a Java object stream holding the compressed {@code byte[]}
     * followed by the code table ({@code Map<Byte, String>}). Failures are reported
     * on standard error and otherwise ignored (best effort, never throws).
     *
     * @param oldPath path of the file to compress
     * @param newPath path of the compressed file to create
     */
    public static void zipFile(String oldPath, String newPath) {
        try {
            // Read the complete file; available()+read() may return fewer bytes.
            byte[] original = Files.readAllBytes(Paths.get(oldPath));
            byte[] huffmanByte = huffmanZip(original);
            try (FileOutputStream os = new FileOutputStream(newPath);
                 ObjectOutputStream oos = new ObjectOutputStream(os)) {
                oos.writeObject(huffmanByte);
                // Snapshot so the stored table is independent of later compressions.
                oos.writeObject(new HashMap<>(huffmanCodes));
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("zipFile failed: " + e);
        }
    }

    /**
     * Restores a file written by {@link #zipFile}.
     *
     * <p>NOTE(review): this uses Java native deserialization; only feed it files
     * from a trusted source. Failures are reported on standard error and otherwise
     * ignored (best effort, never throws).
     *
     * @param oldPath path of the compressed file
     * @param newPath path of the restored file to create
     */
    public static void unzipFile(String oldPath, String newPath) {
        try (FileInputStream is = new FileInputStream(oldPath);
             ObjectInputStream ois = new ObjectInputStream(is)) {
            byte[] huffmanByte = (byte[]) ois.readObject();
            @SuppressWarnings("unchecked") // zipFile always writes a Map<Byte, String>
            Map<Byte, String> codes = (Map<Byte, String>) ois.readObject();
            // Decode before touching the target so a bad input leaves no empty file.
            byte[] bytes = decode(codes, huffmanByte);
            Files.write(Paths.get(newPath), bytes);
        } catch (IOException | ClassNotFoundException | RuntimeException e) {
            System.err.println("unzipFile failed: " + e);
        }
    }

    /**
     * Compresses {@code bytes}: counts symbols, builds the tree, rebuilds the
     * static code table {@link #huffmanCodes} and packs the codes.
     *
     * @param bytes data to compress; must not be {@code null}
     * @return compressed data in the layout described on the class
     */
    public static byte[] huffmanZip(byte[] bytes) {
        List<Node> nodes = getNodes(bytes);
        Node root = createHuffmanTree(nodes);
        // Drop codes left over from a previous input.
        huffmanCodes.clear();
        getCodes(root, "", new StringBuilder());
        return zip(bytes, huffmanCodes);
    }

    /**
     * Decompresses data produced by {@link #zip}.
     *
     * @param huffmanCodes code table used for compression (byte value -> bit string)
     * @param huffmanByte  compressed data including the trailer byte
     * @return the original bytes; empty when there are no data bytes
     * @throws IllegalArgumentException if the trailer is out of range or the bits
     *                                  cannot be fully decoded with the table
     */
    public static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanByte) {
        if (huffmanByte == null || huffmanByte.length <= 1) {
            return new byte[0];
        }
        int dataLen = huffmanByte.length - 1;
        int validBitsInLast = huffmanByte[dataLen];
        if (validBitsInLast < 1 || validBitsInLast > 8) {
            throw new IllegalArgumentException(
                    "invalid trailer byte: " + validBitsInLast);
        }

        // Expand every data byte to 8 bits; keep only the significant low-order
        // bits of the last one.
        StringBuilder bits = new StringBuilder(dataLen * 8);
        for (int i = 0; i < dataLen; i++) {
            String eight = byteToBitString(false, huffmanByte[i]);
            if (i == dataLen - 1) {
                bits.append(eight.substring(8 - validBitsInLast));
            } else {
                bits.append(eight);
            }
        }

        // Invert the table: bit string -> byte value.
        Map<String, Byte> reverse = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            reverse.put(entry.getValue(), entry.getKey());
        }

        // Grow the candidate code one bit at a time until it matches a symbol.
        List<Byte> decoded = new ArrayList<>();
        int start = 0;
        for (int end = 1; end <= bits.length(); end++) {
            Byte symbol = reverse.get(bits.substring(start, end));
            if (symbol != null) {
                decoded.add(symbol);
                start = end;
            }
        }
        if (start != bits.length()) {
            throw new IllegalArgumentException(
                    "compressed data does not match the code table");
        }

        byte[] bytes = new byte[decoded.size()];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = decoded.get(i);
        }
        return bytes;
    }

    /**
     * Converts one byte to its binary string, treating it as unsigned.
     *
     * @param flag {@code true} to return the shortest form (no leading zeros),
     *             {@code false} to left-pad with zeros to exactly 8 characters
     * @param b    the byte to convert
     * @return the binary representation of {@code b & 0xFF}
     */
    public static String byteToBitString(boolean flag, byte b) {
        // Mask first so negative bytes do not sign-extend to 32 bits.
        int unsigned = b & 0xFF;
        if (flag) {
            return Integer.toBinaryString(unsigned);
        }
        // Setting bit 8 forces a 9-character string; drop that marker bit.
        String withMarker = Integer.toBinaryString(unsigned | 0x100);
        return withMarker.substring(withMarker.length() - 8);
    }

    /**
     * Packs the Huffman codes of {@code bytes} into bytes, 8 bits per byte.
     *
     * @param bytes        data to encode
     * @param huffmanCodes code table (byte value -> bit string)
     * @return data bytes followed by one trailer byte holding the number of
     *         significant bits in the last data byte (0 if there is none)
     * @throws IllegalArgumentException if a byte has no entry in the table
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                throw new IllegalArgumentException("no Huffman code for byte " + b);
            }
            bits.append(code);
        }

        int bitCount = bits.length();
        int dataLen = (bitCount + 7) / 8;
        byte[] huffmanByte = new byte[dataLen + 1];
        int index = 0;
        for (int i = 0; i < bitCount; i += 8) {
            int end = Math.min(i + 8, bitCount);
            huffmanByte[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        // Trailer: significant bits of the last data byte.
        huffmanByte[dataLen] = (byte) (bitCount == 0 ? 0 : bitCount - (dataLen - 1) * 8);
        return huffmanByte;
    }

    /**
     * Recursively records the code of every leaf below {@code node} in
     * {@link #huffmanCodes}; left edges contribute "0", right edges "1".
     *
     * @param node          current node; {@code null} is ignored
     * @param code          edge label leading from the parent to {@code node}
     *                      ("" for the root)
     * @param stringBuilder path from the root to the parent; not modified
     */
    public static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        StringBuilder path = new StringBuilder(stringBuilder);
        path.append(code);
        if (node.data == null) {
            // Internal node: descend into both children.
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // A tree consisting of a single leaf would get an empty code; use "0"
            // so that each occurrence still occupies one bit.
            String leafCode = path.length() == 0 ? "0" : path.toString();
            huffmanCodes.put(node.data, leafCode);
        }
    }

    /**
     * Prints the tree in pre-order, or "空树" when {@code root} is {@code null}.
     *
     * @param root root of the tree, may be {@code null}
     */
    public static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("空树");
        }
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * <p>The list is modified: on return it holds only the root (or stays empty).
     *
     * @param nodes leaf nodes, one per distinct symbol
     * @return the root of the tree, or {@code null} if {@code nodes} is null/empty
     */
    public static Node createHuffmanTree(List<Node> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            // After sorting, the two lightest nodes are at the front.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Counts how often each byte value occurs and wraps the counts in leaf nodes.
     *
     * @param bytes input data; must not be {@code null}
     * @return one node per distinct byte value, weighted by its frequency
     */
    public static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }
        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }
}

/**
 * Node of a Huffman tree. Leaves carry a byte value in {@code data}; internal
 * nodes have {@code data == null}. Nodes are ordered by ascending weight.
 */
class Node implements Comparable<Node> {
    Byte data;
    int weight;
    Node left;
    Node right;

    /**
     * @param data   byte value for a leaf, {@code null} for an internal node
     * @param weight frequency of the symbol, or the sum of the children's weights
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Orders nodes by ascending weight; uses Integer.compare to avoid overflow. */
    @Override
    public int compareTo(Node o) {
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node[ data = " + data + ", weight = " + weight + "]";
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }
}