22. 赫夫曼编码，以及赫夫曼编码的实现，赫夫曼编码的文件压缩与解压

最新推荐文章于 2024-04-21 07:27:33 发布
乖乖虎学Java
最新推荐文章于 2024-04-21 07:27:33 发布
阅读量515
点赞数
分类专栏：数据结构与算法　文章标签：c++、算法、前端
本文链接：https://blog.csdn.net/qq_46049600/article/details/109169515
版权
数据结构与算法专栏收录该内容
28 篇文章 1 订阅
订阅专栏
1. 什么是赫夫曼编码

哈夫曼编码（Huffman Coding），又称霍夫曼编码或赫夫曼编码，是可变字长编码（VLC）的一种。Huffman 于 1952 年提出该编码方法：完全依据字符出现的概率来构造异字头（即任何一个码字都不是另一个码字的前缀）且平均长度最短的码字，因此有时称之为最佳编码，一般就叫做 Huffman 编码。
在这里插入图片描述
哈夫曼树
在这里插入图片描述
2.ASCII码表

在这里插入图片描述
3.代码实现

package com.qin.huffmanCode;

import java.io.*;
import java.util.*;

/**
 * Huffman coding demo: builds a Huffman tree from byte frequencies, derives the
 * code table, packs data into bytes and restores it, and offers file-level
 * compress / decompress helpers.
 *
 * <p>Archive layout written by {@link #zipFile}: three serialized objects in order,
 * the packed {@code byte[]}, the {@code Map<Byte,String>} code table, and an
 * {@code Integer} holding the exact number of encoded bits. Archives that contain
 * only the first two objects (older layout) are still accepted by
 * {@link #unZipFile}, which then reconstructs the last byte on a best-effort basis.
 */
public class HuffmanCode {

    public static void main(String[] args) throws IOException {

        String content = "i like like like java do you like a java";
        // getBytes() encodes with the platform default charset; the sample text is plain ASCII
        byte[] contentBytes = content.getBytes();
        System.out.println("字母转化的Ascii为:");
        for (byte contentByte : contentBytes) {
            System.out.printf("%d\t",contentByte);
        }
        System.out.println();
        System.out.println("原始字符的长度为"+contentBytes.length);  //40

        List<Node> nodes = getNodes(contentBytes);
        System.out.println("赫夫曼编码表为nodes="+nodes);

        System.out.println("===================");

        Node huffmanTree = createHuffmanTree(nodes);
        System.out.println("前序遍历");
        preOrder(huffmanTree);

        System.out.println("===================");
        // check that the code table was generated
        Map<Byte, String> huffmanCodes = getCodes(huffmanTree);
        System.out.println("生成的赫夫曼编码表"+huffmanCodes);

        System.out.println("===================");

        byte[] zip = zip(contentBytes, huffmanCodes);
        System.out.println("赫夫曼编码huffmanCodeBytes="+Arrays.toString(zip)); //17

        // same pipeline through the convenience wrapper
        System.out.println("===================");
        byte[] bytes = huffmanZip(contentBytes);
        System.out.println("封装后的赫夫曼编码"+Arrays.toString(bytes));

        System.out.println("===================");
        // pass the exact bit count so the final, partially filled byte is restored correctly
        byte[] sourceBytes = decode(huffmanCodes, zip, encodedBitLength(contentBytes, huffmanCodes));
        System.out.println("原来的字节码为="+Arrays.toString(sourceBytes));
        System.out.println("原来的字符串为="+new String(sourceBytes));

        // File round-trip example (paths are machine specific, so it is left disabled):
        //   zipFile("C:\\Users\\Administrator\\Pictures\\Saved Pictures\\1.jpg", "G:\\dst.zip");
        //   unZipFile("G:\\dst.zip", "G:\\qxd.png");
    }

    /**
     * Counts how often each byte value occurs and wraps every (value, count)
     * pair in a leaf {@link Node}.
     *
     * @param bytes the data to analyse
     * @return a new mutable list with one node per distinct byte; empty for empty input
     */
    private static List<Node> getNodes(byte[] bytes){
        Map<Byte,Integer> counts = new HashMap<>();
        for (byte aByte : bytes) {
            Integer count = counts.get(aByte);
            counts.put(aByte, count == null ? 1 : count + 1);
        }
        List<Node> nodes = new ArrayList<>();
        for (Map.Entry<Byte,Integer> entry : counts.entrySet()){
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest nodes.
     * The list is consumed: on return it holds only the root.
     *
     * @param nodes leaf nodes; modified in place
     * @return the root of the tree, or {@code null} when {@code nodes} is empty
     */
    private static Node createHuffmanTree(List<Node> nodes){
        if (nodes.isEmpty()) {
            return null; // nothing to encode; avoids get(0) on an empty list
        }
        while (nodes.size() > 1){
            // ascending by weight; re-sorting keeps the tree shape (and thus the codes) stable
            Collections.sort(nodes);
            Node nodeLeft = nodes.remove(0);   // lightest
            Node nodeRight = nodes.remove(0);  // second lightest

            // an internal node carries no data, only the combined weight
            Node parent = new Node(null, nodeLeft.weight + nodeRight.weight);
            parent.left = nodeLeft;
            parent.right = nodeRight;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /** Prints the tree in pre-order, or a notice when the tree is empty. */
    private static void preOrder(Node root){
        if (root != null){
            root.preOrder();
        } else {
            System.out.println("哈夫曼树为空，无法遍历");
        }
    }

    /**
     * Code table produced by the most recent {@link #getCodes(Node)} call.
     * Kept only for code that still reads this field; each call publishes a fresh map here.
     */
    static Map<Byte,String> huffmanCodes = new HashMap<Byte, String>();

    /** No longer used by this class; retained so existing references keep compiling. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Walks the subtree under {@code node} and records the path to every leaf,
     * using '0' for a left edge and '1' for a right edge.
     *
     * @param node  subtree root; {@code null} is ignored
     * @param path  bit path from the tree root to {@code node}
     * @param codes table that receives one entry per leaf
     */
    private static void collectCodes(Node node, String path, Map<Byte,String> codes){
        if (node == null){
            return;
        }
        if (node.data == null){ // internal node: descend
            collectCodes(node.left, path + "0", codes);
            collectCodes(node.right, path + "1", codes);
        } else {                // leaf: its path is its code
            codes.put(node.data, path);
        }
    }

    /**
     * Derives the code table for a Huffman tree.
     *
     * <p>A new map is built on every call, so codes from an earlier input never
     * leak into the table of a later one. A tree consisting of a single leaf
     * (input with one distinct byte) gets the one-bit code "0".
     *
     * @param root root of the tree, may be {@code null}
     * @return the code table; empty when {@code root} is {@code null}
     */
    private static Map<Byte,String> getCodes(Node root){
        Map<Byte,String> codes = new HashMap<Byte, String>();
        if (root != null){
            if (root.data != null){
                // a lone leaf has no edges; give it a code so the data can still be packed
                codes.put(root.data, "0");
            } else {
                collectCodes(root.left, "0", codes);
                collectCodes(root.right, "1", codes);
            }
        }
        huffmanCodes = codes;
        return codes;
    }

    /**
     * Packs {@code bytes} into Huffman-coded bytes. Bits are grouped eight at a
     * time; a shorter final group is stored as its plain numeric value, so the
     * exact bit count (see {@link #encodedBitLength}) is needed to restore it reliably.
     *
     * @param bytes        the original data
     * @param huffmanCodes code table covering every byte value in {@code bytes}
     * @return the packed data; empty for empty input
     * @throws IllegalArgumentException if a byte has no entry in {@code huffmanCodes}
     */
    private static byte[] zip(byte[] bytes,Map<Byte,String> huffmanCodes){
        StringBuilder bits = new StringBuilder();
        for (byte aByte : bytes) {
            String code = huffmanCodes.get(aByte);
            if (code == null){
                throw new IllegalArgumentException("no Huffman code for byte value " + aByte);
            }
            bits.append(code);
        }
        System.out.println("测试stringBuilder="+bits.toString());

        byte[] huffmanCodeBytes = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            int end = Math.min(i + 8, bits.length());
            // parse as int first: "1xxxxxxx" exceeds the signed byte range before the cast
            huffmanCodeBytes[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return huffmanCodeBytes;
    }

    /**
     * Returns the total number of bits {@code bytes} occupies under {@code huffmanCodes}.
     *
     * @throws NullPointerException if a byte has no entry in {@code huffmanCodes}
     */
    private static int encodedBitLength(byte[] bytes, Map<Byte,String> huffmanCodes){
        int total = 0;
        for (byte aByte : bytes) {
            total += huffmanCodes.get(aByte).length();
        }
        return total;
    }

    /**
     * Convenience wrapper: frequency count, tree, code table and packing in one call.
     * The table used is available from {@link #huffmanCodes} afterwards.
     *
     * @param bytes the original data
     * @return the packed data
     */
    private static byte[] huffmanZip(byte[] bytes){
        List<Node> nodes = getNodes(bytes);
        Node huffmanTree = createHuffmanTree(nodes);
        Map<Byte, String> codes = getCodes(huffmanTree);
        return zip(bytes, codes);
    }

    /**
     * Renders the low {@code width} bits of {@code b} as a '0'/'1' string,
     * keeping leading zeros.
     *
     * @param b     the byte to render
     * @param width number of bits to emit, 1 to 8
     */
    private static String toBits(byte b, int width){
        // OR-ing in bit 8 forces a 9-character result, so leading zeros survive
        String nineBits = Integer.toBinaryString((b & 0xFF) | 0x100);
        return nineBits.substring(9 - width);
    }

    /** Renders every byte except the last one as eight bits each. */
    private static StringBuilder leadingBits(byte[] huffmanBytes){
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < huffmanBytes.length - 1; i++) {
            bits.append(toBits(huffmanBytes[i], 8));
        }
        return bits;
    }

    /** Swaps keys and values of the code table for lookup by bit pattern. */
    private static Map<String,Byte> invertCodes(Map<Byte,String> huffmanCodes){
        Map<String,Byte> map = new HashMap<String, Byte>();
        for (Map.Entry<Byte,String> entry : huffmanCodes.entrySet()){
            map.put(entry.getValue(), entry.getKey());
        }
        return map;
    }

    /**
     * Splits a bit string into codes and maps each back to its byte.
     *
     * @return the decoded bytes, or {@code null} if some position matches no code
     */
    private static byte[] decodeBits(StringBuilder bits, Map<String,Byte> codeToByte){
        int maxCodeLength = 0;
        for (String code : codeToByte.keySet()) {
            maxCodeLength = Math.max(maxCodeLength, code.length());
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int start = 0;
        while (start < bits.length()){
            Byte value = null;
            int end = start;
            // grow the window one bit at a time; no code is longer than maxCodeLength
            int limit = Math.min(bits.length(), start + maxCodeLength);
            while (value == null && end < limit){
                end++;
                value = codeToByte.get(bits.substring(start, end));
            }
            if (value == null){
                return null;
            }
            out.write(value.byteValue());
            start = end;
        }
        return out.toByteArray();
    }

    /**
     * Restores the original bytes when the exact encoded bit count is known.
     *
     * @param huffmanCodes code table used for packing
     * @param huffmanBytes packed data
     * @param bitLength    total number of encoded bits
     * @return the original data
     * @throws IllegalArgumentException if {@code bitLength} does not fit
     *         {@code huffmanBytes} or the bits do not decode under the table
     */
    private static byte[] decode(Map<Byte,String> huffmanCodes,byte[] huffmanBytes,int bitLength){
        if (huffmanBytes.length == 0){
            if (bitLength != 0){
                throw new IllegalArgumentException("bit length " + bitLength + " given for empty data");
            }
            return new byte[0];
        }
        long lastWidth = bitLength - 8L * (huffmanBytes.length - 1);
        if (lastWidth < 1 || lastWidth > 8){
            throw new IllegalArgumentException(
                    "bit length " + bitLength + " does not match " + huffmanBytes.length + " packed bytes");
        }
        StringBuilder bits = leadingBits(huffmanBytes);
        bits.append(toBits(huffmanBytes[huffmanBytes.length - 1], (int) lastWidth));
        byte[] decoded = decodeBits(bits, invertCodes(huffmanCodes));
        if (decoded == null){
            throw new IllegalArgumentException("packed data does not match the Huffman code table");
        }
        return decoded;
    }

    /**
     * Restores the original bytes when the bit count was not recorded.
     *
     * <p>The width of the last byte is unknown, so candidate widths are tried
     * from the shortest representation up to eight bits and the first one that
     * decodes cleanly is used. This is best-effort: leading zero bits of the
     * last group that themselves form valid codes cannot be recovered.
     *
     * @param huffmanCodes code table used for packing
     * @param huffmanBytes packed data
     * @return the original data
     * @throws IllegalArgumentException if no candidate width decodes under the table
     */
    private static byte[] decode(Map<Byte,String> huffmanCodes,byte[] huffmanBytes){
        if (huffmanBytes.length == 0){
            return new byte[0];
        }
        Map<String,Byte> codeToByte = invertCodes(huffmanCodes);
        StringBuilder bits = leadingBits(huffmanBytes);
        int prefixLength = bits.length();
        byte last = huffmanBytes[huffmanBytes.length - 1];
        int minWidth = Integer.toBinaryString(last & 0xFF).length();
        for (int width = minWidth; width <= 8; width++) {
            bits.setLength(prefixLength);
            bits.append(toBits(last, width));
            byte[] decoded = decodeBits(bits, codeToByte);
            if (decoded != null){
                return decoded;
            }
        }
        throw new IllegalArgumentException("packed data does not match the Huffman code table");
    }

    /** Reads a whole file into memory; loops because one read() call may return fewer bytes. */
    private static byte[] readFile(String path) throws IOException {
        try (InputStream in = new FileInputStream(path)) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1){
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    /**
     * Compresses a file.
     *
     * @param srcFile full path of the file to compress
     * @param desFile path of the archive to create
     * @throws IOException if the source cannot be read or the archive cannot be written
     */
    public static void zipFile(String srcFile,String desFile) throws IOException {
        byte[] source = readFile(srcFile);
        Map<Byte,String> codes = getCodes(createHuffmanTree(getNodes(source)));
        byte[] huffmanBytes = zip(source, codes);
        try (OutputStream os = new FileOutputStream(desFile);
             ObjectOutputStream oos = new ObjectOutputStream(os)) {
            oos.writeObject(huffmanBytes);
            // the code table must travel with the data, otherwise it cannot be decoded
            oos.writeObject(codes);
            // appended last so readers of the two-object layout are unaffected
            oos.writeObject(Integer.valueOf(encodedBitLength(source, codes)));
        }
    }

    /**
     * Decompresses an archive created by {@link #zipFile}.
     *
     * <p>NOTE(security): the archive is read with Java native deserialization;
     * only open archives from a trusted source.
     *
     * @param zipFile path of the archive
     * @param desFile path of the file to create
     * @throws IOException if the archive cannot be read, is not a valid archive,
     *         or the destination cannot be written
     */
    public static void unZipFile(String zipFile,String desFile) throws IOException {
        byte[] huffmanBytes;
        Map<Byte,String> codes;
        Integer bitLength = null;
        try (InputStream is = new FileInputStream(zipFile);
             ObjectInputStream ois = new ObjectInputStream(is)) {
            huffmanBytes = (byte[]) ois.readObject();
            @SuppressWarnings("unchecked") // element types are validated implicitly while decoding
            Map<Byte,String> table = (Map<Byte, String>) ois.readObject();
            codes = table;
            try {
                bitLength = (Integer) ois.readObject();
            } catch (EOFException olderLayout) {
                // archive without a recorded bit count; fall back to best-effort decoding
            }
        } catch (ClassNotFoundException | ClassCastException e) {
            throw new IOException("not a valid Huffman archive: " + zipFile, e);
        }
        if (huffmanBytes == null || codes == null){
            throw new IOException("not a valid Huffman archive: " + zipFile);
        }

        byte[] bytes;
        try {
            bytes = bitLength != null
                    ? decode(codes, huffmanBytes, bitLength.intValue())
                    : decode(codes, huffmanBytes);
        } catch (IllegalArgumentException | ClassCastException e) {
            throw new IOException("corrupt Huffman archive: " + zipFile, e);
        }

        try (OutputStream os = new FileOutputStream(desFile)) {
            os.write(bytes);
        }
    }

}


//创建Node，带数据和权值
//Comparable<Node> 有了它我们就可以直接对node进行排序
class Node implements Comparable<Node>{

    Byte data; // 存放数据本身，比如'a'=> 97
    int weight; //权值，表示字符出现的次数
    Node left;
    Node right;

    //Node的有参构造方法
    public Node(Byte data,int weight){
        super();
        this.data = data;
        this.weight = weight;
    }

    @Override
    public int compareTo(Node o) {
        //这句话的意思表示从小到大排序，-(this.weight-o.weight)表示从大到小
        return this.weight-o.weight;
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }

    //前序遍历  中左右
    public void preOrder(){
        System.out.println(this);
        if (this.left!=null){
            this.left.preOrder();
        }
        if (this.right!=null){
            this.right.preOrder();
        }
    }


}