关于赫夫曼编码压缩数据和解压的具体实现

最新推荐文章于 2021-07-22 11:53:02 发布

oldhunter686

最新推荐文章于 2021-07-22 11:53:02 发布

阅读量159

点赞数

分类专栏：数据结构与算法　文章标签：java、数据结构、补码、字符串

本文链接：https://blog.csdn.net/oldhunter686/article/details/105154660

版权

数据结构与算法专栏收录该内容

13 篇文章 1 订阅

订阅专栏

手打不易，有用就点个赞，哈哈。
你们可以增加IO类，用来压缩图片和文件玩一下。
参考视频：https://www.bilibili.com/video/BV1E4411H73v?p=128
视频里面老师的代码有些bug，我改正过来了。

package 算法.赫夫曼编码的实现;
//数据压缩的实现

import java.util.*;

/**
 * Huffman-coding based compression and decompression of byte arrays.
 *
 * <p>The codec is stateful: {@link #huffmanZip(byte[])} rebuilds the static code table
 * {@link #huffmanCodes} and records the width of the final chunk in {@link #lastStrLen};
 * {@link #decode(Map, byte[])} needs both to restore the original bytes. The class is
 * therefore not safe for concurrent use.
 */
public class HuffmanCode {
    /** Demo: compresses a sample string, prints the compressed bytes, then restores and prints it. */
    public static void main(String[] args) {
        String content = "java是世界上最好的语言！";
        // Pin the charset so the round trip does not depend on the platform default.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] huffmanCodeBytes = huffmanZip(contentBytes);
        System.out.println(Arrays.toString(huffmanCodeBytes));
        // Decompress to recover the original byte array.
        byte[] decodeByte = decode(huffmanCodes, huffmanCodeBytes);

        System.out.println(new String(decodeByte, java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Counts how often each byte value occurs and wraps every (value, count) pair in a leaf node.
     *
     * @param bytes the data to analyse
     * @return one {@link Node} per distinct byte value, weighted by its number of occurrences
     */
    public static List<Node> getNodes(byte[] bytes) {
        // key: byte value, value: number of occurrences
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }
        // Turn every map entry into a leaf node.
        List<Node> list = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            list.add(new Node(entry.getKey(), entry.getValue()));
        }
        return list;
    }

    /**
     * Builds a Huffman tree from the given nodes.
     *
     * <p>The list is modified in place; when the method returns it contains only the root.
     *
     * @param nodes the leaf nodes to combine
     * @return the root of the Huffman tree, or {@code null} when {@code nodes} is empty
     */
    public static Node createHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            // Nothing to build; the original code failed here with IndexOutOfBoundsException.
            return null;
        }
        Collections.sort(nodes);
        // Repeat until only the root of the Huffman tree is left in the list.
        while (nodes.size() > 1) {
            // The list is sorted, so the two lightest subtrees are at the front.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            // The new parent carries the combined weight and no byte value.
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
            Collections.sort(nodes);
        }
        return nodes.get(0);
    }

    /** Prints the tree rooted at {@code root} in pre-order, or a notice when the tree is empty. */
    public static void preOrder(Node root) {
        if (root == null) {
            System.out.println("赫夫曼树为空");
        } else {
            root.preorder();
        }
    }

    /** Huffman code table (byte value -> bit string) produced by the most recent compression. */
    static Map<Byte, String> huffmanCodes = new HashMap<>();
    /** Retained only for source compatibility; the codec no longer stores any state in it. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Collects the Huffman codes of all leaves below {@code node} into {@link #huffmanCodes}.
     *
     * @param node          the current node; {@code null} is ignored
     * @param code          the edge taken to reach {@code node}: "0" for left, "1" for right
     * @param stringBuilder the path accumulated so far (not modified)
     */
    private static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Copy the prefix so sibling branches do not see each other's suffixes.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: descend left with "0" and right with "1".
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // Leaf: the accumulated path is this byte's code.
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Convenience overload: rebuilds {@link #huffmanCodes} from the tree rooted at {@code root}.
     *
     * @param root the root of the Huffman tree
     * @return the rebuilt code table, or {@code null} when {@code root} is {@code null}
     */
    private static Map<Byte, String> getCodes(Node root) {
        if (root == null) {
            return null;
        }
        // Drop codes left over from a previous input.
        huffmanCodes.clear();
        if (root.data != null) {
            // A tree consisting of a single leaf would otherwise get the empty code,
            // which makes the data compress to nothing and become unrecoverable.
            huffmanCodes.put(root.data, "0");
        } else {
            getCodes(root, "", new StringBuilder());
        }
        return huffmanCodes;
    }

    /**
     * Number of significant bits stored in the last byte of the most recently compressed
     * array (1..8), or 0 when that array is empty.
     */
    public static int lastStrLen = 0;

    /**
     * Encodes {@code bytes} with the given code table and packs the resulting bit string
     * into bytes, eight bits per byte. The last byte may hold fewer bits; their count is
     * stored in {@link #lastStrLen}.
     *
     * @param bytes        the original data
     * @param huffmanCodes the code table covering every value in {@code bytes}
     * @return the packed Huffman-encoded bytes
     * @throws IllegalArgumentException if a byte has no entry in {@code huffmanCodes}
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        // Concatenate the code of every input byte into one bit string.
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                throw new IllegalArgumentException("No Huffman code for byte " + b);
            }
            bits.append(code);
        }
        System.out.println("测试" + bits);
        // Number of bytes needed, rounding up for a trailing partial chunk.
        byte[] packed = new byte[(bits.length() + 7) / 8];
        lastStrLen = 0;
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            String chunk = bits.substring(i, Math.min(i + 8, bits.length()));
            // Always record the chunk width; after the loop it holds the width of the
            // last chunk, including the case where that chunk is a full 8 bits.
            lastStrLen = chunk.length();
            // Byte.parseByte rejects 8-bit strings whose top bit is 1 (value > 127),
            // so parse as int and narrow.
            packed[index++] = (byte) Integer.parseInt(chunk, 2);
        }
        return packed;
    }

    /**
     * Compresses {@code bytes} with Huffman coding. The code table is left in
     * {@link #huffmanCodes} and the final chunk width in {@link #lastStrLen}; both are
     * required by {@link #decode(Map, byte[])}.
     *
     * @param bytes the original data
     * @return the compressed bytes; an empty array for empty input
     */
    public static byte[] huffmanZip(byte[] bytes) {
        if (bytes.length == 0) {
            huffmanCodes.clear();
            lastStrLen = 0;
            return new byte[0];
        }
        // Count byte frequencies, build the tree, derive the codes, then pack.
        List<Node> nodes = getNodes(bytes);
        Node huffmanTreeRoot = createHuffmanTree(nodes);
        Map<Byte, String> codes = getCodes(huffmanTreeRoot);
        return zip(bytes, codes);
    }

    /**
     * Converts a packed byte back into its bit string.
     *
     * @param flag {@code true} for a full 8-bit byte, {@code false} for the last byte of the
     *             array, which holds only {@link #lastStrLen} bits
     * @param b    the packed byte
     * @return the low 8 (or {@code lastStrLen}) bits of {@code b} as a string of 0s and 1s
     */
    private static String byteToString(boolean flag, byte b) {
        int width = flag ? 8 : lastStrLen;
        // Mask to the unsigned value, then set a marker bit just above the wanted width
        // so that toBinaryString keeps the leading zeros.
        int temp = (b & 0xFF) | (1 << width);
        String str = Integer.toBinaryString(temp);
        return str.substring(str.length() - width);
    }

    /**
     * Decompresses data produced by {@link #huffmanZip(byte[])}. Relies on
     * {@link #lastStrLen} still describing {@code huffmanBytes}.
     *
     * @param huffmanCodes the code table used for compression
     * @param huffmanBytes the compressed bytes
     * @return the original bytes
     * @throws IllegalStateException    if {@code lastStrLen} is not in 1..8 for non-empty input
     * @throws IllegalArgumentException if the bit stream does not match the code table
     */
    public static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
        if (huffmanBytes.length > 0 && (lastStrLen < 1 || lastStrLen > 8)) {
            throw new IllegalStateException("lastStrLen must be in 1..8 but was " + lastStrLen);
        }
        // Expand the packed bytes back into the bit string (local buffer, no shared state).
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < huffmanBytes.length; i++) {
            boolean isLast = (i == huffmanBytes.length - 1);
            bits.append(byteToString(!isLast, huffmanBytes[i]));
        }
        System.out.println("测试" + bits);
        // Invert the code table for code -> byte lookups.
        Map<String, Byte> map = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            map.put(entry.getValue(), entry.getKey());
        }

        List<Byte> list = new ArrayList<>();
        int start = 0;
        while (start < bits.length()) {
            // Grow the candidate one bit at a time until it matches a code.
            int end = start + 1;
            Byte b = map.get(bits.substring(start, end));
            while (b == null) {
                if (end == bits.length()) {
                    throw new IllegalArgumentException(
                            "Bit stream does not match the Huffman code table at bit " + start);
                }
                end++;
                b = map.get(bits.substring(start, end));
            }
            list.add(b);
            start = end;
        }
        // Copy the decoded values into a primitive array.
        byte[] bytes = new byte[list.size()];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = list.get(i);
        }
        return bytes;
    }

}

/**
 * Node of a Huffman tree. Implements {@link Comparable} so that collections of nodes can be
 * sorted by weight in ascending order.
 */
class Node implements Comparable<Node> {

    /** The byte value stored in a leaf (e.g. 'a' -> 97); {@code null} for internal nodes. */
    Byte data;
    /** Weight: number of occurrences for a leaf, sum of the children's weights otherwise. */
    int weight;
    Node left;
    Node right;

    public Node(Byte data, int weight) {
        this.weight = weight;
        this.data = data;
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }

    /** Orders nodes by ascending weight. */
    @Override
    public int compareTo(Node o) {
        // Integer.compare avoids the overflow that plain subtraction can produce.
        return Integer.compare(this.weight, o.weight);
    }

    /** Prints this subtree in pre-order (node, left, right); intended for debugging. */
    public void preorder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preorder();
        }
        if (this.right != null) {
            this.right.preorder();
        }
    }
}

oldhunter686

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
关于赫夫曼编码压缩数据和解压的具体实现

手打不易，有用就点个赞，哈哈。你们可以增加IO类，用来压缩图片和文件玩一下。参考视频https://www.bilibili.com/video/BV1E4411H73v?p=128视频里面老师的代码有些bug，我改正过来了。package 算法.赫夫曼编码的实现;//数据压缩的实现import java.util.*;public class HuffmanCode { ...
复制链接

扫一扫