java学习-哈夫曼编码

邡沀

已于 2024-01-18 19:43:07 修改

阅读量417

点赞数 12

分类专栏： java 文章标签： java 学习 windows

于 2024-01-18 19:42:11 首次发布

本文链接：https://blog.csdn.net/tfx19931128/article/details/135682004

版权

java 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

本文详细介绍了哈夫曼编码的工作原理，包括如何通过统计元素概率构建哈夫曼树，以及使用Java实现从数据到哈夫曼编码和解码的过程。通过对比，展示了哈夫曼编码在数据压缩方面的优势。

摘要由CSDN通过智能技术生成

哈夫曼编码

预先统计出一组数据中所有的元素的出现概率。再通过构造树生成各元素表达式
如：一组数据中包含：a,b,c,d,e,f （6种元素）。各元素的出现概率为：
a：45%
b：13%
c：12%
d：16%
e：9%
f：5%
则可生成构造树：树左为0，右为1
6种元素采用定长编码至少需要3bit来表示（即：2^2 = 4 < 6 ≤ 2^3 = 8），数据总和为 n*（3）= 3n;
采用哈夫曼编码后，元素可做以下表示(生成表达式)：
a = 0;
b = 101;
c = 100;
d = 111;
e = 1101;
f = 1100;
数据总和为 n * （45%*1+13%*3+12%*3+16%*3+9%*4+5%*4） = 2.24n
与定长编码相比，压缩了约25%（1 - 2.24/3 ≈ 25.3%）；

实现过程

java的实现代码：
	1.构造一个便于计算的树Mod

/**
 * Huffman tree node. A leaf carries a byte value together with its occurrence
 * count; a node created without a symbol has a {@code null} byte value.
 */
public static class Bc{
        /** Byte value represented by this node, or {@code null} when it has none. */
        public Byte bt;
        /** Occurrence count accumulated for this node. */
        public long ct;
        /** Left subtree, or {@code null}. */
        public Bc left;
        /** Right subtree, or {@code null}. */
        public Bc right;

        /**
         * Creates a node without children.
         *
         * @param bt byte value of the node, may be {@code null}
         * @param ct initial occurrence count
         */
        public Bc(Byte bt, int ct) {
            this.ct = ct;
            this.bt = bt;
        }
    }

2.统计所有元素

/**
 * Counts how often each distinct byte value occurs in the input.
 *
 * @param bytes data to analyse; {@code null} or empty yields an empty list
 * @return one {@code Bc} per distinct byte value, ordered by {@code sort}
 */
public static List<Bc> getBcs(byte[] bytes){
        List<Bc> stats = new ArrayList<>();
        if (bytes == null || bytes.length == 0){
            return stats;
        }
        for (int pos = 0; pos < bytes.length; pos++) {
            byte current = bytes[pos];
            boolean counted = false;
            // Linear search for an existing entry; stop at the first hit.
            for (int k = 0; k < stats.size() && !counted; k++) {
                Bc entry = stats.get(k);
                if (entry.bt == current){
                    entry.ct++;
                    counted = true;
                }
            }
            if (!counted){
                stats.add(new Bc(current,1));
            }
        }
        return sort(stats);
    }

3.通用排序方法

/**
 * Sorts the nodes in place: highest count first; on equal counts, nodes with a
 * byte value come before nodes without one, and byte values ascend.
 *
 * <p>Counts are compared with {@code Long.compare} so that large differences
 * cannot be truncated, and two nodes that both lack a byte value compare as
 * equal so that the ordering is a valid total pre-order for {@code List.sort}.
 *
 * @param list nodes to sort; modified in place
 * @return the same list instance, sorted
 */
public static List<Bc> sort(List<Bc> list){
        list.sort((o1, o2) -> {
            int byCount = Long.compare(o2.ct, o1.ct);
            if (byCount != 0) {
                return byCount;
            }
            boolean firstHasSymbol = o1.bt != null;
            boolean secondHasSymbol = o2.bt != null;
            if (firstHasSymbol && secondHasSymbol) {
                return Byte.compare(o1.bt, o2.bt);
            }
            if (firstHasSymbol) {
                return -1;
            }
            // Symbol-less nodes go last; two of them are considered equal.
            return secondHasSymbol ? 1 : 0;
        });
        return list;
    }

4.将统计元素构造成树：

**public static Bc tree(List<Bc> list){
        if (list.size() == 1) {
            return list.get(0);
        }
        Bc root = new Bc(null, 0);
        Bc left = list.get(list.size() - 1);
        root.ct += left.ct;
        Bc right = list.get(list.size() - 2);
        root.ct += right.ct;
        root.left = left;
        root.right = right;
        list = list.subList(0, list.size() - 2);
        list.add(root);
        sort(list);
        return tree(list);
    }
**

5.通过构造树。获取所有元素的表达式

/**
 * Code-table entry: the bit pattern assigned to one byte value.
 */
public static class CompressNode{
        /** Byte value this entry encodes. */
        public Byte bt;
        /** Code bits, held in the low bits of an int. */
        public int trans;
        /** Code length in bits; needed to tell codes such as {@code 0} and {@code 00} apart. */
        public int offset;

        /**
         * @param bt     byte value being encoded
         * @param trans  code bits
         * @param offset number of significant bits in {@code trans}
         */
        public CompressNode(Byte bt, int trans, int offset) {
            this.offset = offset;
            this.trans = trans;
            this.bt = bt;
        }
    }
/**
 * Derives the code table from a Huffman tree: going left appends a 0 bit,
 * going right appends a 1 bit.
 *
 * <p>A root that itself carries a byte value gets the one-bit code {@code 0}.
 *
 * @param root root of the tree; must not be {@code null}
 * @return one entry per node that carries a byte value, in pre-order
 */
public static List<CompressNode> getCompressNodes(Bc root){
        List<CompressNode> codes = new ArrayList<>();
        if (null != root.bt) {
            codes.add(new CompressNode(root.bt, 0, 1));
        }
        Bc zeroBranch = root.left;
        Bc oneBranch = root.right;
        if (null != zeroBranch) {
            compressNodes(zeroBranch, 0, 1, codes);
        }
        if (null != oneBranch) {
            compressNodes(oneBranch, 1, 1, codes);
        }
        return codes;
    }
    /**
     * Pre-order walk that records a code for every node carrying a byte value.
     * Codes are accumulated in an int, so paths longer than 32 bits do not fit.
     *
     * @param root      node being visited
     * @param value     code bits accumulated on the path to {@code root}
     * @param hierarchy number of bits in {@code value}
     * @param nodeList  receives the generated entries
     */
    private static void compressNodes(Bc root,int value, int hierarchy,List<CompressNode> nodeList){
        if (null != root.bt){
            nodeList.add(new CompressNode(root.bt, value, hierarchy));
        }
        int childDepth = hierarchy + 1;
        int childPrefix = value << 1;
        if (null != root.left){
            compressNodes(root.left, childPrefix, childDepth, nodeList);
        }
        if (null != root.right){
            compressNodes(root.right, childPrefix + 1, childDepth, nodeList);
        }
    }

6 ：将数据通过表达式进行压缩

/**
 * Encodes the data with the given code table.
 *
 * <p>Code bits are packed from the most significant bit of each output byte.
 * One extra byte is appended at the end holding the number of bits used in
 * the last data byte, where 0 means the last data byte is completely used.
 *
 * @param res           data to compress; {@code null} or empty is returned unchanged
 * @param compressNodes code table; when several entries share a byte value the
 *                      last one is used
 * @return the packed code bits followed by the trailing bit-count byte
 * @throws IllegalArgumentException if a byte of {@code res} has no entry in the table
 */
public static byte[] compress(byte[] res,List<CompressNode> compressNodes){
        if (res == null||res.length==0){
            return res;
        }
        // Index the table once by unsigned byte value instead of scanning it per input byte.
        CompressNode[] table = new CompressNode[256];
        for (CompressNode candidate : compressNodes) {
            if (candidate.bt != null) {
                table[candidate.bt & 0xFF] = candidate;
            }
        }
        List<Byte> byteList = new ArrayList<>();
        int c = 0;
        for (byte b : res) {
            CompressNode node = table[b & 0xFF];
            if (node == null) {
                throw new IllegalArgumentException("no code for byte value " + b);
            }
            if (c == 0){
                // The previous output byte is full (or none exists yet): start a new one.
                byteList.add((byte) 0);
            }
            c = setInList(byteList, c, node.trans, node.offset);
        }
        byteList.add((byte)c);
        byte[] bytes = new byte[byteList.size()];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = byteList.get(i);
        }
        return bytes;
    }

    /**
     * Appends the low {@code offset} bits of {@code bt} to the bit stream held in
     * {@code byteList}, most significant bit first, adding new bytes when the
     * current last byte overflows.
     *
     * @param byteList output bytes; must be non-empty, its last byte has {@code c} bits in use
     * @param c        number of bits already used in the last byte (0..7)
     * @param bt       code bits to append
     * @param offset   number of bits to append (codes are ints, so at most 32)
     * @return the number of bits used in the last byte afterwards; 0 when it is exactly full
     */
    private static int setInList(List<Byte> byteList, int c, int bt, int offset) {
        while (true) {
            int lastPos = byteList.size() - 1;
            int last = byteList.get(lastPos);
            int free = 8 - c;
            if (offset < free) {
                byteList.set(lastPos, (byte) (last | (bt << (free - offset))));
                return c + offset;
            }
            if (offset == free) {
                byteList.set(lastPos, (byte) (last | bt));
                return 0;
            }
            // The code does not fit: write its top bits, keep only the rest, continue in a new byte.
            int rest = offset - free;
            byteList.set(lastPos, (byte) (last | (bt >>> rest)));
            bt &= (1 << rest) - 1;
            offset = rest;
            c = 0;
            byteList.add((byte) 0);
        }
    }

7: 解压缩由哈夫曼编码的数据

/**
 * Decodes data produced by {@code compress}.
 *
 * <p>The last byte of {@code cals} is the number of valid bits in the final
 * data byte (0 meaning all 8). The remaining bytes are read bit by bit, most
 * significant bit first, while walking a trie built from the code table; a
 * byte value is emitted every time a complete code has been read. The code
 * table is only read, never reordered.
 *
 * @param cals          compressed data; {@code null} or empty is returned unchanged
 * @param compressNodes code table used for compression; expected to be prefix-free
 * @return the decoded bytes
 * @throws IllegalArgumentException if the trailing bit count or a code length is
 *         out of range, if the bits match no code, or if the data ends in the
 *         middle of a code
 */
public static byte[] unCompress(byte[] cals,List<CompressNode> compressNodes){
        if (cals == null||cals.length==0){
            return cals;
        }
        int dataLength = cals.length - 1;
        if (dataLength == 0) {
            return new byte[0];
        }
        int bitsInLastByte = cals[dataLength] == 0 ? 8 : cals[dataLength];
        if (bitsInLastByte < 1 || bitsInLastByte > 8) {
            throw new IllegalArgumentException("invalid trailing bit count: " + cals[dataLength]);
        }

        // Trie stored in arrays: index 0 is the root, a child index of 0 means "no child".
        int capacity = 1;
        for (CompressNode node : compressNodes) {
            if (node.bt == null) {
                continue;
            }
            if (node.offset < 1 || node.offset > 32) {
                throw new IllegalArgumentException("unsupported code length: " + node.offset);
            }
            capacity += node.offset;
        }
        int[] zeroChild = new int[capacity];
        int[] oneChild = new int[capacity];
        Byte[] symbols = new Byte[capacity];
        int used = 1;
        for (CompressNode node : compressNodes) {
            if (node.bt == null) {
                continue;
            }
            int cur = 0;
            for (int k = node.offset - 1; k >= 0; k--) {
                int[] children = ((node.trans >>> k) & 1) == 0 ? zeroChild : oneChild;
                if (children[cur] == 0) {
                    children[cur] = used++;
                }
                cur = children[cur];
            }
            symbols[cur] = node.bt;
        }

        List<Byte> byteList = new ArrayList<>();
        int cur = 0;
        for (int i = 0; i < dataLength; i++) {
            int validBits = (i == dataLength - 1) ? bitsInLastByte : 8;
            int value = cals[i];
            for (int bit = 0; bit < validBits; bit++) {
                boolean isOne = ((value >> (7 - bit)) & 1) == 1;
                cur = isOne ? oneChild[cur] : zeroChild[cur];
                if (cur == 0) {
                    throw new IllegalArgumentException(
                            "no code matches the bits ending at byte " + i + ", bit " + bit);
                }
                Byte symbol = symbols[cur];
                if (symbol != null) {
                    byteList.add(symbol);
                    cur = 0;
                }
            }
        }
        if (cur != 0) {
            throw new IllegalArgumentException("compressed data ends in the middle of a code");
        }

        byte[] bytes = new byte[byteList.size()];
        for (int k = 0; k < bytes.length; k++) {
            bytes[k] = byteList.get(k);
        }
        return bytes;
    }
    
    /**
     * Reads {@code length} consecutive bits of a byte as an unsigned number.
     * Bit positions are counted from the most significant bit (position 0).
     *
     * @param val    byte to read from
     * @param index  position of the first bit to read
     * @param length number of bits to read
     * @return the selected bits, with the first bit read as the most significant one
     * @throws RuntimeException if {@code index + length} exceeds 8
     */
    public static int getIndexByte(byte val,int index,int length){
        int end = index + length;
        if (end > 8) {
            throw new RuntimeException("byte bit max 8");
        }
        int unsigned = val & 0xFF;
        int result = 0;
        for (int pos = index; pos < end; pos++) {
            // Positions below 0 contribute nothing.
            boolean bitSet = pos >= 0 && ((unsigned >> (7 - pos)) & 1) == 1;
            if (bitSet){
                result += 1 << (end - pos - 1);
            }
        }
        return result;
    }

    /**
     * Reads {@code length} consecutive bits from a byte array as an unsigned
     * number. Bit positions are counted from the most significant bit of
     * {@code vals[0]} (position 0) and continue through the following bytes.
     *
     * @param vals   bytes to read from
     * @param index  position of the first bit to read
     * @param length number of bits to read, at most 32 so the result fits in an int
     * @return the selected bits, with the first bit read as the most significant one
     * @throws IllegalArgumentException if the arguments are negative, {@code length}
     *         exceeds 32, or the range runs past the end of {@code vals}
     */
    public static int getIndexBytes(byte[] vals,int index,int length){
        if (index < 0 || length < 0 || length > 32
                || (long) index + length > 8L * vals.length) {
            throw new IllegalArgumentException("bit range (index=" + index + ", length=" + length
                    + ") does not fit in " + vals.length + " byte(s) or in a 32-bit result");
        }
        int e = 0;
        int end = index + length;
        for (int bit = index; bit < end; bit++) {
            int current = vals[bit >> 3];
            // Shift the accumulated bits up and append the next one.
            e = (e << 1) | ((current >> (7 - (bit & 7))) & 1);
        }
        return e;
    }