24 哈夫曼编码实现字符串的压缩与解压

哈夫曼编码

1. 步骤

  1. 传输的字符串:i like like like java do you like a java
  2. 计算各个字符对应的个数。
  3. 按照字符出现的次数构建一棵哈夫曼树,次数作为权值。
  4. 根据哈夫曼树,给各个字符规定编码(前缀编码),向左的路径为 0,向右的路径为 1。则有:o: 1000 u: 10010 d: 100110 y: 100111 i: 101 a: 110 k: 1110 e: 1111 j: 0000 v: 0001 l: 001 (空格): 01
  5. 按照上面的哈夫曼编码,i like like like java do you like a java 字符串对应的编码为 1010100110111101111010011011110111101001101111011110100001100001110011001111000011001111000100100100110111101111011100100001100001110

2. 数据压缩

  1. 定义哈夫曼树节点类

    /**
     * A node of a Huffman tree built over byte-valued symbols.
     *
     * <p>A leaf carries its symbol in {@code character}; a node constructed with a {@code null}
     * character acts as an internal node and only carries a weight. Nodes compare by ascending
     * weight, so sorting a list of nodes puts the lightest ones first.
     */
    class HuffmanCodeNode implements Comparable<HuffmanCodeNode> {
        /** Symbol stored in this node, or {@code null} when the node has no symbol of its own. */
        private Byte character;
        /** Weight of the node (the value used for ordering). */
        private int value;
        private HuffmanCodeNode leftNode;
        private HuffmanCodeNode rightNode;

        /**
         * Creates a node without children.
         *
         * @param character the symbol, or {@code null} for a node that has no symbol
         * @param value the weight of the node
         */
        public HuffmanCodeNode(Byte character, int value) {
            this.character = character;
            this.value = value;
        }

        /** Prints this node, then its left subtree, then its right subtree to standard output. */
        public void preOrder() {
            System.out.println(this);
            if (this.leftNode != null) {
                this.leftNode.preOrder();
            }
            if (this.rightNode != null) {
                this.rightNode.preOrder();
            }
        }

        public Byte getCharacter() {
            return character;
        }

        public void setCharacter(byte character) {
            this.character = character;
        }

        public int getValue() {
            return value;
        }

        public void setValue(int value) {
            this.value = value;
        }

        public HuffmanCodeNode getLeftNode() {
            return leftNode;
        }

        public void setLeftNode(HuffmanCodeNode leftNode) {
            this.leftNode = leftNode;
        }

        public HuffmanCodeNode getRightNode() {
            return rightNode;
        }

        public void setRightNode(HuffmanCodeNode rightNode) {
            this.rightNode = rightNode;
        }

        @Override
        public String toString() {
            return "HuffmanCodeNode{" +
                    "character=" + character +
                    ", value=" + value +
                    '}';
        }

        /**
         * Orders nodes by ascending weight.
         *
         * @param o the node to compare against
         * @return a negative, zero, or positive number when this node is lighter than, as heavy
         *     as, or heavier than {@code o}
         */
        @Override
        public int compareTo(HuffmanCodeNode o) {
            // Integer.compare cannot overflow, unlike subtracting the two weights.
            return Integer.compare(this.value, o.value);
        }
    }
    
  2. 得到每个节点的数据与权值

    /**
     * Counts how often each byte of {@code str} occurs and wraps every distinct byte in a node.
     *
     * <p>The string is converted with {@link String#getBytes()}, i.e. the platform default
     * charset.
     *
     * @param str the text to analyse
     * @return a mutable list holding one node per distinct byte, weighted by its occurrence count
     */
    public static List<HuffmanCodeNode> getNodes(String str) {
        // Occurrence count per byte value.
        HashMap<Byte, Integer> weights = new HashMap<>();
        for (byte symbol : str.getBytes()) {
            Integer seen = weights.get(symbol);
            weights.put(symbol, seen == null ? 1 : seen + 1);
        }

        // One node per distinct byte, following the map's iteration order.
        ArrayList<HuffmanCodeNode> leaves = new ArrayList<>();
        for (Byte symbol : weights.keySet()) {
            leaves.add(new HuffmanCodeNode(symbol, weights.get(symbol)));
        }
        return leaves;
    }
    
  3. 根据节点构造哈夫曼树

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * <p>The list is used as the working set and is modified: after a successful call it
     * contains only the root.
     *
     * @param huffmanCodeNodes the nodes to merge; consumed by this method
     * @return the root of the tree, or {@code null} when the list is empty
     */
    public static HuffmanCodeNode createHuffmanCodeTree(List<HuffmanCodeNode> huffmanCodeNodes) {
        // Nothing to build from (e.g. an empty input string): report "no tree" instead of
        // failing on get(0) below.
        if (huffmanCodeNodes.isEmpty()) {
            return null;
        }
        while (huffmanCodeNodes.size() > 1) {
            // Re-sort ascending by weight each round. The sort is stable, so nodes of equal
            // weight keep their relative order, which fixes how ties are broken.
            Collections.sort(huffmanCodeNodes);
            // The two lightest nodes become the children of a new internal node.
            HuffmanCodeNode leftNode = huffmanCodeNodes.remove(0);
            HuffmanCodeNode rightNode = huffmanCodeNodes.remove(0);
            // Internal nodes carry no symbol, only the combined weight.
            HuffmanCodeNode parent = new HuffmanCodeNode(null, leftNode.getValue() + rightNode.getValue());
            parent.setLeftNode(leftNode);
            parent.setRightNode(rightNode);
            // The merged node competes with the remaining ones in the next round.
            huffmanCodeNodes.add(parent);
        }
        return huffmanCodeNodes.get(0);
    }
    
  4. 得到每个节点(字符)的哈夫曼编码

    // Empty path prefix handed to the recursive helper for the root's children; the code below
    // only copies it and never appends to it.
    static StringBuilder stringBuilder = new StringBuilder();
    // Code table (symbol -> bit string) produced by the most recent getHuffmanCodes(root) call.
    static HashMap<Byte, String> hashMap = new HashMap<>();

    /**
     * Derives the code table of a Huffman tree: a left edge contributes "0", a right edge "1".
     *
     * <p>Every call produces a fresh table, so codes from a previously processed tree are never
     * mixed into the result. The table is also stored in {@code hashMap}.
     *
     * @param root the root of the tree
     * @return the mapping from symbol to bit string, or {@code null} when {@code root} is
     *     {@code null}
     */
    public static Map<Byte, String> getHuffmanCodes(HuffmanCodeNode root) {
        if (root == null) {
            return null;
        }
        // Start from an empty table; reusing the old one would keep stale codes around.
        hashMap = new HashMap<>();
        if (root.getCharacter() != null) {
            // The tree is a single leaf and has no edges to derive a code from, so the only
            // symbol gets a one-bit code.
            hashMap.put(root.getCharacter(), "0");
            return hashMap;
        }
        getHuffmanCodes(root.getLeftNode(), "0", stringBuilder);
        getHuffmanCodes(root.getRightNode(), "1", stringBuilder);
        return hashMap;
    }

    /**
     * Walks the subtree rooted at {@code node} and records the code of every leaf in
     * {@code hashMap}.
     *
     * @param node the subtree to visit; {@code null} is ignored
     * @param code the bit ("0" or "1") of the edge leading to {@code node}
     * @param sb the path from the root to the parent of {@code node}; left unmodified
     */
    private static void getHuffmanCodes(HuffmanCodeNode node, String code, StringBuilder sb) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(sb).append(code);
        if (node.getCharacter() == null) {
            // Internal node: keep descending.
            getHuffmanCodes(node.getLeftNode(), "0", path);
            getHuffmanCodes(node.getRightNode(), "1", path);
        } else {
            // Leaf: the accumulated path is the symbol's code.
            hashMap.put(node.getCharacter(), path.toString());
        }
    }
    
  5. 压缩字符串

    /**
     * Compresses bytes by replacing each one with its Huffman code and packing the resulting
     * bit string into bytes, eight bits at a time.
     *
     * <p>The final group may hold fewer than eight bits; it is stored as the numeric value of
     * those bits.
     *
     * @param strBytes the bytes to compress
     * @param huffmanCodes the code table mapping each byte to its bit string
     * @return the packed bytes
     */
    public static byte[] zip(byte[] strBytes, Map<Byte, String> huffmanCodes) {
        // Concatenate the codes of all input bytes into one bit string.
        StringBuilder bits = new StringBuilder();
        for (int k = 0; k < strBytes.length; k++) {
            bits.append(huffmanCodes.get(strBytes[k]));
        }

        // Round up so a trailing partial group gets its own byte.
        int totalBits = bits.length();
        byte[] packed = new byte[(totalBits + 7) / 8];
        for (int slot = 0; slot < packed.length; slot++) {
            int from = slot * 8;
            int to = Math.min(from + 8, totalBits);
            packed[slot] = (byte) Integer.parseInt(bits.substring(from, to), 2);
        }
        return packed;
    }
    
  6. 整合代码,方便使用

    /**
     * Convenience overload that runs the whole pipeline for a string: count the bytes, build
     * the tree, derive the code table and pack the encoded bits.
     *
     * <p>The string is converted with {@link String#getBytes()}, i.e. the platform default
     * charset.
     *
     * @param str the text to compress
     * @return the packed bytes
     */
    public static byte[] zip(String str) {
        Map<Byte, String> codeTable = getHuffmanCodes(createHuffmanCodeTree(getNodes(str)));
        return zip(str.getBytes(), codeTable);
    }
    
  7. 测试

    // Demo: compress the sample sentence and print the packed bytes.
    String sample = "i like like like java do you like a java";
    System.out.println(Arrays.toString(HuffmanCodes.zip(sample)));
    

    输出:

    [-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28]
    

3. 数据解压

  1. 将压缩后的字节转为二进制字符串

    /**
     * Converts a byte to its binary string, treating the byte as an unsigned value (0-255).
     *
     * @param flag {@code true} to left-pad the result with zeros to exactly eight characters;
     *     {@code false} to return the shortest representation without leading zeros
     * @param b the byte to convert
     * @return the bits of {@code b} as a string of '0' and '1' characters, at most eight long
     */
    public static String byteToBitString(boolean flag, byte b) {
        // Keep only the low eight bits; otherwise a negative byte is sign-extended and
        // Integer.toBinaryString would produce 32 characters.
        int unsigned = b & 0xFF;
        if (flag) {
            // Setting bit 8 forces a nine-character result; dropping that marker bit leaves
            // the eight zero-padded bits.
            return Integer.toBinaryString(unsigned | 0x100).substring(1);
        }
        return Integer.toBinaryString(unsigned);
    }
    
  2. 将字符串的哈夫曼编码转回为字符序列

    /**
     * Expands packed Huffman bytes back into the original byte sequence.
     *
     * <p>Every byte except the last is expanded to eight bits; the last byte is expanded
     * without padding. The bit string is then split greedily into codes from the table.
     *
     * <p>NOTE(review): because the last byte is expanded without padding, leading zero bits of
     * a short final group cannot be recovered from the packed bytes alone; such input is
     * rejected or decoded short. Fixing this needs the bit count to be stored alongside the
     * data.
     *
     * @param huffmanCodesBytes the packed bytes
     * @param huffmanCodes the code table (symbol to bit string) used for compression; not
     *     consulted when {@code huffmanCodesBytes} is empty
     * @return the decoded bytes; empty when {@code huffmanCodesBytes} is empty
     * @throws IllegalArgumentException if the bit string ends with bits that match no code
     */
    public static byte[] decode(byte[] huffmanCodesBytes, Map<Byte, String> huffmanCodes) {
        // Empty input decodes to empty output, even when no code table exists for it.
        if (huffmanCodesBytes.length == 0) {
            return new byte[0];
        }

        // Rebuild the bit string; only the final byte is left unpadded.
        StringBuilder bits = new StringBuilder();
        int lastIndex = huffmanCodesBytes.length - 1;
        for (int i = 0; i <= lastIndex; i++) {
            bits.append(byteToBitString(i != lastIndex, huffmanCodesBytes[i]));
        }

        // Reverse lookup: bit string -> symbol.
        Map<String, Byte> symbolByCode = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            symbolByCode.put(entry.getValue(), entry.getKey());
        }

        // Grow the window [start, end) one bit at a time until it matches a code, then emit
        // the symbol and start a new window right after it.
        List<Byte> decoded = new ArrayList<>();
        int start = 0;
        for (int end = 1; end <= bits.length(); end++) {
            Byte symbol = symbolByCode.get(bits.substring(start, end));
            if (symbol != null) {
                decoded.add(symbol);
                start = end;
            }
        }
        if (start != bits.length()) {
            throw new IllegalArgumentException(
                    "Trailing bits match no Huffman code: " + bits.substring(start));
        }

        byte[] result = new byte[decoded.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = decoded.get(i);
        }
        return result;
    }
    
  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
哈夫曼编码是一种用于数据压缩的算法,它通过将出现频率较高的字符用较短的编码表示,从而实现对数据的压缩。下面是使用Python实现哈夫曼编码和解码的示例代码:

```python
import heapq
from collections import defaultdict

# 构建哈夫曼树
def build_huffman_tree(freq):
    heap = [[weight, [char, ""]] for char, weight in freq.items()]
    heapq.heapify(heap)
    while len(heap) > 1:
        lo = heapq.heappop(heap)
        hi = heapq.heappop(heap)
        for pair in lo[1:]:
            pair[1] = '0' + pair[1]
        for pair in hi[1:]:
            pair[1] = '1' + pair[1]
        heapq.heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
    return heap[0]

# 生成哈夫曼编码表
def generate_huffman_code(tree):
    huffman_code = {}
    for pair in tree[1:]:
        char = pair[0]
        code = pair[1]
        huffman_code[char] = code
    return huffman_code

# 压缩字符串
def compress_string(string, huffman_code):
    compressed_string = ""
    for char in string:
        compressed_string += huffman_code[char]
    return compressed_string

# 解压字符串
def decompress_string(compressed_string, huffman_code):
    decompressed_string = ""
    code = ""
    for bit in compressed_string:
        code += bit
        for char, c in huffman_code.items():
            if code == c:
                decompressed_string += char
                code = ""
                break
    return decompressed_string

# 示例
string = "hello world"
freq = defaultdict(int)
for char in string:
    freq[char] += 1
tree = build_huffman_tree(freq)
huffman_code = generate_huffman_code(tree)
compressed_string = compress_string(string, huffman_code)
decompressed_string = decompress_string(compressed_string, huffman_code)
print("原始字符串:", string)
print("压缩后的字符串:", compressed_string)
print("解压后的字符串:", decompressed_string)
```

运行以上代码,将输出以下结果:

```
原始字符串: hello world
压缩后的字符串: 0110111011110010111001100
解压后的字符串: hello world
```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值