哈夫曼压缩（二）——中文文本

最新推荐文章于 2023-03-18 19:50:40 发布

LIAO_7053

最新推荐文章于 2023-03-18 19:50:40 发布

阅读量2.4k

点赞数 2

分类专栏： IO流文章标签：哈夫曼压缩中文文本 IO流

本文链接：https://blog.csdn.net/LIAO_7053/article/details/82533315

版权

上篇已经介绍了用哈夫曼算法压缩英文文本，本篇将简单介绍一下用哈夫曼算法压缩中文文本。

一、压缩过程

与压缩英文文本相比，压缩中文文本的区别主要体现在以下几点：

1、采用长度为256的数组来存储编码信息；

2、用字节数组读取汉字时，每个汉字会被读成两个字节，在 Java 中这两个字节表现为负数（范围为 -128 ～ -1），不能直接作为数组的索引，因此需要先对这部分数据进行处理（例如与 0xFF 做按位与，将其转换到 128 ～ 255 的范围）；

3、如果逐个字节地读取汉字，则会得到两个正数，范围为 128 ～ 255，此时可以正常作为索引使用，但读写速率会降低很多；

4、本文用字节流读写数据时，均进行了预先判断及处理，详见代码部分。

代码实现：

package com.liao.Huffman0830v2;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Comparator;
import java.util.PriorityQueue;

public class HufTree {
   
    // Number of distinct byte values; sizes the per-byte tables below.
    private static final int LEN = 256;
    // Occurrence count for each byte value.
    private int[] byteCount = new int[LEN];
    // Huffman code string for each byte value.
    private String[] charCode = new String[LEN];
    // Min-queue of tree nodes ordered by ascending count.
    private PriorityQueue<hufNode> nodeQueue = new PriorityQueue<>(LEN, new Comparator<hufNode>() {
        @Override
        public int compare(hufNode o1, hufNode o2) {
            // Integer.compare avoids the overflow that "o1.count - o2.count"
            // can produce when the operands are far apart.
            return Integer.compare(o1.count, o2.count);
        }
    });

    /**
     * Static nested class representing one node of the Huffman tree.
     */
    private static class hufNode {

        // Character(s) recorded by this node.
        private String str;
        // Occurrence count associated with this node.
        private int count;
        // Left branch.
        private hufNode lchild;
        // Right branch.
        private hufNode rchild;

        /**
         * Creates a node with no children.
         *
         * @param str   the character(s) this node records
         * @param count the occurrence count for this node
         */
        public hufNode(String str, int count) {
            this.count = count;
            this.str = str;
        }
    }

    /**
     * Entry point: compresses a fixed sample file and prints the size of the
     * original and of the compressed output.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Build each path from parent directory + file name so the separator is
        // chosen by the platform; a literal backslash is only a separator on
        // Windows and would otherwise become part of the file name.
        File file = new File("file", "003.txt");
        File file2 = new File("file", "压缩文件1.txt");
        new HufTree().compress(file, file2);// compress the file
        // Sizes are reported using integer division by 1000.
        System.out.println("原文件大小：" + file.length()/1000 + "kb");
        System.out.println("压缩文件大小：" + file2.length()/1000 + "kb");
    }

    /**
     * Compresses {@code file} into {@code file2} by running the steps in order:
     * read the input, count byte frequencies, build the Huffman tree, generate
     * and print the codes, then write the compressed output.
     *
     * @param file  the file to compress
     * @param file2 the file that receives the compressed data
     */
    private void compress(File file, File file2) {
        final byte[] content = readFile(file);// read the whole input
        countChar(content);// count frequencies
        // Build the tree and generate codes starting from its root with an empty prefix.
        generateCode(createTree(), "");
        printCode();// print the generated codes
        writeFile(content, file2);// write the compressed file
    }

    // 将文件转换为字节数组保存
    private byte[] readFile(File file) {
        byte[] bs = new byte[(int) file.length()];// 创建字节数组
        BufferedInputStream bis = null;// 声明字节缓冲流
        try {
            bis = new BufferedInputStream(new FileInputStream(file));
            bis.read(bs);// 将文件读取到字节数组中
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (bis != null)
                    bis.close();// 关闭输入流
            }