Huffman压缩（最终版）

最新推荐文章于 2023-12-29 22:18:06 发布
CPJLJ
最新推荐文章于 2023-12-29 22:18:06 发布
阅读量338
点赞数 1
分类专栏： C
本文链接：https://blog.csdn.net/BobDay/article/details/105805811
版权
C 专栏收录该内容
19 篇文章 0 订阅
订阅专栏
这个程序有个大bug，就是稍大一点的文件不能先压缩在解压，只能压缩后退出程序再解压，不知道是不是内存问题
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/*用Huffman算法写出一个程序实现文件的压缩(和解压缩)*/
//怎么输入数据，这里牵扯到字母转ASCII二进制
//输出二进制

//大体流程
//将读取的数据放进一个数组
//这个数组的元素是指针，指向dumb域, name域, frequency域等域的结构
//在把这个数组变成二叉堆

#define characterKind 128       //7位可以代表128个字符
typedef struct character *leaf; //这个用于之后建树8/8
struct character
{
    int dumb; //判断是否为哑结点
    char name;
    int frequency;
    leaf left;  //左树叶
    leaf right; //右树叶
};

//栈的例程
typedef struct stackNode *Stack;
typedef struct stackNode *Postion;
typedef struct stackNode *SNode;
struct stackNode
{
    int element;
    Postion next;
};

int pop(Stack S);
void push(int element, Stack S);
int isEmpty(Stack S);

//将数据收集起来，设字符种类为M，个数为N。该例程运行时间为O(N*M)
void collectData(char filename[], struct character *collectTable[], int *size);

//开始建堆
typedef struct character *queueNode;      //插入到堆的元素
typedef struct character **priorityQueue; //相当于一个指向指针的指针，即一个数组
typedef struct character *queueMin;       //指向堆中最小的元素的指针
//这个堆需要buildHeap(), deleteMin(),downFilter(), insert()等例程
priorityQueue buildHeap(struct character *collectTable[], int *size);
queueMin deleteMin(priorityQueue Q, int *size);
void downFiler(int i, struct character *collectTable[], int *size); //将collectTable[i]指向的元素下滤到正确位置
void insert(queueNode q, priorityQueue Q, int *size);

//进行两次deleteMin()并合并MIN，再把合并的元素放进去,直到deleteMin()为空
//其实最多执行N-1次合并就结束了,但我们以collectTableSize作为参照
typedef struct character *dumb; //哑结点，连接两个结点的结点(或是连接一个或者两个树叶的结点)
//要建立一个编码表，来记录生成的huffman编码

//用散列表，设置一个128个元素的数组(可以装下ASCII码表的所有元素)
//该数组每一个元素是一个指针，指向一个整型变量encoding
typedef char *HashTable;
//建立散列表的例程

enum direction
{
    leftTree,
    rightTree
}; //用于判断是左树叶还是右树叶,左树叶是编码为0，右树叶编码是1
//1.构建huffman树.
//将它们的和作为哈夫曼树的权值节点,构建到huffman树中;
typedef struct character *huffmanTree;
huffmanTree buildHuffmanTree(priorityQueue Q, huffmanTree T, int *size);
//2.通过哈夫曼树产生哈夫曼编码;
//规则是：从根节点出发，往左走-->0 ，往右走-->1,
//遇到叶子节点的情况，就将它对应的huffman编码写入数组中。
void buildCodingSchedule(huffmanTree T, HashTable codingSchedule[], int bits, int *size, const int tableSize);
void encoding(huffmanTree T, char bitCoding[], char *codingSchedule[], int bits, enum direction dire, const int tableSize); //该例程遍历Huffman树，遍历到非哑结点时将数据记录到散列表

//我们在解压之前要编写配置文件
//我们要解压的话必须要知道这可huffman树，所以在压缩的时候需要编写一个配置文件来
//存储huffman树的信息（各个字符以及字符出现的次数）。
//在配置文件里面将：字符+字符出现的次数存在一行
//在这里要使用itoa这个函数，将次数转换成一个字符串(string)类型存储
//压缩与解压例程
char *compress(char *filename, HashTable codingSchedule[], int *compressNum);                         //返回压缩的字符个数
void WriteToTheConfiguration(priorityQueue Q, int collectTableSizeCopy, char *configurationFileName); //写入配置文件的例程
priorityQueue ReadConfigurationFile(char *ConfigurationFile, int *sum, int *collectTableSize);        //返回一个二叉堆，为解码的时候排除附加的08

char *decompression(char *filename, huffmanTree T, int *sum);

void Compress();
void Decompress();

int main()
{
    int selection = 3; //0代表选择压缩，1代表解压; 初始是3为了进while循环
    //如果输入字符给selection，程序会自动停止
    while ((selection == 0 || selection == 1) || selection != 2)
    {
        if (selection == 0)
            Compress();
        else if (selection == 1)
            Decompress();

        printf("\n请选择服务：0.压缩    1.解压\n");
        printf("提示：(输入选择0或1并按ENTER建结束输入，输入\'2\'结束程序)\n");
        scanf("%d", &selection);
        while (selection != 0 && selection != 1 && selection != 2)
        {
            while ((getchar()) != '\n')
                continue;
            printf("请输入数字0,1或2，并按ENTER键结束输入!\n");
            printf("你选择的服务是：");
            scanf("%d", &selection);
        }
    }
    printf("谢谢使用！\n");
    system("pause");
    return 0;
}

void collectData(char filename[], struct character *collectTable[], int *size) //一个字符串即压缩文件的名称
{

    int i;
    struct character *tmpCell;
    FILE *pf = fopen(filename, "r");
    char ch;
    if (pf == NULL)
    {
        printf("未找到该文件！\n");
        printf("文件可能不存在或程序与文件不在同一文件目录下\n");
        printf("请重新启动程序重试\n");
        printf("谢谢使用！\n");
        exit(0);
    }
    else
    {
        while ((ch = fgetc(pf)) != EOF) //O(N)
        {
            // putchar(ch);
            if (*size == 0) //为空的时候直接插入
            {
                collectTable[0] = malloc(sizeof(struct character)); //collextTable[0]是个哑结点
                tmpCell = malloc(sizeof(struct character));         //忽略了错误检测
                tmpCell->dumb = 0;
                tmpCell->name = ch;
                tmpCell->frequency = 1;
                tmpCell->left = NULL;
                tmpCell->right = NULL;
                collectTable[1] = tmpCell;
                *size += 1;
            }
            else
            {
                for (i = 1; i <= *size; i++) //O(M)
                {
                    if (collectTable[i]->name == ch)
                    {
                        collectTable[i]->frequency += 1;
                        break;
                    }
                }

                if (i > *size) //说明没有找到
                {
                    tmpCell = malloc(sizeof(struct character)); //忽略了错误检测
                    tmpCell->name = ch;
                    tmpCell->dumb = 0;
                    tmpCell->frequency = 1;
                    tmpCell->left = NULL;
                    tmpCell->right = NULL;
                    collectTable[i] = tmpCell;
                    *size += 1;
                }
            }
        }
    }

    fclose(pf);
}

priorityQueue buildHeap(struct character *collectTable[], int *size)
{

    priorityQueue Q;
    int i;

    for (i = *size / 2; i > 0; i--)
    {
        downFiler(i, collectTable, size);
    }
    collectTable[0]->frequency = *size; //保存堆中元素的数量
    Q = collectTable;
    return Q;
}

void downFiler(int i, priorityQueue collectTable, int *size)
{

    int child;
    queueNode tmpCell;

    tmpCell = collectTable[i];
    // printf("size = %d\n", *size);
    for (child = i * 2; child <= *size; child *= 2)
    {
        if (child != *size &&
            collectTable[child]->frequency > collectTable[child + 1]->frequency)
            child = child + 1;

        if (tmpCell->frequency > collectTable[child]->frequency)
        {
            collectTable[i] = collectTable[child];
            i = child;
        }
        else
            break;
    }
    collectTable[i] = tmpCell;
}

queueMin deleteMin(priorityQueue Q, int *size) //删除一个元素记得把Q[0]->frequency或collectTableSize-1
{
    queueMin Node;
    int lastest = *size; //最末尾元素的索引

    Node = Q[1];
    Q[1] = Q[lastest];
    *size -= 1; //堆内元素减一
    Q[0]->frequency -= 1;
    downFiler(1, Q, size);
    return Node;
}

void insert(queueNode q, priorityQueue Q, int *size) //合并的元素插入堆
{

    int i;
    *size += 1;
    Q[0]->frequency += 1;
    Q[*size] = q;
    for (i = *size / 2; i > 0; i--)
        downFiler(i, Q, size);
}

huffmanTree buildHuffmanTree(priorityQueue Q, huffmanTree T, int *size) //一直保持树的右子树不小于左子树
{

    queueMin first;
    queueMin second; //两次DeleteMin所得的权重最小的元素
    dumb dumbNode;

    while (*size != 1)
    {
        first = deleteMin(Q, size);
        second = deleteMin(Q, size);

        // printf("first (dumb%d) %c : %d \n", first->dumb, first->name, first->frequency);
        // printf("second (dumb%d) %c : %d \n", second->dumb, second->name, second->frequency);
        if (first->dumb != 1 && second->dumb != 1)
        { //这个哑结点会有两个树叶
            dumbNode = malloc(sizeof(struct character));
            if (dumbNode == NULL)
            {
                printf("内存不够，请重新启动程序\n");
                exit(0);
            }
            else
            {
                dumbNode->dumb = 1;
                dumbNode->frequency = first->frequency + second->frequency;
                dumbNode->left = first;
                dumbNode->right = second;
            }
        }
        else
        {
            if (first->dumb == 1)
            {
                dumbNode = malloc(sizeof(struct character));
                if (dumbNode == NULL)
                {
                    printf("内存不够，请重新启动程序\n");
                    exit(0);
                }
                else
                {
                    dumbNode->dumb = 1;
                    dumbNode->frequency = first->frequency + second->frequency;
                    dumbNode->right = first;
                    dumbNode->left = second;
                }
            }
            else if (first->dumb != 1 && second->dumb == 1)
            {
                if (dumbNode == NULL)
                {
                    printf("内存不够，请重新启动程序\n");
                    exit(0);
                }
                else
                {
                    dumbNode = malloc(sizeof(struct character));
                    dumbNode->dumb = 1;
                    dumbNode->frequency = first->frequency + second->frequency;
                    dumbNode->right = second;
                    dumbNode->left = first;
                }
            }
        }

        insert(dumbNode, Q, size);
    }
    T = deleteMin(Q, size);
    return T;
}

void encoding(huffmanTree T, char bitCoding[], char *codingSchedule[], int bits,
              enum direction dire, const int tableSize)
//该例程遍历Huffman树，遍历到非哑结点时将数据记录到散列表
{

    int bts = bits + 1;
    if (T->dumb == 0)
    {
        int index = (int)T->name; //codingSchedule的下标
        // printf("bits = %d\n", bits);
        // printf("%d character %c : frequency %d\n", bits, T->name, T->frequency);
        char *tmp = malloc(sizeof(char) * bts); //tmp是字符串数组，额外的一位是'\0'
        for (int i = 0; i < bits; i++)
            tmp[i] = bitCoding[i]; //将bitCoding的元素全部拷贝到tmp中;
        tmp[bits] = '\0';
        codingSchedule[index] = tmp; //codingSchedule的元素(指向字符数组的指针)指向tmp
    }
    else
    {
        dire = leftTree;
        bitCoding[bits] = '0';
        encoding(T->left, bitCoding, codingSchedule, bts, dire, tableSize);
        dire = rightTree;
        bitCoding[bits] = '1';
        encoding(T->right, bitCoding, codingSchedule, bts, dire, tableSize);
    }
}

void buildCodingSchedule(huffmanTree T, HashTable codingSchedule[],
                         int bits, int *size, const int tableSize)
{

    enum direction dire = leftTree;
    char *bitCoding = malloc(sizeof(char) * (*size * 2)); //防止这个数组可能会越界
    encoding(T, bitCoding, codingSchedule, bits, dire, tableSize);
}

char *compress(char *filename, HashTable codingSchedule[], int *compressNum) //返回压缩了多少的字符
//从原文件得到字符，将对应的Huffman编码每满8位就写入压缩文件中
//如果最后一个字节不满8位，用0填充, 用一个遍历记录原文件有多少字符
//就不用担心最后填充的0解压时会编码成实际字符了
{

    //先打开原文件
    FILE *source = fopen(filename, "r");
    int j = 0;
    char *compressFileName = strcat(filename, "Compressed.txt");
    FILE *target = fopen(compressFileName, "wb");
    char ch;
    int chr;
    int bits;          //二进制位数
    int size;          //每一个字符相对于编码的大小
    int num = 0;       //要转换的十进制数字(每8位就转换成二进制存储到压缩文件)
    char compressChar; //要存入壓縮文件的字符
    int S = 8;
    int i;
    int fNum;
    while ((ch = fgetc(source)) != EOF)
    {
        *compressNum += 1;
        // printf("*compressNum = %d\n", *compressNum);

        chr = (int)ch; //强制类型转换
        i = 0;
        while (codingSchedule[chr][i] != '\0')
        {
            if (codingSchedule[chr][i] == '1')
                num += pow(2, S - 1);
            // printf("S = %d, num = %d\n", S, num);
            S -= 1;
            if (S == 0)
            {
                compressChar = (char)num;
                fNum = fwrite(&compressChar, sizeof(char), 1, target);
                S = 8;
                num = 0;
            }
            i++;
        }
    }
    if (S != 8) //還有尾巴
        fNum = fwrite(&num, sizeof(int), 1, target);
    fclose(source);
    fclose(target);
    return compressFileName;
}

void WriteToTheConfiguration(priorityQueue Q, int collectTableSizeCopy, char *configurationFileName) //写入配置文件的例程
{
    FILE *fp = fopen(configurationFileName, "w");
    for (int i = 1; i <= collectTableSizeCopy; i++)
        fprintf(fp, "%c:%d\n", Q[i]->name, Q[i]->frequency);

    fclose(fp);
}

priorityQueue ReadConfigurationFile(char *ConfigurationFile, int *sum, int *collectTableSize) //返回字符的频率总会，为解码的时候排除附加的0
{
    int i = 0;
    int y = 0;
    priorityQueue Result;
    int fre; //准换成数字的frequency
    char before;
    char ch; //存储冒号之前的字符
    struct character *tmpCell;
    ConfigurationFile = strcat(ConfigurationFile, "ConfigurationFile.txt");
    // printf("0conpressFileName = %s\n", ConfigurationFile);
    FILE *pf = fopen(ConfigurationFile, "r");
    int colon = 0; //0代表冒号之前，1代表冒号之后
    int chrI = 0;  //字符强制类型准换的十进制值
    struct character *result[characterKind + 1];
    char frequency[20];                           //存储冒号之后的字符
    result[0] = malloc(sizeof(struct character)); //result[0]是个哑结点
    if (result[0] == NULL)
    {
        printf("内存不够!!!\n");
        printf("请重启程序重试\n");
        exit(0);
    }
    y++; //数组从result[y(1)]开始存储数据
    //在这里直接建树怎么样
    if (pf == NULL)
    {
        printf("未找到配置文件!\n");
        printf("文件解压失败，请退出程序重试!\n");
        printf("谢谢使用\n");
        exit(0);
    }
    else
    {
        while ((ch = fgetc(pf)) != EOF)
        {
            if (ch == ':')
            {
                colon = 1;
                continue;
            }
            if (colon == 0)
                before = ch;
            if (colon == 1)
            {
                frequency[i] = ch;
                i++;
                while ((ch = fgetc(pf)) != '\n')
                { //可以忽略掉每一行最后的多余的'\n',作用就可以选出我们需要记录的'\n'
                    frequency[i] = ch;
                    i++;
                }
                frequency[i] = '\0'; //转换为字符串
                sscanf(frequency, "%d", &fre);
                tmpCell = malloc(sizeof(struct character));
                tmpCell->dumb = 0;
                tmpCell->name = before;
                tmpCell->frequency = fre;
                tmpCell->left = NULL;
                tmpCell->right = NULL;
                result[y++] = tmpCell;
                (*collectTableSize)++;
                // printf("0collectTableSize = %d\n", *collectTableSize);
                // printf("result[y - 1]->name-> %c:%d\n", result[y - 1]->name, result[y - 1]->frequency);
                //这里就可以放入数组中
                *sum += fre;
                colon = 0;
                i = 0;
            }
        }
        result[0]->frequency = *sum; //记录字符频率的总数
    }
    Result = result;
    return Result;
}

int pop(Stack S)
{
    Postion p;
    int element;
    // printf("H\n");
    // printf("S->%d\n", S->next->element);
    if (S->next == NULL)
    {
        printf("栈为空！！！\n");
        exit(0);
    }
    element = S->next->element;
    // printf("element = %d\n", element);
    p = S->next;
    S->next = S->next->next;
    p->next = NULL;
    S->element -= 1;
    free(p);
    // printf("G\n");
    return element;
}

void push(int element, Stack S)
{
    Postion p;
    SNode tmpCell;

    p = S->next;
    tmpCell = malloc(sizeof(struct stackNode));
    tmpCell->element = element;
    S->next = tmpCell;
    tmpCell->next = p;
    S->element += 1;
}

int isEmpty(Stack S)
{
    return (S->element == 0) ? 1 : 0;
}

char *decompression(char *filename, huffmanTree T, int *sum)
{
    //我需要两个栈
    int i = 0;
    int j = 0;
    int y = 0;
    int enterline = 0;
    int popNum;
    Stack bit = NULL;
    // int bit[8];                                  //八位的bitValue每一位的二进制值
    int Valuedecimal;                           //字符的十进制
    char bitValue;                              //读取的二进制数的值
    char character;                             //要写入解压文件的字符
    FILE *pCompressFile = fopen(filename, "r"); //以二进制模式打开文件
    FILE *resultFP = NULL;                      //指向解压后文件的指针
    huffmanTree t = T;                          //指向树的结点的指针，每次解码完一个字符后重新指向树根
    char *resultFileName = malloc(sizeof(char) * 30);
    if (pCompressFile == NULL)
    {
        printf("Out of space!!!\n");
        exit(0);
    }
    //设计好解压后的文件名
    while (filename[i] != '.')
    {
        resultFileName[i] = filename[i];
        i++;
    }
    j = i;
    resultFileName[j++] = '@';
    while (filename[i] != 'C')
    {
        resultFileName[j] = filename[i];
        j++;
        i++;
    }
    resultFileName[j] = '\0';

    //栈的初始化
    bit = malloc(sizeof(struct stackNode));
    bit->next = NULL;
    bit->element = 0;

    resultFP = fopen(resultFileName, "w");
    if (resultFP == NULL)
    {
        printf("解压失败，请重启程序重试！\n");
        exit(0);
    }
    else
    { //开始解码
        while (*sum >= 0)
        {
            y = 0;
            //读取二进制数
            Valuedecimal = fgetc(pCompressFile);
            if (Valuedecimal == EOF)
                break;

            //读取一字节(8位)的字符
            while (bit->element != 8 && Valuedecimal != 0)
            {
                // printf("1Valuedecimal.txt = %d\n", Valuedecimal);
                push(Valuedecimal % 2, bit);
                Valuedecimal = Valuedecimal / 2;
                y++;
            }
            while (y < 8)
            {
                push(0, bit);
                y++;
            }
            //遍历二进制数组
            while (!isEmpty(bit))
            {
                if (t->dumb == 0)
                {
                    character = t->name;
                    t = T;                              //重新指向树根
                    fprintf(resultFP, "%c", character); //写入文件
                    *sum = (*sum) - 1;
                }
                else
                {
                    popNum = pop(bit);
                    printf("%d", popNum); //打印文件的二进制值
                    enterline++;
                    if (enterline % 4 == 0)
                        printf(" ");
                    if (popNum == 0)
                    {
                        t = t->left;
                    }
                    else if (popNum == 1)
                    {
                        t = t->right;
                    }
                }
            }
        }
    }

    fclose(pCompressFile);
    fclose(resultFP);
    return resultFileName;
}
//压缩
void Compress()
{
    int i = 0;
    int bits = 0; //一个count数记录编码,每一次在Huffman树中找到字符时放进一个encoding
    const int tableSize = characterKind - 1;
     //collectTable[0]不放元素,不过在建堆后可以存储堆中元素的数量
    int collectTableSize = 0; //记录collectTable装了多少种字符了，其实是0
    int collectTableSizeCopy;
    huffmanTree T;
    struct character *collectTable[characterKind + 1]; //记录每个字符出现频率的数组
    HashTable codingSchedule[characterKind - 1];       //ASCII一共有127个字符以0开头，这个就是一个简单的数组，不是散列表，不像该HashTable了
    char *filename = malloc(sizeof(char) * 30);
    char *filename1 = malloc(sizeof(char) * 30);
    char configurationFileName[20] = "";
    int compressNum = 0; //记录压缩了多少的字符
    char *compressFileName;
    priorityQueue Q = NULL;

    printf("请输入要压缩的文件名(带文件后缀以ENTER建结束输入):\n");
    scanf("%s", filename);
    strcpy(filename1, filename);
    collectData(filename, collectTable, &collectTableSize);
    Q = buildHeap(collectTable, &collectTableSize);
    //将Q写入一个配置文件
    collectTableSizeCopy = collectTableSize; //记录collectTable装了多少种字符了，初始是0
    strcpy(configurationFileName, filename);
    strcat(configurationFileName, "Compressed.txtConfigurationFile.txt");
    WriteToTheConfiguration(Q, collectTableSizeCopy, configurationFileName);
    // for (i = 1; i <= collectTableSize; i++)
    // {
    //     printf("Q[%d]'s name is %c and frequency is %d\n", i, Q[i]->name, Q[i]->frequency);
    // } //遍历堆
    // printf("2filename = *%s*\n", filename);
    T = buildHuffmanTree(Q, T, &collectTableSize);
    buildCodingSchedule(T, codingSchedule, bits, &collectTableSizeCopy, tableSize);
    /*开始压缩*/
    // printf("P: %s\n", codingSchedule[80]);
    compressFileName = compress(filename, codingSchedule, &compressNum); //返回压缩的文件
    printf("压缩完成!!!\n");
    printf("总共压缩了%d个字符\n", compressNum);
    printf("压缩后的文件名为%s\n", compressFileName);
    free(T);
}

void Decompress()
{
    int i = 0;
    int sum = 0;              //读取的字符频率值的总和
    int bits = 0;             //一个count数记录编码,每一次在Huffman树中找到字符时放进一个encoding
    int collectTableSize = 0; //记录collectTable装了多少种字符了，其实是0
    huffmanTree T= NULL;
    priorityQueue Q;
    char *resultFileName; //解压后的文件名
    char compressFileName[30];
    char configurationFileName[50];
    HashTable codingSchedule[characterKind - 1]; //ASCII一共有127个字符以0开头，这个就是一个简单的数组，不是散列表，不像该HashTable了

    printf("请输入要解压的文件名(以ENTER建结束输入)\n");
    scanf("%s", compressFileName);
    while (compressFileName[i] != '\0')
    {
        configurationFileName[i] = compressFileName[i];
        i++;
    }
    configurationFileName[i] = '\0';
    printf("开始解压！\n");
    printf("正在读取配置文件...\n");
    Q = ReadConfigurationFile(configurationFileName, &sum, &collectTableSize); //这个数组就是优先队列
    T = buildHuffmanTree(Q, T, &collectTableSize);                             //重建了Huffman树
    //开始以二进制读取压缩文件，并按树解码，sum是字符总数
    // printf("%c\n", T->right->right->right->left->right->right->right->left->right->name);
    // printf("1conpressFileName = %s\n", compressFileName);1
    printf("读取配置文件成功！\n");
    printf("正在解压...\n");
    resultFileName = decompression(compressFileName, T, &sum);
    printf("解压成功!!!\n");
    printf("解压后的文件名为:%s\n", resultFileName);
    free(T);
    //队列入树
    //读取压缩文件
}
CPJLJ
关注
1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Huffman压缩（最终版）

这个程序有个大bug，就是稍大一点的文件不能先压缩在解压，只能压缩后退出程序再解压，不知道是不是内存问题#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>/*用Huffman算法写出一个程序实现文件的压缩(和解压缩)*///怎么输入数据，这里牵...
复制链接

扫一扫
专栏目录