/* Huffman compression (the decompression part is not finished yet) */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/*用Huffman算法写出一个程序实现文件的压缩(和解压缩)*/
//怎么输入数据,这里牵扯到字母转ASCII二进制
//输出二进制

//大体流程
//将读取的数据放进一个数组
//这个数组的元素是指针,指向dumb域, name域, frequency域, parent域的结构
//在把这个数组变成二叉堆

#define characterKind 128       // 7 bits can represent 128 distinct characters
typedef struct character *leaf; // child-pointer type used when the tree is built
/* One node: either a real character (leaf) or a dummy node joining two subtrees. */
struct character
{
    int dumb; // 1 for a dummy (internal) node, 0 for a real character
    char name; // the character this node stands for (set for leaves)
    int frequency; // occurrence count; for a dummy node, the sum of its two children
    leaf left;  // left child
    leaf right; // right child
};

// Collects the character counts of a file. With M distinct characters and N
// characters in total, the routine runs in O(N*M).
void collectData(char filename[], struct character *collectTable[], int *size);

// Binary heap (priority queue) section
typedef struct character *queueNode;      // element inserted into the heap
typedef struct character **priorityQueue; // pointer to pointers, i.e. an array of node pointers
typedef struct character *queueMin;       // pointer to the smallest element of the heap
// The heap is served by buildHeap(), deleteMin(), downFiler() and insert()
priorityQueue buildHeap(struct character *collectTable[], int *size);
queueMin deleteMin(priorityQueue Q, int *size);
void downFiler(int i, struct character *collectTable[], int *size); // sifts the element at collectTable[i] down to its correct position
void insert(queueNode q, priorityQueue Q, int *size);

// Tree construction: take the two minima with deleteMin(), merge them, and put
// the merged node back into the heap, until a single node remains.
// At most N-1 merges are needed; the heap size is used as the loop reference.
typedef struct character *dumb; // dummy node: joins two nodes (leaves and/or other dummy nodes)
// A coding table records the generated Huffman codes.

// The table is a plain array indexed by character code;
// each element points to the code string of that character.
typedef char *HashTable;
// Routines that fill the table follow below.

enum direction
{
    leftTree,
    rightTree
}; // tells left branch from right branch: a left branch is coded 0, a right branch 1
// 1. Build the Huffman tree.
// The sum of two frequencies becomes the weight of the node that joins them.
typedef struct character *huffmanTree;
huffmanTree buildHuffmanTree(priorityQueue Q, huffmanTree T, int *size);
// 2. Derive the Huffman codes from the tree.
// Rule: starting at the root, going left appends 0 and going right appends 1;
// when a leaf is reached, its code is written into the table.
void buildCodingSchedule(huffmanTree T, HashTable codingSchedule[], int bits, int *size, const int tableSize);
void encoding(huffmanTree T, char bitCoding[], char *codingSchedule[], int bits, enum direction dire, const int tableSize); // walks the Huffman tree and records the code of every non-dummy node in the table

// A configuration file has to be written before anything can be decompressed.
// Decompression needs the Huffman tree, so compression writes a configuration
// file holding the tree's input data (each character and its occurrence count).
// Each line of the configuration file holds one "character:count" pair;
// the count is written as decimal text.
// Compression and decompression routines
char *compress(char *filename, HashTable codingSchedule[], int *compressNum); // returns the compressed file's name; the number of characters compressed is accumulated in *compressNum
void decompression(char *filename, HashTable codingSchedule[]);
void WriteToTheConfiguration(priorityQueue Q, int collectTableSizeCopy, char *configurationFileName); // writes the configuration file
priorityQueue ReadConfigurationFile(char *ConfigurationFile);                                                   // meant to rebuild the heap so decoding can ignore the padding zeros; heap construction is not implemented yet

void Compress();
void Decompress();

/*
 * Interactive menu: 0 compresses a file, 1 decompresses one, 2 quits.
 *
 * The prompt is shown before any service runs, so no bogus "service failed"
 * message appears on start-up. Non-numeric input is discarded and the user is
 * asked again; end of input terminates the program instead of looping forever.
 */
int main()
{
    int selection = 2;

    for (;;)
    {
        int fields;
        int inputEnded = 0;

        printf("请选择服务:0.压缩    1.解压\n");
        printf("提示:(输入选择0或1并按ENTER建结束输入,输入\'2\'结束程序)\n");
        fields = scanf("%d", &selection);

        /* Keep asking until a value in 0..2 has been read. */
        while (fields != 1 || selection < 0 || selection > 2)
        {
            if (fields == EOF)
            {
                inputEnded = 1;
                break;
            }
            if (fields == 0)
            {
                /* The offending text is still in the stream: drop the line,
                 * otherwise every following scanf would fail the same way. */
                int c;
                while ((c = getchar()) != '\n' && c != EOF)
                    ;
            }
            printf("请输入数字0,1或2,并按ENTER键结束输入!\n");
            printf("你选择的服务是:");
            fields = scanf("%d", &selection);
            if (fields == 1)
                printf("selection = %d\n", selection);
        }

        if (inputEnded || selection == 2)
            break;

        if (selection == 0)
            Compress();
        else
            Decompress();
    }

    printf("谢谢使用!\n");
    system("pause"); /* keeps a Windows console window open */
    return 0;
}

/*
 * Counts how often each character occurs in a file.
 *
 * filename     - name of the file to analyse.
 * collectTable - receives one node per distinct character in slots 1..*size;
 *                slot 0 receives a separately allocated placeholder node.
 *                Only 7-bit ASCII bytes are counted, so at most 128 character
 *                slots plus the placeholder are used; the caller's array must
 *                have room for 129 pointers.
 * size         - in/out: number of distinct characters stored so far.
 *
 * Bytes above 0x7F are skipped because the coding table cannot represent them.
 * The program is terminated if the file cannot be opened or memory runs out.
 */
void collectData(char filename[], struct character *collectTable[], int *size) //一个字符串即压缩文件的名称
{
    FILE *pf = fopen(filename, "r");
    int ch; /* int, not char: EOF must stay distinguishable from every byte */

    if (pf == NULL)
    {
        printf("找不到该文件!\n");
        exit(EXIT_FAILURE);
    }

    if (*size == 0)
    {
        /* Allocate the placeholder up front so that slot 0 is valid even
         * when the file turns out to be empty. */
        collectTable[0] = calloc(1, sizeof(struct character));
        if (collectTable[0] == NULL)
        {
            perror("collectData");
            fclose(pf);
            exit(EXIT_FAILURE);
        }
    }

    while ((ch = fgetc(pf)) != EOF) /* O(N) */
    {
        struct character *cell;
        int i;

        if (ch > 0x7F)
            continue; /* not 7-bit ASCII: cannot be encoded */

        for (i = 1; i <= *size; i++) /* O(M) */
        {
            if (collectTable[i]->name == ch)
            {
                collectTable[i]->frequency += 1;
                break;
            }
        }
        if (i <= *size)
            continue; /* already known, count updated */

        /* First occurrence: append a new leaf at index *size + 1. */
        cell = malloc(sizeof(struct character));
        if (cell == NULL)
        {
            perror("collectData");
            fclose(pf);
            exit(EXIT_FAILURE);
        }
        cell->dumb = 0;
        cell->name = (char)ch;
        cell->frequency = 1;
        cell->left = NULL;
        cell->right = NULL;
        collectTable[i] = cell;
        *size += 1;
    }

    fclose(pf);
}

/*
 * Turns collectTable[1..*size] into a min-heap ordered by frequency.
 *
 * Every node that has a child is sifted down, starting with the last such
 * node. Afterwards the element count is stored in collectTable[0]->frequency.
 * Returns the same array, now seen as a priority queue.
 */
priorityQueue buildHeap(struct character *collectTable[], int *size)
{
    int node = *size / 2;

    while (node > 0)
    {
        downFiler(node, collectTable, size);
        node--;
    }

    /* slot 0 carries the number of elements in the heap */
    collectTable[0]->frequency = *size;
    return collectTable;
}

/*
 * Sifts the element at index i down the min-heap collectTable[1..*size]
 * until neither child has a smaller frequency.
 */
void downFiler(int i, priorityQueue collectTable, int *size)
{
    queueNode moving = collectTable[i];
    int hole = i;
    int next = hole * 2;

    while (next <= *size)
    {
        /* choose the right child when it exists and is the smaller one */
        if (next < *size &&
            collectTable[next + 1]->frequency < collectTable[next]->frequency)
            next++;

        if (moving->frequency <= collectTable[next]->frequency)
            break; /* correct position found */

        collectTable[hole] = collectTable[next];
        hole = next;
        next = hole * 2;
    }

    collectTable[hole] = moving;
}

/*
 * Removes and returns the node with the smallest frequency.
 *
 * The last element takes over the root slot and is sifted down; both *size
 * and the count kept in Q[0]->frequency are decremented.
 */
queueMin deleteMin(priorityQueue Q, int *size)
{
    queueMin smallest = Q[1];

    Q[1] = Q[*size]; /* last element replaces the root */
    *size = *size - 1;
    Q[0]->frequency--;
    downFiler(1, Q, size);

    return smallest;
}

/*
 * Adds node q to the heap.
 *
 * The node is appended behind the last element, both element counters are
 * incremented, and the heap order is restored by sifting down every node
 * that has a child, from the last such node up to the root.
 */
void insert(queueNode q, priorityQueue Q, int *size)
{
    int node;

    *size = *size + 1;
    Q[0]->frequency++;
    Q[*size] = q;

    node = *size / 2;
    while (node > 0)
    {
        downFiler(node, Q, size);
        node--;
    }
}

/*
 * Builds the Huffman tree from the min-heap Q.
 *
 * Q    - heap of nodes ordered by frequency, holding *size elements.
 * T    - ignored on entry; kept only for interface compatibility.
 * size - in/out: number of elements in the heap; 0 when the function returns
 *        successfully.
 *
 * The two least frequent nodes are repeatedly removed and joined under a new
 * dummy node whose frequency is their sum, until one node is left. When the
 * first node removed is itself a dummy node it becomes the right child,
 * otherwise it becomes the left child.
 *
 * Returns the root of the tree, or NULL when the heap is empty or a dummy
 * node cannot be allocated.
 */
huffmanTree buildHuffmanTree(priorityQueue Q, huffmanTree T, int *size) //一直保持树的右子树不小于左子树
{
    if (Q == NULL || size == NULL || *size < 1)
        return NULL; /* nothing to build a tree from */

    while (*size != 1)
    {
        /* the two lightest elements currently in the heap */
        queueMin first = deleteMin(Q, size);
        queueMin second = deleteMin(Q, size);
        struct character *joint = malloc(sizeof(struct character));

        if (joint == NULL)
            return NULL;

        joint->dumb = 1;
        joint->name = '\0'; /* dummy nodes stand for no character */
        joint->frequency = first->frequency + second->frequency;
        if (first->dumb == 1)
        {
            joint->right = first;
            joint->left = second;
        }
        else
        {
            joint->left = first;
            joint->right = second;
        }

        insert(joint, Q, size);
    }

    T = deleteMin(Q, size);
    return T;
}

/*
 * Walks the Huffman tree and stores the code of every leaf in the table.
 *
 * T              - current subtree (NULL is ignored).
 * bitCoding      - scratch buffer holding the path from the root as '0'/'1'
 *                  characters; must have room for the deepest path.
 * codingSchedule - table indexed by character code; each visited leaf gets a
 *                  newly allocated, NUL-terminated copy of its code.
 * bits           - number of path characters currently valid in bitCoding.
 * dire           - unused; kept for interface compatibility.
 * tableSize      - number of entries in codingSchedule; leaves whose character
 *                  code falls outside 0..tableSize-1 are skipped.
 *
 * The program is terminated if a code string cannot be allocated.
 */
void encoding(huffmanTree T, char bitCoding[], char *codingSchedule[], int bits,
              enum direction dire, const int tableSize)
{
    (void)dire;

    if (T == NULL)
        return;

    if (T->dumb == 0)
    {
        int index = (int)T->name; /* slot of this character in the table */
        char *code;

        if (index < 0 || index >= tableSize)
            return; /* would write outside the table */

        code = malloc((size_t)bits + 1); /* one extra byte for '\0' */
        if (code == NULL)
        {
            perror("encoding");
            exit(EXIT_FAILURE);
        }
        memcpy(code, bitCoding, (size_t)bits);
        code[bits] = '\0';
        codingSchedule[index] = code;
        return;
    }

    /* left branch contributes '0', right branch contributes '1' */
    bitCoding[bits] = '0';
    encoding(T->left, bitCoding, codingSchedule, bits + 1, leftTree, tableSize);
    bitCoding[bits] = '1';
    encoding(T->right, bitCoding, codingSchedule, bits + 1, rightTree, tableSize);
}

/*
 * Fills codingSchedule with the Huffman code of every leaf of T.
 *
 * T              - root of the Huffman tree; nothing happens when it is NULL.
 * codingSchedule - table that receives the code strings.
 * bits           - length of the path prefix to start from (0 for the root).
 * size           - number of distinct characters; used to size the scratch
 *                  path buffer.
 * tableSize      - number of entries in codingSchedule.
 *
 * The scratch buffer is released before returning. The program is terminated
 * if the buffer cannot be allocated.
 */
void buildCodingSchedule(huffmanTree T, HashTable codingSchedule[],
                         int bits, int *size, const int tableSize)
{
    char *bitCoding;
    size_t capacity;

    if (T == NULL || size == NULL || *size < 0 || bits < 0)
        return;

    /* twice the symbol count is more than the deepest possible path */
    capacity = (size_t)*size * 2 + (size_t)bits + 1;
    bitCoding = malloc(capacity);
    if (bitCoding == NULL)
    {
        perror("buildCodingSchedule");
        exit(EXIT_FAILURE);
    }

    encoding(T, bitCoding, codingSchedule, bits, leftTree, tableSize);
    free(bitCoding);
}

/*
 * Writes the Huffman-coded form of a file to "<filename>Compressed".
 *
 * filename       - name of the source file. "Compressed" is appended to this
 *                  buffer in place, so it must have room for 10 more
 *                  characters plus the terminator.
 * codingSchedule - table of code strings indexed by character code.
 * compressNum    - incremented once for every character that was encoded.
 *
 * Code bits are packed most-significant-bit first; each full byte is written
 * to the target. A final partial byte is padded with zero bits and written as
 * exactly one byte. Bytes that have no table entry are skipped.
 *
 * Returns filename, which now holds the name of the compressed file. The
 * program is terminated if a file cannot be opened or written.
 */
char *compress(char *filename, HashTable codingSchedule[], int *compressNum) //返回压缩了多少的字符
{
    /* Entries in the table that Compress() allocates (characterKind - 1);
     * keep the two in step. */
    enum { codeTableSize = 127 };

    FILE *source = fopen(filename, "r");
    FILE *target;
    char *compressFileName;
    unsigned char pending = 0; /* bits collected for the next output byte */
    int freeBits = 8;          /* unused bit positions left in pending */
    int ch;                    /* int so that EOF differs from every byte */

    if (source == NULL)
    {
        perror(filename);
        exit(EXIT_FAILURE);
    }

    /* the source is already open, so the name buffer can be reused */
    compressFileName = strcat(filename, "Compressed");
    target = fopen(compressFileName, "wb");
    if (target == NULL)
    {
        perror(compressFileName);
        fclose(source);
        exit(EXIT_FAILURE);
    }

    while ((ch = fgetc(source)) != EOF)
    {
        const char *code;

        if (ch >= codeTableSize)
            continue; /* outside the table: cannot be encoded */
        code = codingSchedule[ch];
        if (code == NULL)
            continue;

        *compressNum += 1;
        for (; *code != '\0'; code++)
        {
            freeBits--;
            if (*code == '1')
                pending |= (unsigned char)(1u << freeBits);
            if (freeBits == 0)
            {
                fputc(pending, target);
                pending = 0;
                freeBits = 8;
            }
        }
    }

    if (freeBits != 8) /* partial last byte, already zero-padded */
        fputc(pending, target);

    fclose(source);
    if (ferror(target))
    {
        perror(compressFileName);
        fclose(target);
        exit(EXIT_FAILURE);
    }
    if (fclose(target) != 0)
    {
        perror(compressFileName);
        exit(EXIT_FAILURE);
    }
    return compressFileName;
}

/*
 * Writes the configuration file: one "character:count" line for each of the
 * heap elements Q[1..collectTableSizeCopy].
 *
 * If the file cannot be opened the reason is reported on stderr and nothing
 * is written.
 */
void WriteToTheConfiguration(priorityQueue Q, int collectTableSizeCopy, char *configurationFileName) //写入配置文件的例程
{
    FILE *fp = fopen(configurationFileName, "w");

    if (fp == NULL)
    {
        perror(configurationFileName);
        return;
    }

    for (int i = 1; i <= collectTableSizeCopy; i++)
        fprintf(fp, "%c:%d\n", Q[i]->name, Q[i]->frequency);

    if (fclose(fp) != 0)
        perror(configurationFileName);
}

/*
 * Parses a configuration file made of "character:count" lines.
 *
 * Each count is converted to an integer and added to a running total.
 * Building the heap from the parsed pairs is not implemented yet, so the
 * function always returns NULL. The program is terminated if the file cannot
 * be opened.
 */
priorityQueue ReadConfigurationFile(char *ConfigurationFile) //返回字符的频率总会,为解码的时候排除附加的0
{
    FILE *pf = fopen(ConfigurationFile, "r");
    char frequency[20]; /* text after the colon */
    char before = '\0'; /* character in front of the colon */
    int sum = 0;        /* total of all counts read so far */
    int colon = 0;      /* 0: before the colon, 1: after it */
    int ch;             /* int so that EOF differs from every byte */

    if (pf == NULL)
    {
        printf("未找到配置文件!\n");
        printf("文件解压失败,请退出程序重试!\n");
        exit(EXIT_FAILURE);
    }

    while ((ch = fgetc(pf)) != EOF)
    {
        size_t len = 0;
        int fre;

        if (ch == ':')
        {
            colon = 1;
            continue;
        }
        if (colon == 0)
        {
            before = (char)ch;
            continue;
        }

        /* ch is the first character of the count: gather the rest of the
         * line, dropping anything that does not fit into the buffer and
         * stopping at end of file as well as at the newline. */
        do
        {
            if (len < sizeof frequency - 1)
                frequency[len++] = (char)ch;
        } while ((ch = fgetc(pf)) != EOF && ch != '\n');
        frequency[len] = '\0';

        if (sscanf(frequency, "%d", &fre) == 1)
            sum += fre; /* this is where the pair would enter the heap */
        colon = 0;
    }

    fclose(pf);

    /* parsed but not used until heap construction is written */
    (void)before;
    (void)sum;
    return NULL;
}

//压缩
void Compress()
{
    int i = 0;
    int bits = 0; //一个count数记录编码,每一次在Huffman树中找到字符时放进一个encoding
    const int tableSize = characterKind - 1;
    huffmanTree huffmanTree;
    struct character *collectTable[characterKind + 1]; //记录每个字符出现频率的数组
    HashTable codingSchedule[characterKind - 1];       //ASCII一共有127个字符以0开头,这个就是一个简单的数组,不是散列表,不像该HashTable了
    //collectTable[0]不放元素,不过在建堆后可以存储堆中元素的数量
    int collectTableSize = 0; //记录collectTable装了多少种字符了,其实是0
    int collectTableSizeCopy;
    char filename[30];
    char filename1[30];
    printf("请输入要压缩的文件名(带文件后缀以ENTER建结束输入):\n");
    scanf("%s", filename);
    strcpy(filename1, filename);
    char configurationFileName[20] = "";
    int compressNum = 0; //记录压缩了多少的字符
    char *compressFileName;
    collectData(filename, collectTable, &collectTableSize);
    priorityQueue Q;
    Q = buildHeap(collectTable, &collectTableSize);
    //将Q写入一个配置文件
    collectTableSizeCopy = collectTableSize; //记录collectTable装了多少种字符了,初始是0
    strcpy(configurationFileName, filename1);
    strcat(configurationFileName, "CompressedConfigurationFile");
    WriteToTheConfiguration(Q, collectTableSizeCopy, configurationFileName);
    // for (i = 1; i <= collectTableSize; i++)
    // {
    //     printf("Q[%d]'s name is %c and frequency is %d\n", i, Q[i]->name, Q[i]->frequency);
    // } //遍历堆
    // printf("2filename = *%s*\n", filename);
    huffmanTree = buildHuffmanTree(Q, huffmanTree, &collectTableSize);
    buildCodingSchedule(huffmanTree, codingSchedule, bits, &collectTableSizeCopy, tableSize);

    /*开始压缩*/
    // printf("P: %s\n", codingSchedule[80]);
    compressFileName = compress(filename, codingSchedule, &compressNum); //返回压缩的文件
    printf("压缩完成!\n");
    printf("总共压缩了%d个字符\n", compressNum);
    printf("压缩后的文件名为%s\n", compressFileName);
}

/*
 * Interactive decompression (unfinished).
 *
 * Asks for the compressed file's name and reads the matching configuration
 * file "<name>ConfigurationFile". Rebuilding the tree and decoding the data
 * are not implemented yet.
 */
void Decompress()
{
    char compressFileName[201];
    /* room for the name, the suffix and the terminator */
    char configurationFileName[sizeof compressFileName + sizeof "ConfigurationFile"];

    printf("请输入要解压的文件名(以ENTER建结束输入)\n");
    if (scanf("%200s", compressFileName) != 1)
        return;

    snprintf(configurationFileName, sizeof configurationFileName,
             "%sConfigurationFile", compressFileName);

    printf("开始解压!\n");
    printf("正在读取配置文件。。。\n");
    /* the result is not needed until decoding exists */
    (void)ReadConfigurationFile(configurationFileName);
    printf("读取配置文件成功!\n");
    /* TODO: build the tree from the queue, then read and decode the compressed file */
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值