/*
 * 《数据结构》课程作业:利用Huffman树对文件进行压缩和解压缩。
 * 仅作原理展示用,未经过优化,请酌情参考。
 */
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DEBUG
/* Huffman tree node. GenHT allocates every node of a tree as one array
 * (root at index 0, leaves in the trailing slots); HT points at that array. */
typedef struct HTNode {
/* Byte value represented by a leaf; GenHT does not assign it for internal nodes. */
unsigned char c;
/* Weight: occurrence count for a leaf, sum of both children for an internal
 * node. GenHT finally overwrites the root's w with the number of leaves. */
unsigned w;
/* Links to the parent and the two children; NULL where absent. */
struct HTNode *parent, *lchild, *rchild;
} HTNode, *HT;
/* Huffman code assigned to one byte value. */
typedef struct HCode {
/* Byte value this code stands for. */
unsigned char c;
/* Code bits, left-aligned: GenHC places the first (root-side) bit at bit 63. */
unsigned long long code;
/* Number of valid bits in code, counted from bit 63 downwards. */
unsigned len;
} HCode;
/* Huffman code table: one HCode entry per distinct byte value. */
typedef struct HC {
/* Heap-allocated array of entries (allocated by GenHC, released by DestroyHC). */
HCode *hcode;
/* Number of entries in hcode. */
unsigned size;
} HC;
/* Frequency statistics: one byte value together with its occurrence count. */
typedef struct Word {
/* Byte value. */
unsigned char c;
/* Number of times the byte occurs in the input. */
unsigned w;
} Word;
/* Frequency table for a whole input: the distinct byte values and their counts. */
typedef struct Words {
/* Number of entries in word (distinct byte values found by ParseFile). */
unsigned size;
/* NOTE(review): not referenced by the code visible here; presumably the
 * capacity of word - TODO confirm. */
unsigned volume;
/* Heap-allocated array of entries (allocated by ParseFile, released by DestroyWords). */
Word *word;
} Words;
/* Byte stream: a heap buffer plus its length. Not NUL-terminated. */
typedef struct Data {
/* Buffer holding the bytes. */
unsigned char *string;
/* Number of bytes in string. */
unsigned size;
} Data;
/* Load file `fin` into `data` and fill `words` with the per-byte occurrence counts. */
void ParseFile(const char *fin, Words *words, Data * data);
/* Build the Huffman tree for `words`; *ht receives the node array (root first). */
void GenHT(HT *ht, Words words);
/* Derive the code table `hc` from the tree rooted at `ht`. */
void GenHC(HC *hc, HT ht);
/* Pack the bytes of `orig` into `news` using the codes in `hc`. */
void Encode(HC hc, Data orig, Data *news);
/* Compress file `fin` into file `fout`. */
void Zip(const char *fin, const char *fout);
/* Decompress file `fin` (written by Zip) into file `fout`. */
void Unzip(const char *fin, const char *fout);
/* Release the node array of a Huffman tree. */
void DestroyHT(HT *ht);
/* Release the entries of a code table. */
void DestroyHC(HC *hc);
/* Release the entries of a frequency table. */
void DestroyWords(Words *words);
/* Release the buffer of a byte stream. */
void DestroyData(Data *data);
/* Singly linked stack of tree-node pointers, used for the non-recursive
 * traversal of the Huffman tree. Stack is a pointer to the top node and is
 * NULL when the stack is empty. */
typedef struct StackNode {
/* Tree node stored in this stack cell. */
HTNode *e;
/* Cell below this one, or NULL at the bottom of the stack. */
struct StackNode *next;
} StackNode, *Stack;
/* Make *S an empty stack. */
void InitStack(Stack *S);
/* Push `e` on top of *S; returns true. */
bool Push(Stack *S, HTNode *e);
/* Pop the top of *S into *e (when e is non-NULL); returns false if the stack is empty. */
bool Pop(Stack *S, HTNode **e);
/* Return true when S holds no elements. */
bool StackEmpty(Stack S);
/* Free every cell of *S and leave it empty. */
void DestroyStack(Stack *S);
#ifdef DEBUG
/* Debug helper: write every byte of the stream to stdout as a raw character,
 * followed by a single newline. */
void PrintDataC(Data data) {
    int pos = 0;
    while ((unsigned)pos < data.size) {
        putchar(data.string[pos]);
        pos++;
    }
    putchar('\n');
}
/* Debug helper: write every byte of the stream to stdout in base 2, followed
 * by a single newline.
 *
 * Each byte is printed without leading zeros (a zero byte prints as "0") and
 * the bytes are not separated, which is the format the previous itoa-based
 * version produced. The conversion is done by hand because itoa is not part
 * of standard C. */
void PrintDataB(Data data) {
    for (unsigned i = 0; i < data.size; i++) {
        unsigned value = data.string[i];
        char digits[CHAR_BIT + 1];
        size_t pos = sizeof digits - 1;

        digits[pos] = '\0';
        /* Fill the buffer from the least significant bit backwards. */
        do {
            digits[--pos] = (char)('0' + (value & 1u));
            value >>= 1;
        } while (value != 0);
        fputs(&digits[pos], stdout);
    }
    putchar('\n');
}
/* Debug helper: print one "byte value: count" line per entry of the
 * frequency table. */
void PrintWords(Words words) {
    for (unsigned i = 0; i < words.size; i++) {
        /* c is promoted to int; w is unsigned and needs %u. */
        printf("%d: %u\n", words.word[i].c, words.word[i].w);
    }
}
/* Debug helper: walk the tree (left subtree, right subtree, then the node
 * itself) and print "| byte: weight | " for every leaf. */
void PrintHT(HT ht) {
    if (ht == NULL) {
        return;
    }

    HTNode *left = ht->lchild;
    HTNode *right = ht->rchild;

    PrintHT(left);
    PrintHT(right);
    if (!left && !right) {
        printf("| %u: %u | ", ht->c, ht->w);
    }
}
/* Debug helper: print one "char: length, bits" line per code table entry.
 *
 * GenHC stores each code left-aligned in a 64-bit word (first bit at bit 63),
 * so the bits are read from the top of the word downwards and exactly `len`
 * of them are printed. */
void PrintHC(HC hc) {
    char bits[65]; /* up to 64 code bits plus the terminator */

    for (unsigned i = 0; i < hc.size; i++) {
        const HCode *entry = &hc.hcode[i];
        /* Clamp so a bogus length can never overrun the buffer. */
        unsigned shown = entry->len > 64 ? 64 : entry->len;

        for (unsigned b = 0; b < shown; b++) {
            bits[b] = ((entry->code >> (63 - b)) & 1ULL) ? '1' : '0';
        }
        bits[shown] = '\0';
        printf("%c: %u, %s\n", entry->c, entry->len, bits);
    }
}
/* Compare two already-open binary streams byte by byte.
 * Returns true only when both have the same length and the same contents;
 * the first differing offset is reported on stdout. */
static bool CheckStreams(FILE *fp1, FILE *fp2) {
    if (fseek(fp1, 0, SEEK_END) != 0 || fseek(fp2, 0, SEEK_END) != 0) {
        return false;
    }
    long fsize1 = ftell(fp1);
    long fsize2 = ftell(fp2);
    if (fsize1 < 0 || fsize2 < 0 || fsize1 != fsize2) {
        return false;
    }
    rewind(fp1);
    rewind(fp2);

    for (long i = 0; i < fsize1; i++) {
        int c1 = fgetc(fp1);
        int c2 = fgetc(fp2);
        if (c1 == EOF || c2 == EOF) {
            return false; /* read error or file shrank underneath us */
        }
        if (c1 != c2) {
            printf("at fsize = %ld, c1 is %d, c2 is %d\n", i, c1, c2);
            return false;
        }
    }
    return true;
}

/* Check whether two files have identical contents.
 *
 * file1, file2: paths of the files to compare (opened in binary mode).
 * Returns true when both files can be opened and hold exactly the same
 * bytes; false when they differ, or when either file cannot be opened or
 * measured. On a content mismatch the offset and the two byte values
 * (0-255) are printed to stdout. Both files are closed on every path. */
bool Check(char *file1, char *file2) {
    FILE *fp1 = fopen(file1, "rb");
    FILE *fp2 = fopen(file2, "rb");
    bool same = fp1 != NULL && fp2 != NULL && CheckStreams(fp1, fp2);

    if (fp1) {
        fclose(fp1);
    }
    if (fp2) {
        fclose(fp2);
    }
    return same;
}
#endif
/* Demo driver: compress each sample file, decompress it again under a new
 * name, and print 1 when the round trip reproduced the original (0 otherwise).
 * Check is only defined when DEBUG is defined. */
int main(void) {
    /* Round-trip an image. */
    Zip("pic.png", "pic.png.hzip");
    Unzip("pic.png.hzip", "picn.png");
    printf("%d\n", Check("pic.png", "picn.png"));

    /* Round-trip a PDF; compare the same files that were just written. */
    Zip("lab.pdf", "lab.pdf.hzip");
    Unzip("lab.pdf.hzip", "labn.pdf");
    printf("%d\n", Check("lab.pdf", "labn.pdf"));

    /* Round-trip a plain file. */
    Zip("test", "test.hzip");
    Unzip("test.hzip", "testn");
    printf("%d\n", Check("test", "testn"));
    return 0;
}
/* Load a file and collect its byte-frequency statistics.
 *
 * fin:   path of the file to read (opened in binary mode).
 * words: receives one entry per distinct byte value, in ascending byte
 *        order, with its occurrence count; release with DestroyWords.
 * data:  receives the whole file contents; release with DestroyData.
 *
 * An empty file yields data->size == 0 and words->size == 0 (the buffers are
 * still allocated so they can be freed). Any I/O or allocation failure, or a
 * file too large for the `unsigned` size field, prints a message to stderr
 * and terminates the program with EXIT_FAILURE. */
void ParseFile(const char *fin, Words *words, Data * data) {
    FILE *fp = fopen(fin, "rb");
    if (!fp) {
        perror(fin);
        exit(EXIT_FAILURE);
    }

    /* Load the file. */
    long length = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        length = ftell(fp);
    }
    if (length < 0 || (unsigned long)length > UINT_MAX) {
        fprintf(stderr, "ParseFile: %s: cannot determine a usable file size\n", fin);
        exit(EXIT_FAILURE);
    }
    rewind(fp);

    data->size = (unsigned)length;
    /* Never request 0 bytes: malloc(0) may legitimately return NULL. */
    data->string = malloc(data->size ? data->size : 1);
    if (!data->string) {
        fprintf(stderr, "ParseFile: out of memory\n");
        exit(EXIT_FAILURE);
    }
    if (fread(data->string, 1, data->size, fp) != data->size) {
        fprintf(stderr, "ParseFile: %s: short read\n", fin);
        exit(EXIT_FAILURE);
    }
    fclose(fp);

    /* Count the occurrences of every byte value. */
    unsigned cache[256] = {0};
    for (unsigned i = 0; i < data->size; i++) {
        cache[data->string[i]]++;
    }

    unsigned distinct = 0;
    for (int b = 0; b < 256; b++) {
        if (cache[b]) {
            distinct++;
        }
    }

    words->size = distinct;
    /* NOTE(review): volume is not read by the code visible here; it is set to
     * the allocated entry count so the struct is fully initialised - confirm
     * the intended meaning. */
    words->volume = distinct;
    words->word = malloc(sizeof *words->word * (distinct ? distinct : 1));
    if (!words->word) {
        fprintf(stderr, "ParseFile: out of memory\n");
        exit(EXIT_FAILURE);
    }

    unsigned index = 0;
    for (int b = 0; b < 256; b++) {
        if (cache[b]) {
            words->word[index].c = (unsigned char)b;
            words->word[index].w = cache[b];
            index++;
        }
    }
}
/* Build a Huffman tree from byte-frequency statistics.
 *
 * ht:    receives a heap array of 2*words.size-1 nodes; release with
 *        DestroyHT. Index 0 is the root, indices 1..words.size-2 are the
 *        other internal nodes, and the last words.size slots are the leaves
 *        in the order of words.word.
 * words: frequency table; must contain at least one entry.
 *
 * On return the root's w field no longer holds a weight: it is overwritten
 * with the number of leaves, which GenHC and Zip read back from there.
 *
 * An empty table or an allocation failure prints a message to stderr and
 * terminates the program with EXIT_FAILURE. */
void GenHT(HT *ht, Words words) {
    if (words.size == 0) {
        fprintf(stderr, "GenHT: no symbols to encode (empty input)\n");
        exit(EXIT_FAILURE);
    }

    const unsigned leaves = words.size;
    const unsigned total = 2 * leaves - 1;
    HTNode *nodes = malloc(sizeof *nodes * total);
    if (!nodes) {
        fprintf(stderr, "GenHT: out of memory\n");
        exit(EXIT_FAILURE);
    }

    for (unsigned i = 0; i < total; i++) {
        nodes[i].c = 0;
        nodes[i].w = 0;
        nodes[i].parent = nodes[i].lchild = nodes[i].rchild = NULL;
    }
    for (unsigned i = 0; i < leaves; i++) {
        nodes[leaves - 1 + i].c = words.word[i].c;
        nodes[leaves - 1 + i].w = words.word[i].w;
    }

    /* Create the internal nodes from index leaves-2 down to the root at 0.
     * Each one adopts the two lightest nodes that have no parent yet. */
    for (unsigned i = leaves - 1; i-- > 0; ) {
        /* `total` is an out-of-range index and serves as "none found yet". */
        unsigned low1 = total;
        unsigned low2 = total;

        for (unsigned j = i + 1; j < total; j++) {
            if (nodes[j].parent) {
                continue; /* already paired */
            }
            /* Strict comparisons: among equal weights the earlier index wins. */
            if (low1 == total || nodes[j].w < nodes[low1].w) {
                low2 = low1;
                low1 = j;
            } else if (low2 == total || nodes[j].w < nodes[low2].w) {
                low2 = j;
            }
        }

        nodes[low1].parent = nodes[low2].parent = &nodes[i];
        nodes[i].lchild = &nodes[low1];
        nodes[i].rchild = &nodes[low2];
        nodes[i].w = nodes[low1].w + nodes[low2].w;
    }

    /* The root's weight slot doubles as the leaf count for later stages. */
    nodes[0].w = leaves;
    *ht = nodes;
}
/* Derive the Huffman code table from a tree built by GenHT.
 *
 * hc: receives a heap array with ht->w entries (the root's w holds the leaf
 *     count); release with DestroyHC. Entries appear in the order the leaves
 *     are reached by a left-to-right post-order walk.
 * ht: root of the tree.
 *
 * Each code is stored left-aligned: the bit chosen at the root ends up at
 * bit 63, a right branch is 1 and a left branch is 0, and len counts the
 * bits. A tree consisting of a single leaf therefore gets a code of length 0.
 *
 * An allocation failure, or a tree with more leaves than ht->w announces,
 * prints a message to stderr and terminates the program with EXIT_FAILURE. */
void GenHC(HC *hc, HT ht) {
    unsigned index = 0; /* next free slot in hc->hcode */

    hc->size = ht->w;
    hc->hcode = malloc(sizeof *hc->hcode * (hc->size ? hc->size : 1));
    if (!hc->hcode) {
        fprintf(stderr, "GenHC: out of memory\n");
        exit(EXIT_FAILURE);
    }
    for (unsigned i = 0; i < hc->size; i++) {
        hc->hcode[i].code = 0;
        hc->hcode[i].len = 0;
    }

    /* Iterative post-order traversal; `recent` is the node visited last, so
     * a right subtree is not entered a second time. */
    HTNode *p = ht;
    HTNode *recent = NULL;
    Stack S;
    InitStack(&S);
    while (p || !StackEmpty(S)) {
        if (p) {
            Push(&S, p);
            p = p->lchild;
            continue;
        }
        if (S->e->rchild && S->e->rchild != recent) {
            p = S->e->rchild;
            continue;
        }

        Pop(&S, &p);
        if (p->lchild == NULL && p->rchild == NULL) {
            if (index >= hc->size) {
                fprintf(stderr, "GenHC: tree has more leaves than announced\n");
                exit(EXIT_FAILURE);
            }
            HCode *entry = &hc->hcode[index++];
            entry->c = p->c;
            /* Climb to the root; every step pushes the bits collected so far
             * one place down and inserts the new branch bit at the top. */
            for (HTNode *walk = p; walk->parent; walk = walk->parent) {
                entry->code >>= 1;
                entry->len++;
                if (walk->parent->lchild != walk) {
                    entry->code |= 1ULL << 63;
                }
            }
        }
        recent = p;
        p = NULL;
    }
}
/* Pack a byte stream into a Huffman bit stream.
 *
 * hc:   code table; codes are left-aligned (first bit at bit 63).
 * orig: bytes to encode.
 * news: receives the packed stream; release with DestroyData. Bits are
 *       written most-significant-bit first and unused trailing bits are 0.
 *       news->size is (total code bits / 8) + 1, so the stream carries one
 *       extra zero byte when the bit count is a multiple of 8 - the same
 *       size the file format has always used.
 *
 * The buffer is sized from the exact bit count, so it can never be overrun.
 * A byte of `orig` without a table entry, a stream too large for the
 * `unsigned` size field, or an allocation failure prints a message to stderr
 * and terminates the program with EXIT_FAILURE. */
void Encode(HC hc, Data orig, Data *news) {
    /* Direct lookup by byte value. Filling from the back keeps the first
     * matching entry, like a front-to-back search would. */
    const HCode *table[256] = {NULL};
    for (unsigned j = hc.size; j-- > 0; ) {
        table[hc.hcode[j].c] = &hc.hcode[j];
    }

    /* First pass: total number of code bits. */
    unsigned long long total_bits = 0;
    for (unsigned i = 0; i < orig.size; i++) {
        const HCode *entry = table[orig.string[i]];
        if (!entry) {
            fprintf(stderr, "Encode: byte %u has no Huffman code\n",
                    (unsigned)orig.string[i]);
            exit(EXIT_FAILURE);
        }
        total_bits += entry->len;
    }

    unsigned long long bytes = total_bits / 8 + 1;
    if (bytes > UINT_MAX) {
        fprintf(stderr, "Encode: encoded stream too large\n");
        exit(EXIT_FAILURE);
    }
    news->string = calloc((size_t)bytes, 1);
    if (!news->string) {
        fprintf(stderr, "Encode: out of memory\n");
        exit(EXIT_FAILURE);
    }
    news->size = (unsigned)bytes;

    /* Second pass: append the codes bit by bit. */
    size_t index = 0;      /* byte currently being filled */
    unsigned usedbits = 0; /* bits already occupied in that byte (0..7) */
    for (unsigned i = 0; i < orig.size; i++) {
        unsigned long long code = table[orig.string[i]]->code;
        unsigned len = table[orig.string[i]]->len;

        /* While the code fills the current byte, flush it and move on. */
        while (usedbits + len >= 8) {
            news->string[index] |= (unsigned char)(code >> (usedbits + 56));
            index++;
            code <<= (8 - usedbits);
            len -= 8 - usedbits;
            usedbits = 0;
        }
        /* The bits below the remaining code bits are zero, so OR-ing the
         * whole top byte only adds the valid ones. */
        news->string[index] |= (unsigned char)(code >> (usedbits + 56));
        usedbits += len;
    }
}
/* Compress a file with Huffman coding.
 *
 * fin:  path of the file to compress.
 * fout: path of the compressed file to create.
 *
 * Output layout, all fields in the host's native representation:
 *   unsigned  original size in bytes
 *   unsigned  number of distinct byte values (leaves of the tree)
 *   unsigned  size of the packed stream in bytes
 *   HTNode[]  the 2*leaves-1 tree nodes exactly as they sit in memory,
 *             including their pointer members (Unzip relocates them)
 *   bytes     the packed stream
 *
 * A failure to open or write the output prints a message to stderr and
 * terminates the program with EXIT_FAILURE. */
void Zip(const char *fin, const char *fout) {
    Words words;
    Data orig, news;
    HT ht;
    HC hc;

    ParseFile(fin, &words, &orig);
    GenHT(&ht, words);
    GenHC(&hc, ht);
    Encode(hc, orig, &news);

    /* Save the data. */
    FILE *fp = fopen(fout, "wb");
    if (!fp) {
        perror(fout);
        exit(EXIT_FAILURE);
    }
    const unsigned leaves = ht->w; /* GenHT stores the leaf count in the root */
    const size_t nodes = 2 * (size_t)leaves - 1;
    bool ok = fwrite(&orig.size, sizeof orig.size, 1, fp) == 1
           && fwrite(&leaves, sizeof leaves, 1, fp) == 1
           && fwrite(&news.size, sizeof news.size, 1, fp) == 1
           && fwrite(ht, sizeof(HTNode), nodes, fp) == nodes
           && fwrite(news.string, sizeof(char), news.size, fp) == news.size;
    if (fclose(fp) != 0) {
        ok = false; /* buffered data may only fail when flushed */
    }

    DestroyWords(&words);
    DestroyData(&orig);
    DestroyData(&news);
    DestroyHT(&ht);
    DestroyHC(&hc);

    if (!ok) {
        fprintf(stderr, "Zip: %s: write failed\n", fout);
        exit(EXIT_FAILURE);
    }
}
/* 解压缩 */
void Unzip(const char *fin, const char *fout) {
unsigned huffmansize;
HT ht;
Data orig, news;
/* 读取数据 */
FILE *fpin = fopen(fin, "rb"), *fpout = fopen(fout, "wb");
if (!fpin || !fpout) exit(0);
fread(&orig.size, sizeof(unsigned), 1, fpin);
orig.string = (unsigned char *)malloc(sizeof(char) * orig.size);
if (!orig.string) exit(0);
fread(&huffmansize, sizeof(unsigned), 1, fpin);
ht = (HT)malloc(sizeof(HTNode) * (2 * huffmansize - 1));
if (!ht) exit(0);
fread(&news.size, sizeof(unsigned), 1, fpin);
news.string = (unsigned char *)malloc(sizeof(char) * news.size);
if (!news.string) exit(0);
fread(ht, sizeof(HTNode), 2 * huffmansize - 1, fpin);
fread(news.string, sizeof(char), news.size, fpin);
fclose(fpin);
/* 修正Huffman树 */
HTNode *base = (ht + 1)->parent;
long long diff = (void *)ht - (void *)base;
for (int i = 0; i < 2 * huffmansize - 1; i++) {
if ((ht + i)->parent) (ht + i)->parent = (void *)(ht + i)->parent + diff;
if ((ht + i)->lchild) (ht + i)->lchild = (void *)(ht + i)->lchild + diff;
if ((ht + i)->rchild) (ht + i)->rchild = (void *)(ht + i)->rchild + diff;
}
/* 解码 */
unsigned index = 0;
HTNode *temp = ht;
unsigned char mask = 1U << 7;
for (int i = 0; i < news.size; i++) {
for (int j = 0; j < 8; j++) {
if ((news.string[i] << j) & mask) temp = temp->rchild;
else temp = temp->lchild;
if (temp->lchild == NULL && temp->rchild == NULL) {
orig.string[index++] = temp->c;
temp = ht;
}
if (index == orig.size) break;
}
}
fwrite(orig.string, sizeof(char), orig.size, fpout);
fclose(fpout);
DestroyHT(&ht);
DestroyData(&orig);
DestroyData(&news);
}
/* Release the node array of a Huffman tree and reset the caller's handle
 * to NULL. */
void DestroyHT(HT *ht) {
    HTNode *nodes = *ht;

    *ht = NULL;
    free(nodes);
}
/* Release the entries of a Huffman code table and leave it empty
 * (hcode == NULL, size == 0), so a stale pointer cannot be reused or freed
 * twice - the same guarantee DestroyHT gives for trees. */
void DestroyHC(HC *hc) {
    free(hc->hcode);
    hc->hcode = NULL;
    hc->size = 0;
}
/* Release the entries of a frequency table and leave it empty
 * (word == NULL, size and volume 0), so a stale pointer cannot be reused or
 * freed twice - the same guarantee DestroyHT gives for trees. */
void DestroyWords(Words *words) {
    free(words->word);
    words->word = NULL;
    words->size = 0;
    words->volume = 0;
}
/* Release the buffer of a byte stream and leave it empty (string == NULL,
 * size == 0), so a stale pointer cannot be reused or freed twice - the same
 * guarantee DestroyHT gives for trees. */
void DestroyData(Data *data) {
    free(data->string);
    data->string = NULL;
    data->size = 0;
}
/* Initialise a stack handle: an empty stack is represented by NULL. */
void InitStack(Stack *S) {
    S[0] = NULL;
}
/* Free every cell of the stack, top to bottom, and leave *S as the empty
 * stack (NULL). The tree nodes the cells referred to are not touched. */
void DestroyStack(Stack *S) {
    for (StackNode *cell = *S; cell != NULL; ) {
        StackNode *below = cell->next;
        free(cell);
        cell = below;
    }
    *S = NULL;
}
/* Push a tree-node pointer onto the stack.
 *
 * S: stack handle; updated to point at the new top cell.
 * e: value to store (stored as-is, not copied or owned).
 * Returns true. An allocation failure prints a message to stderr and
 * terminates the program with EXIT_FAILURE.
 *
 * An empty stack is simply *S == NULL, so linking the new cell in front of
 * the old top covers the empty and non-empty cases alike. */
bool Push(Stack *S, HTNode *e) {
    StackNode *cell = malloc(sizeof *cell);
    if (!cell) {
        fprintf(stderr, "Push: out of memory\n");
        exit(EXIT_FAILURE);
    }
    cell->e = e;
    cell->next = *S;
    *S = cell;
    return true;
}
/* Remove the top cell of the stack.
 *
 * S: stack handle; updated to point at the cell below the removed one.
 * e: when non-NULL, receives the value that was stored in the top cell.
 * Returns false (and changes nothing) when the stack is empty, true
 * otherwise. */
bool Pop(Stack *S, HTNode **e) {
    StackNode *top = *S;

    if (top == NULL) {
        return false;
    }
    if (e != NULL) {
        *e = top->e;
    }
    *S = top->next;
    free(top);
    return true;
}
/* Report whether the stack holds no elements (its handle is NULL). */
bool StackEmpty(Stack S) {
    return S == NULL;
}