哈夫曼树简介
给定N个权值作为N个叶子节点,构造一棵二叉树,若该树的带权路径长度达到最小,称这样的二叉树为最优二叉树,也称为哈夫曼树。哈夫曼树是带权路径长度最短的树,权值较大的结点离根较近。
用数组模拟实现哈夫曼树的代码
#include<algorithm>
#include<cstdio>
#include<cstring>
#include<fstream>
#include<iostream>
#include<string>
using namespace std;
// Counts how often each byte value occurs in `fp` and lists the values that occur.
//
// fp     Stream to scan from its beginning; may be null, in which case nothing
//        is counted.
// name   Out: receives the distinct byte values (0..255) that occur at least
//        once, in ascending order. Must have room for 256 ints.
// count  Out: 256-entry histogram; count[b] is the number of occurrences of
//        byte value b. Always zeroed first.
//
// Returns the number of distinct byte values written to `name`.
int readw(FILE *fp, int *name, int *count) {
    memset(count, 0, sizeof(int) * 256);

    if (fp != nullptr) {
        fseek(fp, 0, SEEK_SET);
        // fgetc yields the byte as an unsigned char converted to int (0..255),
        // or EOF. Keeping it in an int avoids negative indices for bytes >= 0x80
        // and avoids counting the EOF sentinel as a symbol.
        int c;
        while ((c = fgetc(fp)) != EOF) {
            count[c]++;
        }
    }

    int distinct = 0;
    for (int value = 0; value < 256; value++) {
        if (count[value] != 0) {
            name[distinct++] = value;
        }
    }
    return distinct;
}
// One node of the array-backed Huffman tree. Links are indices into the node
// array; -1 means "no such node".
struct HNode {
    short parent;  // index of the parent node, -1 while the node is still a root
    short lchild;  // index of the left child, -1 for a leaf
    short rchild;  // index of the right child, -1 for a leaf
    short name;    // byte value represented by a leaf
    int weight;    // occurrence count (leaf) or sum of both children (interior)
};

// Pointer into an HNode array: either the array itself or one node of it.
using HTree = HNode *;
// Builds a Huffman tree inside the caller-supplied node array.
//
// name  The n distinct symbols; name[i] is the byte value of leaf i.
// w     Weight table indexed by symbol value; leaf i gets weight w[name[i]].
// n     Number of leaves.
// ht    Node storage. Slots 0..n-1 become the leaves and slots n..2n-2 the
//       interior nodes, so it must hold at least 2*n-1 nodes (one node when
//       n <= 0).
//
// Returns a pointer to the root node (ht + 2n - 2). For n <= 0 the result is
// ht itself, initialised as a childless placeholder node with weight 0 so that
// callers never read indeterminate links.
HTree create_huffman_tree(int *name, int *w, int n, HNode *ht) {
    if (n <= 0) {
        if (ht != nullptr) {
            ht[0].weight = 0;
            ht[0].name = 0;
            ht[0].lchild = -1;
            ht[0].rchild = -1;
            ht[0].parent = -1;
        }
        return ht;
    }

    for (int i = 0; i < n; i++) {
        ht[i].weight = w[name[i]];
        ht[i].name = static_cast<short>(name[i]);
        ht[i].lchild = -1;
        ht[i].rchild = -1;
        ht[i].parent = -1;
    }

    // Each round merges the two lightest parentless nodes into a new node
    // stored at index n + round.
    for (int round = 0; round < n - 1; round++) {
        const int merged = n + round;
        int lowest = 0x7fffffff;
        int second = 0x7fffffff;
        int x1 = -1;  // index of the lightest root
        int x2 = -1;  // index of the second lightest root

        for (int j = 0; j < merged; j++) {
            if (ht[j].parent != -1) {
                continue;  // already merged into some subtree
            }
            // Strict comparisons keep the earliest node on ties, so the tree
            // shape is deterministic for a given input.
            if (ht[j].weight < lowest) {
                second = lowest;
                x2 = x1;
                lowest = ht[j].weight;
                x1 = j;
            } else if (ht[j].weight < second) {
                second = ht[j].weight;
                x2 = j;
            }
        }

        ht[x1].parent = static_cast<short>(merged);
        ht[x2].parent = static_cast<short>(merged);
        ht[merged].weight = ht[x1].weight + ht[x2].weight;
        ht[merged].lchild = static_cast<short>(x1);
        ht[merged].rchild = static_cast<short>(x2);
        ht[merged].parent = -1;
        ht[merged].name = -1;  // interior nodes carry no symbol
    }

    return ht + 2 * n - 2;
}
// Walks the tree depth-first and records the bit string of every leaf.
//
// tp          Base of the node array.
// root        Index (into tp) of the subtree to process.
// level       Depth of `root`; the outermost call passes 0.
// code        Edge label that led to `root`: '0' for a left edge, '1' for a
//             right edge. The value given for the tree's root is only a
//             placeholder and never appears in a code.
// dictionary  Out: dictionary[symbol] receives the NUL-terminated string of
//             '0'/'1' characters for each leaf symbol.
//
// A tree that consists of a single leaf gets the one-bit code "0"; otherwise
// that symbol would have an empty code and produce no output bits at all.
//
// Uses a function-local static path buffer, so it is not reentrant.
void create_dictionary(HTree tp, int root, int level, char code, char dictionary[][256]) {
    // Slot 0 holds the root placeholder, slots 1..255 the longest possible
    // code for 256 symbols, and one more slot the terminator.
    static char str[257];

    str[level] = code;

    const bool is_leaf = tp[root].lchild == -1 && tp[root].rchild == -1;
    if (is_leaf) {
        if (level == 0) {
            std::strcpy(dictionary[tp[root].name], "0");
            return;
        }
        str[level + 1] = '\0';
        // Skip the root placeholder stored in str[0].
        std::strcpy(dictionary[tp[root].name], str + 1);
        return;
    }

    if (tp[root].lchild != -1) {
        create_dictionary(tp, tp[root].lchild, level + 1, '0', dictionary);
    }
    if (tp[root].rchild != -1) {
        create_dictionary(tp, tp[root].rchild, level + 1, '1', dictionary);
    }
}
// Encodes a file with the given code table and writes the bits packed eight
// per byte, most significant bit first.
//
// target      Path of the compressed output file (opened in binary mode).
// dictionary  dictionary[b] is the NUL-terminated '0'/'1' code of byte value b.
// source      Path of the file to compress; defaults to the path this function
//             used before the parameter existed.
//
// Every emitted bit is also echoed to standard output, followed by a newline.
//
// Returns the number of zero padding bits appended to the last byte (0..7);
// 0 when the output ends on a byte boundary or when a file cannot be opened.
int writeHuffmanCode(std::string target, char dictionary[256][256],
                     const std::string &source = "C:\\Users\\86130\\Desktop\\压缩文件\\待压缩的文件.txt") {
    std::ofstream ofs(target, std::ios::binary);
    std::ifstream ifs(source, std::ios::in);
    if (!ifs.is_open() || !ofs.is_open()) {
        std::cout << "文件打开失败" << std::endl;
        return 0;
    }

    unsigned char bitBuffer = 0;  // bits collected for the byte being built
    int bitCount = 0;             // number of valid bits in bitBuffer

    char a;
    while (ifs.get(a)) {
        // Index through unsigned char: plain char may be signed, which would
        // turn bytes >= 0x80 into negative indices.
        const char *code = dictionary[static_cast<unsigned char>(a)];
        for (int i = 0; code[i]; i++) {
            const int bit = (code[i] == '1') ? 1 : 0;
            bitBuffer = static_cast<unsigned char>((bitBuffer << 1) | bit);
            std::cout << bit;
            if (++bitCount == 8) {
                ofs.put(static_cast<char>(bitBuffer));
                bitBuffer = 0;
                bitCount = 0;
            }
        }
    }

    // Left-align a partial last byte; the low bits become zero padding.
    const int padding = bitCount ? 8 - bitCount : 0;
    if (bitCount > 0) {
        bitBuffer = static_cast<unsigned char>(bitBuffer << padding);
        ofs.put(static_cast<char>(bitBuffer));
    }

    std::cout << std::endl;
    return padding;
}
void decompress(string source, string target, HTree Hroot, HTree tp, int remainingBits) {
ifstream file(source,std::ios::binary|std::ios::ate);
std::streampos fileSize = file.tellg();file.close();
file.close();
ifstream ifs(source, ios::binary);
ofstream ofs(target);
int bitCount = 8; // 缓冲区中比特位的数量
char bitBuffer;
HNode currentNode = *Hroot; // 当前节点为根节点
int num = 0;
while (ifs.read(&bitBuffer, 1)) {
num++;
if(num==fileSize)break;
for (int i = 7; i >= 0; i--) {
if (bitCount == 0) {
//bitBuffer = ifs.get();
bitCount = 8;
}
bool bit = (bitBuffer >> (bitCount - 1)) & 1; // 读取比特位
bitCount--;
cout<<bit;
if (bit) {
currentNode = tp[currentNode.rchild]; // 右子树
} else {
currentNode = tp[currentNode.lchild]; // 左子树
}
if (currentNode.lchild == -1 && currentNode.rchild == -1) {
// 当前节点为叶子节点
char character = currentNode.name;
ofs.put(character);
currentNode = *Hroot; // 重置当前节点为根节点
}
}
}
// 处理最后一个字节的未满比特位的情况
for (int i = 7; i >= remainingBits; i--) {
bool bit = (bitBuffer >> i) & 1; // 读取比特位
cout<<bit;
if (bit) {
currentNode = tp[currentNode.rchild]; // 右子树
} else {
currentNode = tp[currentNode.lchild]; // 左子树
}
if (currentNode.lchild == -1 && currentNode.rchild == -1) {
// 当前节点为叶子节点
char character = currentNode.name;
ofs.put(character);
currentNode = *Hroot; // 重置当前节点为根节点
}
}
ifs.close();
ofs.close();
}
// Demo driver: builds the Huffman code of a fixed input file, prints the code
// table, writes the bit-packed compressed file and decompresses it again.
// Returns 1 when the input file cannot be opened, 0 otherwise.
int main()
{
    const char *sourcePath = "C:\\Users\\86130\\Desktop\\压缩文件\\待压缩的文件.txt";
    const char *packedPath = "C:\\Users\\86130\\Desktop\\压缩文件\\压缩后的文件.txt";
    const char *unpackedPath = "C:\\Users\\86130\\Desktop\\压缩文件\\解压后的文件.txt";

    FILE *fp = fopen(sourcePath, "r");
    if (fp == nullptr) {
        std::cout << "文件打开失败" << std::endl;
        return 1;
    }
    int name[256], count[256];
    const int num = readw(fp, name, count);
    fclose(fp);  // the histogram is all that is needed from this handle

    if (num == 0) {
        // No symbols means there is no tree to build and nothing to encode.
        std::cout << "待压缩的文件为空" << std::endl;
        return 0;
    }

    HNode ht[511];  // 2 * 256 - 1 nodes: enough for 256 distinct byte values
    HTree Hroot = create_huffman_tree(name, count, num, ht);

    char dictionary[256][256] = {};  // zeroed so unused symbols have empty codes
    create_dictionary(ht, static_cast<int>(Hroot - ht), 0, '#', dictionary);
    for (int i = 0; i < num; i++)
        std::cout << static_cast<char>(name[i]) << "的哈夫曼编码为" << dictionary[name[i]] << std::endl;

    const int remainBits = writeHuffmanCode(packedPath, dictionary);
    std::cout << "余下的字节数" << remainBits << std::endl;

    decompress(packedPath, unpackedPath, Hroot, ht, remainBits);
    return 0;
}
如果把其中两个函数(writeHuffmanCode 和 decompress)写成下面这种形式:
// Text variant of the encoder: writes each code as literal '0'/'1' characters,
// one output byte per code bit, so the output is larger than a bit-packed
// encoding.
//
// target      Path of the output file (opened in binary mode).
// dictionary  dictionary[b] is the NUL-terminated '0'/'1' code of byte value b.
// source      Path of the file to encode; defaults to the path this function
//             used before the parameter existed.
//
// Prints a message and returns without writing if either file cannot be opened.
void writeHuffmanCode(std::string target, char dictionary[256][256],
                      const std::string &source = "C:\\Users\\86130\\Desktop\\压缩文件\\待压缩的文件.txt")
{
    std::ofstream ofs;
    ofs.open(target, std::ios::binary);
    std::ifstream ifs;
    ifs.open(source, std::ios::in);
    if (!ifs.is_open() || !ofs.is_open())
    {
        std::cout << "文件打开失败";
        return;
    }

    char a;
    while (ifs.get(a))
    {
        // Index through unsigned char so bytes >= 0x80 do not become negative.
        const char *code = dictionary[static_cast<unsigned char>(a)];
        // Write exactly the code's characters. sizeof on the array parameter
        // would be the size of a pointer, not the length of the code.
        ofs.write(code, static_cast<std::streamsize>(std::strlen(code)));
    }
}
void decompress(string source,string target,HTree Hroot,HTree tp){
ofstream ofs;
ofs.open(target,ios::out);
ifstream ifs(source,ios::binary);
if(!ifs.is_open()||!ofs.is_open())
{
cout<<"文件打开失败";
return;
}
char a;
HNode p = *Hroot;
while(ifs.get(a))
{
if(a=='0') {
if(p.lchild!=-1)p = tp[p.lchild];
else {
ofs<<(char)p.name;
p = tp[(*Hroot).lchild];
}
}
else if(a=='1'){
if(p.rchild!=-1)p = tp[p.rchild];
else {
ofs<<(char)p.name;
p = tp[(*Hroot).rchild];
}
}
}
if(p.lchild==-1&&p.rchild==-1)ofs<<(char)p.name;
ofs.close();
ifs.close();
}
写成这个形式就会造成压缩后的文件变得更大:编码中的每一个比特都被当作一个 '0' 或 '1' 字符写入文件,各占一个字节。正确的写法是使用位操作,把 8 个比特打包进一个字节,更充分地利用空间。(后续会有 STL 版本,emmm,还是看浏览量,emmm)