#include<stdlib.h>
#include<cctype>
#include<cstring>
#include<fstream>
#include<iostream>
#include<string>
// Number of distinct ASCII codes; used as the size of the weight table and as
// the number of leaf nodes in the Huffman tree.
#define ASCIISIZE 128
// NOTE(review): the name is spelled "Acticle.txt" (not "Article.txt") - confirm it matches the real input file.
#define ARTICLEPATH "Acticle.txt" // name of the input text file
#define CODEPATH "Code.txt" // name of the output file that receives the codes
using namespace std;
// A Huffman code is kept as a NUL-terminated C string of '0' and '1' characters.
typedef char* Huffmancode;

// One node of the Huffman tree. Nodes are stored in an array and refer to each
// other by array index; the index 0 is used to mean "none".
struct Huffman
{
    int lchild;              // index of this node's left child
    int rchild;              // index of this node's right child
    int parent;              // index of this node's parent
    int weight;              // weight of this node
    char ch;                 // character carried by this node
    Huffmancode huffmanCode; // code assigned to this node
};

typedef Huffman Node;         // a single tree node
typedef Huffman* HuffmanTree; // pointer to the node array that forms the tree
int weight[ASCIISIZE] = { 0 }; // weight (occurrence count) of each character, indexed by the character's ASCII code
HuffmanTree tree; // the Huffman tree, stored as an array of nodes
string article; // text of the article read from the input file
void OutFile(); // read the article file and store its text in article
void CountWeight(); // compute the weights from how often each character occurs in article
void SelectMin(int i, int* s1, int* s2);// find the two nodes with the smallest weights
void CreateHuffmanTree(); // build the Huffman tree
void CreateHuffmanCode(); // build the Huffman codes
void CodeInFile(); // write the codes to a file
void OutFile()
{
ifstream ifs(ARTICLEPATH, ios::in);
if (!ifs.is_open())
{
cout << "open error!" << endl;
return;
}
string str;
while (getline(ifs, str))
{
for (int i = 0; i < str.size(); i++)
{
if (str[i] >= -1 && str[i] <= 255 && isalpha(str[i])) //-1~255是isalpha函数的可判断字符的范围,也可以不加此函数去求其他字符的编码
{
article += tolower(str[i]);//将大写字母变成小写字母,也可以不加此函数去区分大小写
}
}
str.clear();
}
ifs.close();
}
void CountWeight() //根据article中的各个字符的数量计算权值
{
for (int i = 0; i < article.size(); i++)
{
weight[article[i]]++; //对应的acsii码值就是字符在weight数组对应的索引
}
}
// Finds the two parentless nodes with the smallest weights among tree[1..i-1].
//
// i  - exclusive upper bound of the node indices to examine.
// s1 - receives the index of the smallest-weight parentless node.
// s2 - receives the index of the next smallest parentless node (never equal
//      to *s1).
//
// Ties are resolved in favour of the lower index. If fewer than two parentless
// nodes exist, the output that could not be determined is set to 0, the index
// that means "no node".
//
// The scan starts at index 1: slot 0 is the "no node" sentinel, not a real
// node, so it must never be chosen as a merge candidate.
void SelectMin(int i, int* s1, int* s2)
{
    int smallest = 0; // 0 == nothing found yet
    int runnerUp = 0;

    for (int j = 1; j < i; j++)
    {
        if (tree[j].parent) // already merged into a subtree
        {
            continue;
        }
        if (smallest == 0 || tree[j].weight < tree[smallest].weight)
        {
            // New minimum: the previous minimum becomes the runner-up.
            runnerUp = smallest;
            smallest = j;
        }
        else if (runnerUp == 0 || tree[j].weight < tree[runnerUp].weight)
        {
            runnerUp = j;
        }
    }

    *s1 = smallest;
    *s2 = runnerUp;
}
void CreateHuffmanTree()
{
int size = 2 * ASCIISIZE - 1; //根据二叉树的性质T = no + n1 + n2, n2 = n0 - 1计算出哈夫曼数的总结点数
tree = (HuffmanTree)malloc(sizeof(Node) * (size + 1));//生成size + 1个节点,从1开始
for (int i = 1; i <= ASCIISIZE; i++) //初始化ASCIISIZE个叶子节点
{
tree[i].ch = i - 1;
tree[i].lchild = tree[i].rchild = tree[i].parent = 0;
tree[i].weight = weight[i - 1]; //i - 1是为了防止weight越界
}
for (int i = ASCIISIZE + 1; i <= size; i++) //初始化剩下的节点(不包括索引0),也就是父节点
{
tree[i].ch = 0;
tree[i].lchild = tree[i].rchild = tree[i].parent = 0;
tree[i].weight = 0;
}
int s1, s2; //s1,s2分别代表找到两个最小的节点
for (int i = ASCIISIZE + 1; i <= size; i++) //给叶子节点找爹,爹的个数已经确定
{
SelectMin(i, &s1, &s2); //找最小节点
tree[i].lchild = s1; //修改i节点左右孩子
tree[i].rchild = s2;
tree[i].weight = tree[s1].weight + tree[s2].weight;//i节点权值和为孩子节点权值和
tree[s1].parent = tree[s2].parent = i; //修改左右的孩子的爹
}
}
void CreateHuffmanCode()
{
int start; //编码开始存在数组中的下标
Huffmancode code = (Huffmancode)malloc(sizeof(char) * ASCIISIZE); //使用一个一维字符数组保存某个字符的编码然后再赋值给对应节点
for (int i = 1; i <= ASCIISIZE; i++) //遍历每个叶子节点
{
start = ASCIISIZE - 1; //逆序存,因为是从叶子找到根节点
code[start] = '\0';
for (int parent = tree[i].parent, now = i; parent != 0; now = parent, parent = tree[parent].parent)//从叶子节点从下往上遍历到根节点
{
if (tree[parent].lchild == now)
{
code[--start] = '0';
}
else
{
code[--start] = '1';
}
}
tree[i].huffmanCode = (Huffmancode)malloc(sizeof(char) * (ASCIISIZE - start));//正序存
strcpy(tree[i].huffmanCode, &code[start]);
}
free(code);
}
void CodeInFile()
{
ofstream ofs(CODEPATH, ios::out);
if (!ofs.is_open())
{
cout << "open error!" << endl;
return;
}
ofs << "字母:" << " " << "编码:" << endl;
for (int i = 1; i <= ASCIISIZE; i++)
{
if (tree[i].weight)
{
ofs << tree[i].ch << " " << tree[i].huffmanCode << endl;
}
}
cout << "编码已存入文件Code.txt" << endl;
}
// Program entry point: reads the article, counts the character weights, builds
// the Huffman tree and the codes, and writes the codes to the output file.
int main()
{
    OutFile();
    CountWeight();
    CreateHuffmanTree();
    CreateHuffmanCode();
    CodeInFile();
#ifdef _WIN32
    // "pause" is a cmd.exe built-in that keeps the console window open. On
    // other systems the command does not exist and would only print a shell
    // error, so it is issued on Windows only.
    system("pause");
#endif
    return 0;
}