要求:(C++和Python实现)
- 从一个命名为text.txt只含有小写字母的文本文档中读取对应的字母的权重
- 根据这些权重创建一个哈夫曼树
- 根据这个哈夫曼树创建一个哈夫曼编码,并将其存到code.txt文本文档中
- 输入一段英文字母,将它存入到sourcefile.txt文本中,再从该文件中将其读出,然后根据创建的哈夫曼编码输出其编码后的字符,存入到SourceCode.txt文本中
#define _CRT_SECURE_NO_WARNINGS//strcpy的使用在不同编译器中会报错,define后就不会影响了
#include<cstdlib>//system
#include<cstring>//strcpy
#include<fstream>//file stream classes (ifstream / ofstream)
#include<iostream>
#include<string>//std::string
#include"Switch.h"//project header: Data record and the Switch letter counter
using namespace std;
// One node of a Huffman tree that is stored as a flat array.
// Links are array positions rather than pointers; the value 0 means "none",
// which is why the code using this type leaves array slot 0 unused.
struct HTNode
{
    int index;   // position of this node inside the tree array
    int weight;  // symbol frequency (leaf) or sum of both children (internal)
    int rchild;  // array position of the right child, 0 if none
    int parent;  // array position of the parent, 0 while the node is a root
    int lchild;  // array position of the left child, 0 if none
};

// A Huffman tree is handled as a pointer to its node array.
typedef HTNode* HuffmenTree;

// Code table: an array of NUL-terminated strings made of '0' / '1'.
typedef char** HuffmanCode;
// Find the two lightest nodes in HT[1..i] that have no parent yet.
//
//   HT  node array; slot 0 is not examined.
//   i   last array position to examine (inclusive).
//   s1  (out) `index` field of the lightest parentless node, 0 if none exists.
//   s2  (out) `index` field of the second lightest one, 0 if fewer than two exist.
//
// Ties are resolved in favour of the lower array position, first for s1 and
// then for s2 among the remaining nodes.
//
// The previous version seeded the search with the magic weight 10000, so any
// node weighing 10000 or more could never be selected and 0 was returned
// instead. Tracking "nothing found yet" explicitly removes that limit.
void Select( HuffmenTree HT,int i ,int &s1,int &s2)
{
    int best = 0;    // array position of the lightest root seen so far, 0 = none
    int second = 0;  // array position of the runner-up, 0 = none

    for (int j = 1; j <= i; ++j)
    {
        if (HT[j].parent != 0)
            continue;  // already merged into a subtree

        if (best == 0 || HT[j].weight < HT[best].weight)
        {
            // Strictly lighter than the current best: the old best becomes
            // the runner-up (it is the earliest lightest of the others).
            second = best;
            best = j;
        }
        else if (second == 0 || HT[j].weight < HT[second].weight)
        {
            second = j;
        }
    }

    s1 = (best != 0) ? HT[best].index : 0;
    s2 = (second != 0) ? HT[second].index : 0;
}
// Build a Huffman tree for n weighted symbols and print every node.
//
//   HT  (out) newly allocated array of 2*n-1 nodes plus an unused slot 0.
//             Leaves sit at positions 1..n in the order of `e`, merged nodes
//             at n+1..2*n-1, the last one being the root. Set to nullptr when
//             n < 1. The array is allocated with new[] and not freed here.
//   n   number of entries in `e`.
//   e   symbol table: e[k].count is the weight of leaf k+1 and e[k].data the
//       character printed next to that leaf.
//
// Changes compared with the previous version:
//   * n < 1 now leaves HT as nullptr instead of unassigned, and n == 1 yields
//     a single-node tree instead of returning without touching HT.
//   * The dump labels each leaf with e[k].data; it used to print 'a'+k, which
//     is wrong as soon as the table does not hold exactly a, b, c, ... in order.
void CreatHuffmenTree(HuffmenTree &HT,int n,Data*& e)
{
    if (n < 1)
    {
        HT = nullptr;
        return;
    }

    const int m = 2 * n - 1;  // n leaves always produce n-1 merged nodes
    HT = new HTNode[m + 1];   // slot 0 stays unused so that 0 can mean "no link"

    for (int i = 1; i <= m; ++i)
    {
        HT[i].index = i;
        HT[i].weight = (i <= n) ? e[i - 1].count : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }

    // Each round merges the two lightest roots of HT[1..i-1] into node i.
    for (int i = n + 1; i <= m; ++i)
    {
        int s1 = 0;
        int s2 = 0;
        Select(HT, i - 1, s1, s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    // Dump: "<label> index weight parent lchild rchild"; merged nodes have an
    // empty label, so their line starts with a single blank.
    for (int i = 1; i <= m; ++i)
    {
        if (i <= n)
            std::cout << e[i - 1].data;
        std::cout << " " << HT[i].index << " " << HT[i].weight << " "
                  << HT[i].parent << " " << HT[i].lchild << " "
                  << HT[i].rchild << '\n';
    }
    std::cout.flush();
}
// Derive the bit string of every leaf and write the table to code.txt.
//
//   HT  tree whose leaves occupy positions 1..n (parent == 0 marks the root).
//   HC  (out) newly allocated table of n+1 entries; HC[k] for k in 1..n is a
//       NUL-terminated string of '0' (left edge) and '1' (right edge) read
//       from the root down to leaf k. HC[0] is nullptr. HC itself is nullptr
//       when HT is null or n < 1. Nothing allocated for HC is freed here.
//   n   number of leaves.
//
// code.txt receives one "<letter>:<bits>" line per leaf, where the letter is
// 'a' + k - 1. NOTE(review): this labelling is only right if leaf k really
// stands for the k-th letter of the alphabet - confirm against the caller.
//
// Changes compared with the previous version: the scratch buffer is released,
// n < 1 no longer writes before the start of that buffer, and HC[0] is
// initialised.
void CreateHuffmanCode(HuffmenTree HT, HuffmanCode& HC, int n)
{
    if (HT == nullptr || n < 1)
    {
        HC = nullptr;
        return;
    }

    HC = new char*[n + 1];
    HC[0] = nullptr;  // slot 0 is unused, mirroring the tree array

    // A code has at most n-1 bits, so n chars hold the longest code plus NUL.
    char* scratch = new char[n];
    scratch[n - 1] = '\0';

    for (int leaf = 1; leaf <= n; ++leaf)
    {
        // Walk from the leaf up to the root, filling the buffer right to left
        // so that the finished string reads root-to-leaf.
        int start = n - 1;
        int child = leaf;
        int up = HT[leaf].parent;
        while (up != 0)
        {
            --start;
            scratch[start] = (HT[up].lchild == child) ? '0' : '1';
            child = up;
            up = HT[up].parent;
        }
        HC[leaf] = new char[n - start];
        std::strcpy(HC[leaf], &scratch[start]);
    }
    delete[] scratch;

    // Opening an ofstream for output only truncates an existing file, which
    // is what the former open(..., ios::ate) call did as well.
    std::ofstream ofs("code.txt");
    for (int leaf = 1; leaf <= n; ++leaf)
    {
        const char letter = static_cast<char>('a' + leaf - 1);
        ofs << letter << ":" << HC[leaf] << '\n';
    }
}
// Despite its name this routine ENCODES: it reads one whitespace-delimited
// word from standard input, stores it in sourcefile.txt, reads that file back
// and writes the code of every lowercase letter, each followed by a blank,
// to SourceCode.txt.
//
//   cd  code table indexed by (letter - 'a' + 1).
//       NOTE(review): assumes entries 1..26 all exist - confirm against the
//       code that builds the table. If cd is null, the word is still saved but
//       nothing is encoded.
//
// Changes compared with the previous version:
//   * Characters are read with get(char&). The old loop stored get()'s int
//     result in a char before comparing it with EOF, which never terminates
//     where char is unsigned and stops early on byte 0xFF where it is signed.
//   * Characters outside 'a'..'z' are skipped instead of being used as an
//     out-of-range index into cd.
void HUffman_Decode(HuffmanCode cd)
{
    std::string str;
    std::cout<<"请输入任意字符串:";
    std::cin>>str;
    std::cout<<"输入的字符串已经写入sourcefile.txt文件"<<std::endl;
    {
        // Scoped so the file is flushed and closed before it is read back.
        std::ofstream src("sourcefile.txt");
        src << str;
    }

    if (cd == nullptr)
        return;  // no code table, nothing to encode with

    std::ifstream ifs("sourcefile.txt");
    std::ofstream ofs("SourceCode.txt");
    char ch;
    while (ifs.get(ch))
    {
        if (ch < 'a' || ch > 'z')
            continue;  // the table only covers lowercase letters
        ofs << cd[ch - 'a' + 1] << " ";
    }
    std::cout<<"编码后的字符串已经写入到SourceCode.txt文件"<<std::endl;
}
int main()
{
HuffmenTree HT;
ifstream ifs;
ifs.open("text.txt", ios::in);
char c;
int i;
int* Elem = new int[26];
for (i = 0; i < 26; i++)
{//默认只读入小写,所以只有26个数组大小
Elem[i] = 0;
}
while ((c = ifs.get()) != EOF)
{
Switch(c, Elem);
}
Data* e = new Data[26];
int j = 0;
for ( i = 0; i < 26; i++)
{//如果存在某个字符不存在,则要进行删除,不需要将其编入哈夫曼树
if (Elem[i] != 0)
{
e[j].count = Elem[i];
e[j].data = 97 + i;
j++;
}
}
Data* a = e;
e = new Data[j];
for ( i = 0; i < j; i++)
{
e[i].data = a[i].data;
e[i].count = a[i].count;
}
CreatHuffmenTree(HT,j,e);
HuffmanCode cd;
CreateHuffmanCode(HT, cd, j);
HUffman_Decode(cd);
system("pause");
return 1;
}
Switch.h文件(直接在头文件中进行函数的实现了)
#pragma once
#include<iostream>
using namespace std;
// A character paired with the number of times it was counted.
struct Data
{
    char data;  // the character itself
    int count;  // how many times it occurred
};
// Record one occurrence of `c` in a 26-slot tally.
//
//   c     character to count; anything outside 'a'..'z' is ignored.
//   Elem  tally array in which slot 0 belongs to 'a' and slot 25 to 'z'.
void Switch(char c, int*& Elem)
{
    const bool is_lowercase = (c >= 'a') && (c <= 'z');
    if (!is_lowercase)
        return;

    const int slot = c - 'a';
    Elem[slot] = Elem[slot] + 1;
}
------------------------------------------------------下面是python实现----------------------------------------
没有对switch函数进行封装
class Node():
    """One node of a Huffman tree kept in a flat list.

    Links are list positions, not object references; 0 means "no link".
    Nodes order by weight only, so a list of nodes can be sorted directly.
    """

    def __init__(self, weight, i):
        self.index = i        # position of this node in the tree list
        self.weight = weight  # symbol count (leaf) or sum of both children
        self.parent = 0       # 0 while the node has not been merged yet
        self.lchild = 0
        self.rchild = 0

    def __lt__(self, item):
        """Order nodes by weight so that sort() can be used on them."""
        return self.weight < item.weight
class HuffmanTree():
    """Huffman tree over the 26 letters, stored as a flat list of Node.

    The list has 52 entries: position 0 is a placeholder, 1..26 are the
    leaves for 'a'..'z' and 27..51 are the merged nodes, the last one
    being the root. Letters with weight 0 are still given a leaf.
    """

    def __init__(self, li):
        """Build the tree and print every node.

        li -- sequence in which li[1]..li[26] are the weights of 'a'..'z';
              li[0] is not read.
        """
        n = 26          # number of leaves, fixed to the alphabet size
        m = n * 2 - 1   # n leaves always produce n-1 merged nodes

        self.li = [Node(0, 0)]  # placeholder so that link value 0 means "none"
        self.li.extend(Node(li[i], i) for i in range(1, n + 1))
        self.li.extend(Node(0, i) for i in range(n + 1, m + 1))

        for i in range(n + 1, m + 1):
            # Parentless nodes among 1..i-1, lightest first. The sort is
            # stable, so equal weights keep their list order.
            roots = [node for node in self.li[1:i] if node.parent == 0]
            roots.sort(key=lambda node: node.weight)
            s1 = roots[0].index
            s2 = roots[1].index

            self.li[s1].parent = i
            self.li[s2].parent = i
            self.li[i].lchild = s1
            self.li[i].rchild = s2
            self.li[i].weight = self.li[s1].weight + self.li[s2].weight

        # Dump: index weight parent lchild rchild
        for node in self.li[1:]:
            print(node.index, node.weight, node.parent, node.lchild, node.rchild)

    def Return(self):
        """Return the node list (__init__ itself cannot return a value)."""
        return self.li
def HuffmanCode(HT):
    """Compute the bit list of each of the 26 leaves and save the table.

    HT -- node list in which positions 1..26 are the leaves; each node has
          `parent` and `lchild` attributes holding list positions, with
          parent == 0 marking the root.

    Returns a dict mapping leaf position (1..26) to a list of ints read from
    the root down to the leaf: 0 for a left edge, 1 for a right edge.
    The dict's str() form is also written to code.txt.
    """
    dic = {}
    for leaf in range(1, 27):
        bits = []
        child = leaf
        parent = HT[leaf].parent
        # Climb to the root collecting edge labels leaf-to-root ...
        while parent != 0:
            bits.append(0 if HT[parent].lchild == child else 1)
            child = parent
            parent = HT[parent].parent
        # ... then flip them so the code reads root-to-leaf.
        bits.reverse()
        dic[leaf] = bits

    with open("code.txt", 'w') as file:
        file.write(str(dic))
    print('编码集成功写入code.txt')
    return dic
def Huffman_Decode(code):
    """Prompt for text, save it, and write its Huffman encoding to a file.

    Despite the name this function encodes. The raw input goes to
    sourcefile.txt; the concatenated bit lists of its letters are written to
    SourceCode.txt as the str() of one flat list.

    code -- dict mapping letter position (1 for 'a' .. 26 for 'z') to a list
            of bits.

    ASCII uppercase letters are encoded like their lowercase form, matching
    the way the letter counts are gathered, and any other character is
    skipped. The previous version looked every character up directly and
    failed with KeyError on, for example, a space or a capital letter.
    """
    data = input("请输入要编码的字符")
    with open('sourcefile.txt', 'w') as file:
        file.write(data)
    print('输入的字符串成功写入sourcefile.txt')

    session = []
    for ch in data:
        if not ('a' <= ch <= 'z' or 'A' <= ch <= 'Z'):
            continue  # no code exists for this character
        session.extend(code[ord(ch.lower()) - ord('a') + 1])

    with open("SourceCode.txt", 'w') as file:
        file.write(str(session))
    print("编译后的编码成功写入SourceCode.txt")
# Driver: count the letters of text.txt, build the tree and the code table,
# then encode a string typed by the user.
filename = 'text.txt'
with open(filename, 'r') as file:
    a = file.read()
data = list(a)

# li[1]..li[26] hold the counts of 'a'..'z' (case-insensitive); li[0] is unused
# so that a letter's slot equals its leaf position in the tree.
li = [0 for _ in range(27)]
for ch in data:
    if 'a' <= ch <= 'z' or 'A' <= ch <= 'Z':
        li[ord(ch.lower()) - ord('a') + 1] += 1

HT = HuffmanTree(li).Return()
# The table is built once; a second, identical call used to follow here and
# only rewrote code.txt and repeated the console message.
code = HuffmanCode(HT)
Huffman_Decode(code)