// 附录(源代码)
#include<iostream>
#include<string>
#include<fstream>
#include<vector>
#include<math.h>
using namespace std;
// One slot of the array-backed Huffman tree. The three links are indices into
// the same array; the tree-building code in this file stores -1 for "none".
struct element
{
    int weight;  // weight carried by this node
    int lkid;    // index of the left child, or -1
    int rkid;    // index of the right child, or -1
    int parent;  // index of the parent, or -1
};
// Forward declarations of the helpers defined further down in this file.
void huffman_tree(element* hufftree, int* w, int n);// Build a Huffman tree with n leaves from the weights in w
void sort(element* hufftree, int& a, int& b, int n);// Find the two parentless nodes with the smallest weights
void huffman_code(element* hufftree, int n, string* code);// Derive the bit string of every leaf from the tree
void tongji(char* str, int len, int& n);// Count byte occurrences into freq; n receives the number of non-zero entries
string dcr(int n);// Binary text of n, left-padded with '0' to 8 characters
void compress();// Compression routine
void decompress();// Decompression routine
int ezs(string code);// Convert binary text to its decimal value
string i_to_s(int l);// Decimal text of l
int s_to_i(string str);// Integer value of decimal text
int getprefix(char* str, int l, string& type, int& len);// Parse the header; returns the index just past it
string getstr(char* str, int begin, int l);// Copy l characters of str starting at begin
int dictionaries[256];// Per byte slot: index of the matching leaf in hufftree (-1 when unused)
int freq[256];// Per byte slot: occurrence count
string equal_length_code[256];// Fixed-length 8-character binary text for each slot
// Interactive entry point. Repeatedly prompts for a mode:
//   "1" -> compress a file, "2" -> decompress a file, anything else -> exit.
// Prints "OK!" after each compress/decompress call returns.
int main()
{
    string choose;
    while (true)
    {
        cout << "输入1压缩文件 输入2解压文件 输入其它退出" << endl;
        // A failed read (EOF, closed pipe) leaves `choose` holding its previous
        // value; without this check a prior "1"/"2" would repeat forever.
        if (!(cin >> choose))
            break;
        if (choose == "1")
            compress();
        else if (choose == "2")
            decompress();
        else
            break;
        cout << "OK!" << endl;
    }
    return 0;
}
// Picks the two lightest parentless nodes of the tree under construction.
//   hufftree : array of 2*n-1 slots
//   a        : receives the index of the lightest node
//   b        : receives the index of the second lightest node
//   n        : number of leaves
// Slots at index >= n whose lkid is still -1 have not been built yet and are
// never chosen by the weight comparisons.
void sort(element* hufftree, int& a, int& b, int n)
{
    const int total = 2 * n - 1;
    int first_root = 0;

    // Seed a with the lowest-indexed parentless slot.
    int idx = 0;
    while (idx < total && hufftree[idx].parent != -1)
        ++idx;
    if (idx < total)
    {
        first_root = idx;
        a = idx;
    }

    // Seed b with the lowest-indexed parentless slot other than a.
    for (idx = 0; idx < total; ++idx)
    {
        if (hufftree[idx].parent == -1 && idx != a)
        {
            b = idx;
            break;
        }
    }

    // Move a to the lightest weighted root; strict '<' keeps the earlier
    // candidate on ties.
    for (idx = 0; idx < total; ++idx)
    {
        const element& node = hufftree[idx];
        if (node.parent != -1 || idx == a)
            continue;
        const bool weighted = idx < n || node.lkid != -1;
        if (weighted && node.weight < hufftree[a].weight)
            a = idx;
    }

    // If the minimum landed on b's seed, fall back to the first root for b.
    if (a == b)
        b = first_root;

    // Move b to the lightest weighted root that is not a.
    for (idx = 0; idx < total; ++idx)
    {
        const element& node = hufftree[idx];
        if (node.parent != -1 || idx == a || idx == b)
            continue;
        const bool weighted = idx < n || node.lkid != -1;
        if (weighted && node.weight < hufftree[b].weight)
            b = idx;
    }
}
// Builds a Huffman tree in place.
//   hufftree : array of 2*n-1 slots; slots [0, n) become the leaves and
//              slots [n, 2*n-1) the internal nodes, the last one being the root
//   w        : n leaf weights
//   n        : number of leaves
void huffman_tree(element* hufftree, int* w, int n)
{
    const int total = 2 * n - 1;

    // Detach every slot and load the leaf weights.
    for (int idx = 0; idx < total; ++idx)
    {
        element& node = hufftree[idx];
        node.parent = -1;
        node.lkid = -1;
        node.rkid = -1;
        if (idx < n)
            node.weight = w[idx];
    }

    // Each new internal node adopts the two lightest remaining roots.
    for (int next = n; next < total; ++next)
    {
        int lightest, runner_up;
        sort(hufftree, lightest, runner_up, n);

        element& merged = hufftree[next];
        merged.lkid = lightest;  // the smaller weight goes on the left
        merged.rkid = runner_up;
        merged.weight = hufftree[lightest].weight + hufftree[runner_up].weight;

        hufftree[lightest].parent = next;
        hufftree[runner_up].parent = next;
    }
}
// Writes the bit string of every leaf into code[0..n).
// Walks from each leaf up to the root (slot 2*n-2), prepending '0' when the
// node is its parent's left child and '1' otherwise. Bits are prepended to
// whatever code[i] already holds.
void huffman_code(element* hufftree, int n, string code[256])
{
    const int root = 2 * n - 2;
    for (int leaf = 0; leaf < n; ++leaf)
    {
        string& bits = code[leaf];
        for (int node = leaf; node != root; node = hufftree[node].parent)
        {
            const int up = hufftree[node].parent;
            const char bit = (hufftree[up].lkid == node) ? '0' : '1';
            bits.insert(bits.begin(), bit);
        }
    }
}
// Tallies the bytes of str[0..len) into the global freq table and stores the
// number of non-zero freq entries in n.
// A non-negative char value v is counted in slot v; a negative value v is
// counted in slot 127 - v.
void tongji(char* str, int len, int& n)
{
    for (int pos = 0; pos < len; ++pos)
    {
        const int value = str[pos];
        const int slot = (value >= 0) ? value : value * -1 + 127;
        ++freq[slot];
    }

    int distinct = 0;
    for (int slot = 0; slot < 256; ++slot)
    {
        if (freq[slot] != 0)
            ++distinct;
    }
    n = distinct;
}
// Returns the binary text of n, most significant bit first, left-padded with
// '0' to at least 8 characters (e.g. 5 -> "00000101").
// Values of 256 or more yield a longer string instead of overflowing a fixed
// 8-entry scratch array as the previous implementation did.
string dcr(int n)
{
    // Collect the bits least-significant first; appending is cheaper than
    // repeatedly prepending.
    string reversed_bits;
    while (n != 0)
    {
        reversed_bits += (n % 2 == 1) ? '1' : '0';
        n /= 2;
    }

    // Padding appended here ends up as leading zeros after the reversal.
    if (reversed_bits.length() < 8)
        reversed_bits.append(8 - reversed_bits.length(), '0');

    return string(reversed_bits.rbegin(), reversed_bits.rend());
}
void compress()
{
string filename;
cout << "输入要压缩的文件的完整路径" << endl;
cin >> filename;
int fl = filename.length();
for (int i = 0;i < fl;i++)
if (filename[i] == '\\')
{
filename.insert(i, "\\\\");
i += 2;
}
for (int i = 0;i < 256;i++)
equal_length_code[i] = dcr(i);
for (int i = 0;i < 256;i++)
freq[i] = 0;
for (int i = 0;i < 256;i++)
dictionaries[i] = -1;
string name;
string type;
for (int i = filename.length() - 1;i >= 0;i--)
if (filename[i] == '.')
{
name = filename.substr(0, i);
//cout<<name<<endl;
type = filename.substr(i + 1);
//cout<<type<<endl;
break;
}
int n;
ifstream f1;
char* fn = (char*)filename.data();
f1.open(fn, ifstream::binary);
f1.seekg(0, ios::end);
int l = f1.tellg();
f1.seekg(0, ios::beg);
char* str = new char[l];
f1.read(str, l);
f1.close();
tongji(str, l, n);
element* hufftree = new element[2 * n - 1];
string h_code[n];
int* w = new int[n];
int k = 0;
for (int i = 0;i < 256;i++)
if (freq[i] != 0)
{
w[k] = freq[i];//w是权
dictionaries[i] = k;
k++;
}
huffman_tree(hufftree, w, n);//构建哈夫曼树
/*for(int i=0;i<2*n-1;i++)
cout<<hufftree[i].weight<<endl;*/
huffman_code(hufftree, n, h_code);//根据哈夫曼树生成每个字符对应的哈夫曼编码
/*for(int i=0;i<n;i++)
cout<<h_code[i]<<endl;*/
if (n == 1)
h_code[0] = '0';
//cout<<l<<endl;
string code = "";
for (int i = 0;i < l;i++)
{
int ascii = str[i];
if (ascii >= 0)
code += h_code[dictionaries[ascii]];
else
code += h_code[dictionaries[ascii * -1 + 127]];
}
//cout<<"step2\n";
int len = code.length();
//cout<<len<<endl;
//cout<<len;
if (len % 8 != 0)//不够八位的补上0
{
int complement = 8 - l % 8;
for (int i = 0;i < complement;i++)
code += '0';
}
//cout<<"step3\n";
string newstr = "";//压缩后的新字符串
for (int i = 0;i * 8 < len;i++)
{
string s = "";
for (int j = 0;j < 8;j++)//获得八位二进制码
s += code[i * 8 + j];
int index = ezs(s);//转化为十进制 八位二进制生成一个字节
//cout<<index;
if (index < 128)
newstr += char(index);
else
newstr += char((index - 127) * -1);
}
string prefix;
prefix += type + '|';
prefix += i_to_s(l);//前缀 解压时用
for (int i = 0;i < 256;i++)
{
if (freq[i] != 0)
{
prefix += '|';//字符前后用||分隔
if (i < 128)
prefix += char(i);
else
prefix += char((i - 127) * -1);
prefix += i_to_s(freq[i]);
}
}
prefix += '|';
prefix += "stop";
newstr = prefix + newstr;
//cout<<prefix<<"\n";
l = newstr.length();
//cout<<endl;
//cout<<l;
string lmyfile = name + ".yasuo";
char* lf = (char*)lmyfile.data();
ofstream f2;
f2.open(lf, ofstream::binary);
const char* a = newstr.data();
f2.write(a, l);
f2.close();
delete[] hufftree;
delete[] w;
delete[] str;
}
void decompress()
{
string filename;
cout << "输入要解压文件的完整路径" << endl;
cin >> filename;
int fl = filename.length();
for (int i = 0;i < fl;i++)
if (filename[i] == '\\')
{
filename.insert(i, "\\\\");
i += 2;
}
for (int i = 0;i < 256;i++)
equal_length_code[i] = dcr(i);
for (int i = 0;i < 256;i++)
freq[i] = 0;
for (int i = 0;i < 256;i++)
dictionaries[i] = -1;
string hc[256];
string name;
string type;
for (int i = filename.length() - 1;i >= 0;i--)
{
if (filename[i] == '.')
{
name = filename.substr(0, i);
break;
}
}
//cout<<name<<endl;
ifstream f1;
char* fn = (char*)filename.data();
f1.open(fn, ifstream::binary);
f1.seekg(0, ios::end);
int l = f1.tellg();
f1.seekg(0, ios::beg);
char* str = new char[l];
f1.read(str, l);
f1.close();
int len;//字符的总数
int begin = getprefix(str, l, type, len);//begin是新的下标
string code = "";
for (int i = begin;i < l;i++)
{
int ascii = str[i];
if (ascii >= 0)
code += equal_length_code[ascii];
else
code += equal_length_code[ascii * -1 + 127];
}
//cout<<code.length();
int n = 0;//原哈夫曼树叶子节点个数
for (int i = 0;i < 256;i++)
if (freq[i] != 0)
n++;
element* hufftree = new element[2 * n - 1];
string h_code[n];
int* w = new int[n];
int k = 0;
for (int i = 0;i < 256;i++)
if (freq[i] != 0)
{
w[k] = freq[i];
dictionaries[i] = k;
k++;
}
huffman_tree(hufftree, w, n);
huffman_code(hufftree, n, h_code);
if (n == 1)
h_code[0] = '0';
for (int i = 0;i < 256;i++)
if (freq[i] != 0)
hc[i] = h_code[dictionaries[i]];
int cl = code.length();
string newstr = "";
int a = 0;
int max = 0;
for (int i = 1;i < 256;i++)
if (freq[i] > freq[max])
max = i;
int min_length = h_code[dictionaries[max]].length();
for (int k = 0;k < len;k++)
{
string temp = "";
while (a < cl)
{
bool found = false;
temp += code[a];
if (temp.length() < min_length)
{
a++;
continue;
}
for (int index = 0;index < 256;index++)
{
if (temp == hc[index])
{
found = true;
if (index < 128)
newstr += char(index);
else
newstr += char((index - 127) * -1);
break;
}
}
a++;
if (found == true)
break;
}
}
l = newstr.length();
const char* b = newstr.data();
string lmyfile = name + '.' + type;
char* lf = (char*)lmyfile.data();
ofstream f2;
f2.open(lf, ofstream::binary);
f2.write(b, l);
f2.close();
delete[] str;
delete[] w;
delete[] hufftree;
}
// Converts binary text (most significant bit first) to its integer value.
// Every character other than '1' counts as a 0 bit; "" yields 0.
// Uses integer arithmetic only: accumulating floating-point pow() results
// into an int is slower and can truncate on toolchains with an inexact pow.
int ezs(string code)
{
    int result = 0;
    for (string::size_type i = 0; i < code.length(); i++)
        result = result * 2 + (code[i] == '1' ? 1 : 0);
    return result;
}
// Returns the decimal text of l (e.g. 120 -> "120", 0 -> "0").
// Negative values get a leading '-'; previously they produced garbage
// characters because the digit loop assumed l >= 0.
string i_to_s(int l)
{
    const bool negative = l < 0;
    // Work on the unsigned magnitude so the most negative int does not overflow.
    unsigned int magnitude = negative ? 0u - static_cast<unsigned int>(l)
                                      : static_cast<unsigned int>(l);

    // Digits come out least-significant first and are reversed at the end.
    string reversed_digits;
    do
    {
        reversed_digits += char('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude != 0);
    if (negative)
        reversed_digits += '-';

    return string(reversed_digits.rbegin(), reversed_digits.rend());
}
// Interprets str as decimal text and returns its value; "" yields 0.
// No validation is performed: each character contributes (char - '0').
int s_to_i(string str)
{
    int value = 0;
    for (string::const_iterator it = str.begin(); it != str.end(); ++it)
        value = value * 10 + int(*it - '0');
    return value;
}
int getprefix(char* str, int l, string& type, int& len)
{
string newstr;
//cout<<l<<endl;
int e = 0;//结束标志stop开始的地方
for (int i = 0;i < l;i++)
if (str[i] == 's' && str[i + 1] == 't' && str[i + 2] == 'o' && str[i + 3] == 'p')
e = i;
//cout<<str[e]<<str[e+1]<<str[e+2]<<str[e+3];
int i = 0;
int front, rear;
for (i;i < e;i++)
if (str[i] == '|')
{
rear = i;
type = getstr(str, 0, rear);
i++;
break;
}
//cout<<type;
front = rear + 1;
for (i;i < e;i++)
if (str[i] == '|')
{
rear = i;
len = s_to_i(getstr(str, front, rear - front));
break;
}
//cout<<len;
//cout<<str[i+1];
while (i < e)
{
if (str[i] == '|')
{
int w;
i++;
if (i >= e)
break;
int ascii = str[i];
i++;
front = i;
for (i;i < e;i++)
{
if (str[i] == '|')
{
rear = i;
w = s_to_i(getstr(str, front, rear - front));
break;
}
}
if (ascii >= 0)
freq[ascii] = w;
else
freq[ascii * -1 + 127] = w;
}
}
return e + 4;
}
// Returns the l characters of str starting at index begin as a string
// (embedded zero bytes included). A non-positive l yields "".
string getstr(char* str, int begin, int l)
{
    if (l <= 0)
        return string();
    char* const first = str + begin;
    return string(first, first + l);
}
// (仅供参考)