题目一:
(1)最优huffman编码为:
111 (此字符为空格)
a 1010
b 100100
c 00101
d 10111
e 010
f 110100
g 100111
h 0001
i 0111
j 1101101110
k 11011010
l 10110
m 110111
n 0110
o 1000
p 100110
q 1101101100
r 0000
s 0011
t 1100
u 00100
v 1101100
w 110101
x 1101101111
y 100101
z 1101101101
(2)平均编码长度为5.74074(这是27个码字长度的简单算术平均,即155/27);若按各字符出现频率加权,平均编码长度为4201/1010≈4.159位/字符;
(3)认为熵值会比平均编码长度小。因为熵值是直接由各字符的概率计算出来的理论下界,对应的压缩比更高,即编码长度更小;任何前缀码(包括哈夫曼编码)按频率加权的平均码长 L 都满足 H ≤ L < H+1。
(4)感觉除了标点之外,英文文本压缩能利用的或许主要就是字母出现的频率了。
2哈夫曼编码实现
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <queue>
#include <string>
#include <vector>
using namespace std;
// A node of the Huffman tree.
// Leaves carry a symbol in mchar; internal nodes keep the placeholder '!'.
// Whether a node is a leaf is decided by its children (see IsLeaf), not by
// the placeholder, so '!' may itself be a symbol of the alphabet.
class Node
{
public:
    char mchar;               // symbol for a leaf, '!' for an internal node
    int mweight;              // frequency of the symbol / sum of the subtree
    Node *lchild,*rchild;     // non-owning links; HuffmanTree frees the nodes

    Node() : mchar('!'), mweight(0), lchild(NULL), rchild(NULL) {}

    Node(char c,int w) : mchar(c), mweight(w), lchild(NULL), rchild(NULL) {}

    // True when the node has no children, i.e. it represents a symbol.
    bool IsLeaf() const
    {
        return NULL==lchild && NULL==rchild;
    }
};

// Inverted on purpose: std::priority_queue is a max-heap, so reporting the
// heavier node as "smaller" makes the lightest node come out first.
bool operator <(const Node &a,const Node &b)
{
    return a.mweight>b.mweight;
}

// Builds a Huffman code from a frequency table and packs/unpacks text with it.
//
// The encoded file holds only the packed bits (native-endian unsigned int
// words, first code bit in the most significant position). The number of
// payload bits is kept in mbitlenlastcoding and is NOT stored in the file,
// so Decodeing() only works on the object that produced the file.
class HuffmanTree
{
public:
    long mbitlenlastcoding;   // payload bits written by the last Coding() call
    int mSumBitLen;           // unweighted sum of the lengths of all codes
    Node *root;               // root of the tree, NULL until CreateTree()
    // Min-heap on weight (see operator< above); after CreateTree() it holds
    // a single copy of the root.
    std::priority_queue<Node> H;
    std::vector<Node*> V;     // every tree node in breadth-first order (owned)
    // One entry per symbol, formatted "<symbol> <code>", sorted by symbol.
    std::vector<std::string> CodeTable;

    HuffmanTree() : mbitlenlastcoding(0), mSumBitLen(0), root(NULL) {}

    // Builds the tree and the code table.
    //   farray    - frequency of each symbol
    //   chartable - the symbols, chartable[i] has frequency farray[i]
    //   num       - number of symbols to use
    // Any previously built tree is discarded first. num <= 0 leaves the
    // object empty; a single symbol gets the one-bit code "0".
    // chartable.at() throws std::out_of_range if num exceeds its length.
    void CreateTree(int *farray,std::string chartable,int num)
    {
        Reset();
        if(NULL==farray || num<=0)
            return;
        SetQueueH(farray,chartable,num);
        if(1==num)
        {
            // A lone leaf would get an empty code; hang it under a parent
            // so that it is encoded as "0".
            Node parent;
            parent.lchild=new Node(H.top());
            parent.mweight=parent.lchild->mweight;
            H.pop();
            H.push(parent);
        }
        while(H.size()>1)
        {
            Node *first=new Node(H.top());
            H.pop();
            Node *second=new Node(H.top());
            H.pop();
            // The parent lives in the queue by value until it is popped and
            // materialised on the heap itself, so nothing is leaked.
            Node parent;
            parent.lchild=first;
            parent.rchild=second;
            parent.mweight=first->mweight + second->mweight;
            H.push(parent);
        }
        root=new Node(H.top());   // the single remaining entry is the root
        LinearizeTree();
        std::string str(" ");
        CreateCodeTable(root,str,-1);
        // Sorted by symbol so the table is in alphabetical order.
        std::sort(CodeTable.begin(),CodeTable.end());
    }

    // Encodes str into the file "text.dat".
    // A character is looked up in the code table as-is; an upper-case letter
    // that is not in the table falls back to its lower-case form, and any
    // other unknown character is encoded as the first table entry (the space
    // for the default alphabet).
    void Coding(const char* str)
    {
        mbitlenlastcoding=0;
        std::ofstream fout("text.dat",std::ios::binary);
        if(fout.fail())
        {
            std::cerr<<"file write fail"<<std::endl;
            return;
        }
        if(NULL==str || CodeTable.empty())
            return;

        // symbol -> index into CodeTable, -1 when the symbol has no code
        int slot[256];
        for(int k=0;k<256;k++)
            slot[k]=-1;
        for(std::size_t k=0;k<CodeTable.size();k++)
        {
            if(CodeTable[k].length()>=2)
                slot[static_cast<unsigned char>(CodeTable[k][0])]=static_cast<int>(k);
        }

        const int wordBits=static_cast<int>(sizeof(unsigned int)*8);
        unsigned int word=0;      // unsigned: left shifts are well defined
        int filled=0;             // bits currently held in word
        for(const char *cur=str;'\0'!=*cur;++cur)
        {
            const char c=*cur;
            int idx=slot[static_cast<unsigned char>(c)];
            if(idx<0 && c>='A' && c<='Z')
                idx=slot[static_cast<unsigned char>(c-'A'+'a')];
            if(idx<0)
                idx=0;
            const std::string &entry=CodeTable[idx];
            // entry is "<symbol> <code>": the code starts at offset 2
            for(std::size_t k=2;k<entry.length();k++)
            {
                word=(word<<1) | ('0'==entry[k] ? 0u : 1u);
                ++mbitlenlastcoding;
                if(++filled==wordBits)
                {
                    fout.write(reinterpret_cast<const char*>(&word),sizeof(word));
                    word=0;
                    filled=0;
                }
            }
        }
        if(0!=filled)
        {
            // Left-align the trailing bits so the first one sits in the MSB.
            word<<=(wordBits-filled);
            fout.write(reinterpret_cast<const char*>(&word),sizeof(word));
        }
    }

    // Decodes infilename (as written by Coding on this object) and writes the
    // recovered text to outfilename. mbitlenlastcoding is left untouched, so
    // the same file can be decoded more than once. If the input cannot be
    // opened nothing is written.
    void Decodeing(const char *infilename,const char *outfilename)
    {
        std::ifstream fin(infilename,std::ios::binary);
        if(fin.fail())
        {
            std::cerr<<"file read fail"<<std::endl;
            return;
        }
        const int wordBits=static_cast<int>(sizeof(unsigned int)*8);
        std::string strResult;
        long remaining=(NULL==root) ? 0 : mbitlenlastcoding;
        Node *p=root;
        unsigned int word=0;
        bool ok=true;
        while(ok && remaining>0 && fin.read(reinterpret_cast<char*>(&word),sizeof(word)))
        {
            // The last word may be only partly filled with payload bits.
            const int take=remaining<wordBits ? static_cast<int>(remaining) : wordBits;
            for(int k=0;ok && k<take;k++)
            {
                const bool one=0!=((word>>(wordBits-1-k)) & 1u);
                p=one ? p->rchild : p->lchild;
                if(NULL==p)
                {
                    // Bit sequence that matches no code: input is not ours.
                    std::cerr<<"decode fail"<<std::endl;
                    ok=false;
                }
                else if(p->IsLeaf())
                {
                    // Emit as soon as the leaf is reached, so the very last
                    // symbol of the stream is not lost.
                    strResult.push_back(p->mchar);
                    p=root;
                }
            }
            remaining-=take;
        }
        std::ofstream fout(outfilename);
        fout<<strResult;
    }

    // Prints the queue content in pop order without consuming the queue.
    void ShowQueueH()
    {
        std::priority_queue<Node> pending(H);
        while(!pending.empty())
        {
            const Node &t=pending.top();
            std::cout<<t.mchar<<" "<<t.mweight<<std::endl;
            pending.pop();
        }
    }

    // Prints every tree node (index, symbol, weight) in breadth-first order.
    void ShowTree()
    {
        for(std::size_t i=0;i<V.size();i++)
            std::cout<<i<<" "<<V[i]->mchar<<" "<<V[i]->mweight<<std::endl;
    }

    // Prints the code table, one "<symbol> <code>" entry per line.
    void ShowCodeTable()
    {
        for(std::size_t i=0;i<CodeTable.size();i++)
            std::cout<<CodeTable[i]<<std::endl;
    }

    // Writes the code table to filename, one entry per line.
    void PrintCodeTable(const char *filename)
    {
        std::ofstream fout(filename);
        for(std::size_t i=0;i<CodeTable.size();i++)
            fout<<CodeTable[i]<<std::endl;
    }

    ~HuffmanTree()
    {
        FreeNodes();
    }

private:
    // The object owns the nodes in V through raw pointers; copying it would
    // free them twice, so copying is disabled (declared, never defined).
    HuffmanTree(const HuffmanTree&);
    HuffmanTree& operator=(const HuffmanTree&);

    // Deletes every node recorded in V.
    void FreeNodes()
    {
        for(std::size_t i=0;i<V.size();i++)
            delete V[i];
        V.clear();
    }

    // Returns the object to its freshly constructed state.
    void Reset()
    {
        FreeNodes();
        H=std::priority_queue<Node>();
        CodeTable.clear();
        mSumBitLen=0;
        mbitlenlastcoding=0;
        root=NULL;
    }

    // Fills the queue with one leaf per symbol.
    void SetQueueH(int *farray,std::string chartable,int num)
    {
        for(int i=0;i<num;i++)
            H.push(Node(chartable.at(i),farray[i]));
    }

    // Stores the tree nodes in V in breadth-first order.
    void LinearizeTree()
    {
        if(NULL==root)
            return;
        std::queue<Node*> pending;
        pending.push(root);
        while(!pending.empty())
        {
            Node *cur=pending.front();
            pending.pop();
            V.push_back(cur);
            if(NULL!=cur->lchild)
                pending.push(cur->lchild);
            if(NULL!=cur->rchild)
                pending.push(cur->rchild);
        }
    }

    // Depth-first walk that appends "<symbol> <code>" for every leaf.
    //   p    - current node
    //   str  - " " followed by the code bits collected so far
    //   flag - 0 / 1: the branch taken to reach p; any other value: the root
    void CreateCodeTable(Node *p,std::string str,int flag)
    {
        if(NULL==p)
            return;
        if(0==flag)
            str.push_back('0');
        else if(1==flag)
            str.push_back('1');
        if(p->IsLeaf())
        {
            // str still starts with the separator, hence the -1
            mSumBitLen+=static_cast<int>(str.length())-1;
            str.insert(str.begin(),p->mchar);
            CodeTable.push_back(str);
            return;
        }
        CreateCodeTable(p->lchild,str,0);
        CreateCodeTable(p->rchild,str,1);
    }
};
- void main()
- {
- string chartable(" etaoinshrdlcumwfgypbvkjxqz");
- int farray[]={183,102,77,68,59,58,55,51,49,48,35,34,26,24,21,19,18,17,16,16,13,9,6,2,2,1,1};
- char text[]="Chapter Graphs surveys the most important graph processing problems including depth first search breadth first search minimum spanning trees and shortest paths ";
- HuffmanTree ht;
- ht.CreateTree(farray,chartable,chartable.length());
- ht.ShowTree();
- ht.ShowCodeTable();
- ht.PrintCodeTable("CodeTable.txt");
- ht.Coding(text);
- ht.Decodeing("text.dat","Decoding.txt");
- cout<<"平均编码长度:"<<ht.mSumBitLen/27.0<<endl;
- }