huffman压缩
huffman压缩
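心得:在huffman.h中有一个encode()函数,其中定义了一个MyString类的局部变量str,给它赋好值后return时就会出错,百思不得其解。问过老师,老师说是没有深拷贝,可我在MyString中已经写了深拷贝的赋值运算符。后来看《C专家编程》,看到P49时才明白:当控制流离开局部变量的作用域时,自动变量便自动失效,这意味着即使返回一个指向局部变量的指针,函数结束后变量已被销毁,谁也不知道该指针所指向的地址里是什么内容。把这个变量声明为static之后,问题就不再出现了。需要补充的是:encode()是按值返回的,返回时调用的是拷贝构造函数而不是operator=;当时MyString只重载了赋值运算符、没有定义拷贝构造函数,返回的临时对象与局部变量共用同一块缓冲区,局部变量析构后缓冲区即被释放——老师所说的「没有深拷贝」指的正是这一点。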
huffman.h
#ifndef _HuffmanTree_H
#define _HuffmanTree_H
#include <fstream>
#include <stdexcept>
#include "Seqlist.h"
#include "Mystring.h"
#include "TriElement.h"
// Huffman tree over a fixed character set. Nodes are kept in a sequential
// list; the leaf stored at index k belongs to the character charset[k].
class HuffmanTree
{
protected:
    MyString charset;                 // characters that own a leaf
    Seqlist<TriElement> huftree;      // tree nodes (leaves first, root last)

    void encode(int i);               // print the code of the i-th character

public:
    // m: character set, weight: one weight per character, n: number of characters
    HuffmanTree(MyString &m, int weight[], int n);

    void printCode();                 // print every character with its code
    MyString encode(MyString &text);  // encode (compress) text into a '0'/'1' string
    void decode();                    // decode (decompress) the encoded file
};
// Builds the Huffman tree.
//   m      - character set; m[k] is the character of leaf k
//   weight - weight[k] is the weight of m[k]
//   n      - number of characters
// The n leaves are stored first, then n-1 internal nodes are appended, each
// joining the two lightest nodes that have no parent yet. The finished node
// table is printed to cout.
HuffmanTree::HuffmanTree(MyString &m, int weight[], int n)
    : huftree(2 * n - 1)
{
    // One leaf per character, in the same order as the character set.
    for (int leaf = 0; leaf < n; ++leaf)
    {
        this->charset.insert(leaf, m[leaf]);
        this->huftree.insert(TriElement(weight[leaf], -1, -1, -1));
    }

    for (int round = 0; round < n - 1; ++round)
    {
        const int newIndex = n + round;   // index the merged node will get
        int lightest = 0x7fffffff;
        int second = 0x7fffffff;
        int lightestAt = 0;
        int secondAt = 0;

        // Scan every existing node that is still a root of its own subtree.
        for (int k = 0; k < newIndex; ++k)
        {
            if (huftree[k].parent != -1)
                continue;
            const int w = huftree[k].data;
            if (w < lightest)
            {
                // The previous minimum becomes the runner-up.
                second = lightest;
                secondAt = lightestAt;
                lightest = w;
                lightestAt = k;
            }
            else if (w < second)
            {
                second = w;
                secondAt = k;
            }
        }

        huftree[lightestAt].parent = newIndex;
        huftree[secondAt].parent = newIndex;
        huftree.insert(TriElement(huftree[lightestAt].data + huftree[secondAt].data,
                                  -1, lightestAt, secondAt));
    }
    cout << "Huffman 树的结点顺序表:" << this->huftree;
}
void HuffmanTree::encode(int i)
{ MyString str;
int child = i,parent = huftree[child].parent;//已经重载下标,huftree[i]是哈夫曼第i个
while(parent != -1)//不是根节点
{ if(huftree[parent].left == child)//是左子树
str+='0';
else
str+='1';
child = parent;
parent = huftree[child].parent;
}
str.reverse();//反转字符串
cout<<str<<",";//临时的方法,按书上会出现一个问题
}
// Prints "<char>:<code>," for every character of the character set,
// preceded by a heading and followed by a newline.
void HuffmanTree::printCode()
{
    cout << "Huffman 编码,";
    int k = 0;
    while (k < this->charset.count())
    {
        cout << this->charset[k] << ":";
        encode(k);
        ++k;
    }
    cout << endl;
}
// Encodes text into a string of '0'/'1' characters, writes that string to
// c://2.txt and returns it.
//   text - the characters to encode; each one must be in the character set
// Throws out_of_range when text contains a character that is not part of
// the character set (previously such a character was silently encoded
// through whatever node happened to sit at index charset.count()).
MyString HuffmanTree::encode(MyString &text)
{
    // The buffer is static as in the original; it is cleared on entry so the
    // output of an earlier call is not prepended to this one.
    static MyString codestr;
    MyString empty;
    codestr = empty;

    const int charCount = this->charset.count();
    for (int i = 0; i < text.count(); i++)
    {
        // Position of the character in the set == index of its leaf.
        int j = 0;
        while (j < charCount && !(text[i] == this->charset[j]))
            j++;
        if (j == charCount)
            throw out_of_range("待编码字符不在字符集中");

        // Collect the bits from the leaf up to the root, then reverse them.
        MyString str;
        int child = j;
        int parent = huftree[child].parent;
        while (parent != -1)
        {
            if (huftree[parent].left == child)
                str += '0';
            else
                str += '1';
            child = parent;
            parent = huftree[child].parent;
        }
        str.reverse();
        codestr += str;
    }

    ofstream fout;
    fout.open("c://2.txt");
    fout << codestr;
    fout.close();
    return codestr;
}
// Decodes the bit string stored in c://2.txt and prints the decoded text.
// The file is consumed one character at a time, so the encoded stream may be
// of any length (the former fixed 1000-byte buffer overflowed on longer
// streams and was read uninitialised when the file could not be opened).
// Whitespace in the file is skipped; '0' descends to the left child and any
// other character descends to the right child.
void HuffmanTree::decode()
{
    ifstream fin;
    fin.open("c://2.txt");   // encoded bit string

    MyString af_text;        // decoded text
    const int root = this->huftree.count() - 1;   // the root is the last node
    int node = root;
    char bit;
    while (fin >> bit)
    {
        if (bit == '0')
            node = huftree[node].left;
        else
            node = huftree[node].right;
        if (huftree[node].leaf())
        {
            // A leaf index is also the index of its character.
            af_text += this->charset[node];
            node = root;
        }
    }
    fin.close();
    cout<<"解码为"<<af_text<<endl;
}
#endif
main
#include <iostream>
using namespace std;
#include "HuffmanTree.h"
/*采用Huffman编码进行文件压缩。给定一个文本文件,统计其中字符使用频率,
建立一棵Huffman树,采用变长的二进制位串表示字符的Huffman编码,计算压缩比。再对经Huffman编码压缩的文本进行译码。
*/
int main()
{
/*readtxt*/
MyString charj,code;//字符集,编码后的文件
int *qz,length;//权值,以及字符集个数
int qqz[100];
char s[1000];//源文档中字符信息
ifstream fin;//
fin.open("c://1.txt");//从C盘1.txt中读取文件信息
fin >> s;//把文件中信息读入字符串s中
MyString m(s);//压缩后的字符信息,原文件大小即为m.n
fin.close();
cout<<"文件中信息为"<<m<<endl;
qz = m.charest(charj);//charj存放字符集,qz数组存放权值
cout<<"字符集为"<<charj<<endl;
cout<<"权值为";
length = charj.count();
for(int i = 0;i < length;i++)
{ qqz[i] = *qz;
cout<<*qz<<" ";
qz++;
}
cout<<endl;
HuffmanTree huf(charj,qqz,length);
huf.printCode();
cout<<"压缩后为:"<<huf.encode(m);
huf.decode();
system("pause");
return 0;
}
Mystring.h
#ifndef _Mystring_H
#define _Mystring_H
#include <exception>
#include <string.h>
// Growable character string that owns a heap buffer.
// A copy constructor is provided because the class owns `element`: without
// it, copying (for example when a MyString is returned by value) would share
// the buffer between two objects and delete it twice.
class MyString
{
protected:
    char *element;   // heap buffer holding the characters followed by '\0'
    int length;      // capacity of the buffer
    int n;           // number of characters stored
private:
    void init(char *s = "");
public:
    MyString(char *s="");
    MyString(char ch);
    MyString(const MyString &str) { this->init(str.element); }   // deep copy
    ~MyString();
    MyString& operator=(MyString &str);   // deep copy
    void operator+=(char ch);
    void operator+=(MyString &str);
    void operator+=(char *str);
    char& operator[](int i);
    void reverse();
    friend ostream& operator<<(ostream &, MyString &str);
    void print();
    void insert(int i,char ch);           // insert ch at position i
    void insert(int i,MyString &str);
    int *charest(MyString & mm);          // build the character set, return the weights
    int count();                          // number of characters
};
// Allocates a fresh buffer and copies the C string str into it.
// The capacity is twice the string length, but never less than 32.
void MyString::init(char *str)
{
    this->n = strlen(str);
    const int doubled = this->n * 2;
    this->length = (doubled > 32) ? doubled : 32;
    this->element = new char[this->length];
    memcpy(this->element, str, this->n);
    this->element[this->n] = '\0';
}
// Constructs the string as a copy of the C string str.
MyString::MyString(char *str)
{
    init(str);
}
// Constructs a one-character string holding ch.
MyString::MyString(char ch)
{
    this->init("");              // empty buffer with the default capacity
    this->element[0] = ch;
    this->element[1] = '\0';
    this->n = 1;
}
// Writes the characters of str to out and returns out.
ostream& operator<<(ostream& out, MyString& str)
{
    return out << str.element;
}
// Prints the n stored characters to cout, one by one.
void MyString::print()
{
    // The index starts at 0; it used to be left uninitialised, which made
    // the loop read an indeterminate value.
    for (int i = 0; i < this->n; i++)
        cout << this->element[i];
}
// Deep-copy assignment. Returns *this.
// Self-assignment is a no-op; the old buffer is released only after the new
// one has been filled, so the source is never read after being freed.
MyString& MyString::operator=(MyString& str)
{
    if (this == &str)
        return *this;
    char *old = this->element;
    this->init(str.element);   // allocates a new buffer and copies str into it
    delete[] old;
    return *this;
}
// Appends the characters of str to this string.
// The buffer grows to twice the combined length when needed; the old buffer
// is freed after its contents were copied (it used to be leaked).
void MyString::operator+=(MyString& str)
{
    int m = str.count();
    if ((m + this->n) * 2 > this->length)
    {
        int capacity = (m + this->n) * 2;
        char* grown = new char[capacity];
        for (int i = 0; i < this->n; i++)
            grown[i] = this->element[i];
        delete[] this->element;
        this->element = grown;
        this->length = capacity;
    }
    for (int i = 0; i < m; i++)
        this->element[this->n + i] = str[i];
    this->n += m;
    this->element[this->n] = '\0';
}
// Appends the C string str to this string. A null pointer appends nothing.
// The buffer grows to twice the combined length when needed; the old buffer
// is freed once the append is complete (it used to be leaked).
void MyString::operator+=(char* str)
{
    if (str == 0)
        return;
    int m = strlen(str);
    char* old = 0;
    if ((m + this->n) * 2 > length)
    {
        int capacity = (m + this->n) * 2;
        char* grown = new char[capacity];
        for (int i = 0; i < this->n; i++)
            grown[i] = this->element[i];
        old = this->element;
        this->element = grown;
        this->length = capacity;
    }
    for (int i = 0; i < m; i++)
        this->element[this->n + i] = str[i];
    this->n += m;
    this->element[this->n] = '\0';
    // Released last in case str pointed into the old buffer.
    delete[] old;
}
// Appends the single character ch.
// The buffer grows to twice the new length when needed; the old buffer is
// freed after its contents were copied (it used to be leaked).
void MyString::operator+=(char ch)
{
    if ((this->n + 1) * 2 > this->length)
    {
        int capacity = (this->n + 1) * 2;
        char* grown = new char[capacity];
        for (int i = 0; i < this->n; i++)
            grown[i] = this->element[i];
        delete[] this->element;
        this->element = grown;
        this->length = capacity;
    }
    this->element[this->n] = ch;
    this->n += 1;
    this->element[this->n] = '\0';
}
// Returns a reference to the character at index i.
// Indices 0..n are accepted (index n is the terminating '\0');
// anything else throws out_of_range.
char& MyString::operator[](int i)
{
    if (i < 0 || i > this->n || this->length < 0)
        throw out_of_range("参数i指定字符序号或len超出范围");
    return this->element[i];
}
// Reverses the characters of the string in place.
// Two indices walk towards each other from both ends; the former loop bound
// (n - 1) / 2 performed too few swaps for even lengths (a two-character
// string was left unchanged).
void MyString::reverse()
{
    for (int lo = 0, hi = this->n - 1; lo < hi; lo++, hi--)
    {
        char temp = this->element[lo];
        this->element[lo] = this->element[hi];
        this->element[hi] = temp;
    }
}
void MyString::insert(int i, char ch)
{
this->insert(i,MyString(ch));
}
// Inserts the characters of str at position i; i is clamped to [0, n].
// An empty str leaves the string unchanged.
void MyString::insert(int i, MyString& str)
{
    const int extra = str.n;
    if (extra == 0)
        return;
    if (i < 0)
        i = 0;
    if (i > n)
        i = n;

    char* old = this->element;
    const bool grow = (this->length <= this->n + extra);
    if (grow)
    {
        // Not enough room for the result plus '\0': move to a larger buffer
        // and bring over the part in front of the insertion point.
        this->length = (this->n + extra + 1) * 2;
        this->element = new char[this->length];
        memcpy(this->element, old, i);
    }

    // Shift the tail (including the terminating '\0') right by `extra`.
    memmove(this->element + i + extra, old + i, this->n - i + 1);
    if (grow)
        delete[] old;

    for (int k = 0; k < extra; k++)
        this->element[i + k] = str.element[k];
    this->n += extra;
}
// Builds the character set of this string and counts the occurrences.
//   mm - receives every character of this string that it does not contain
//        yet, appended in order of first appearance
// Returns a new[]-allocated array with one entry per character of mm:
// entry k is the number of times mm[k] occurs in this string. The caller
// owns the array and must delete[] it.
int * MyString::charest(MyString & mm)
{
    // Pass 1: append every character that is not in the set yet.
    for (int pos = 0; pos < this->n; pos++)
    {
        char ch = this->element[pos];
        int k = 0;
        while (k < mm.n && mm.element[k] != ch)
            k++;
        if (k == mm.n)
            mm.insert(k, ch);
    }

    // Pass 2: count each character of the set. The array is sized by the set
    // because that is how it is indexed (it used to be sized by the text
    // length, which is too small when mm arrives already populated).
    int * weights = new int[mm.n];
    for (int k = 0; k < mm.n; k++)
    {
        int hits = 0;
        for (int pos = 0; pos < this->n; pos++)
        {
            if (mm.element[k] == this->element[pos])
                hits++;
        }
        weights[k] = hits;
    }
    return weights;
}
// Releases the character buffer.
MyString::~MyString()
{
    delete[] this->element;
}
// Returns the number of characters stored in the string.
int MyString::count()
{
    const int stored = this->n;
    return stored;
}
#endif
SeqList.h
#ifndef _Seqlist_H
#define _Seqlist_H
#include <exception>//C++异常类
// Sequential list: a growable array of T.
// Copy construction and copy assignment make deep copies because the class
// owns `element`; the implicit member-wise versions would share the array
// between two lists and delete it twice.
template <class T>
class Seqlist
{
protected:
    T *element;      // heap array holding the elements
    int length;      // capacity of the array
    int n;           // number of elements stored
private:
    void init(T values[],int n);
public:
    Seqlist(int length = 32);
    Seqlist(T values[],int n);
    Seqlist(const Seqlist<T>& other) { this->init(other.element, other.n); }
    Seqlist<T>& operator=(const Seqlist<T>& other)
    {
        if (this != &other)
        {
            T *old = this->element;
            this->init(other.element, other.n);   // fresh array filled from other
            delete[] old;
        }
        return *this;
    }
    ~Seqlist();
    void insert(int i,T x);     // insert x at position i
    void insert(T x);           // append x
    T& operator[](int i);
    friend ostream& operator<<<>(ostream& , Seqlist<T>&);
    int count();                // number of elements
};
// Constructs an empty list with room for `length` elements.
template <class T>
Seqlist<T>::Seqlist(int length)
{
    this->n = 0;
    this->length = length;
    this->element = new T[length];
}
// Constructs a list holding a copy of the first n entries of values.
template <class T>
Seqlist<T>::Seqlist(T values[], int n)
{
    init(values, n);
}
// Allocates a fresh array and copies the first n entries of values into it.
// The capacity is 2 * n, but never less than 32.
template <class T>
void Seqlist<T>::init(T values[], int n)
{
    const int doubled = n * 2;
    this->length = (doubled > 32) ? doubled : 32;
    this->element = new T[this->length];
    this->n = n;
    int k = 0;
    while (k < n)
    {
        this->element[k] = values[k];
        ++k;
    }
}
// Inserts x at position i, shifting later elements one place to the right.
// i is clamped to [0, n], matching MyString::insert. When the array is full
// it is replaced by one of twice the capacity; a capacity of zero grows to
// one (doubling zero would never grow).
template <class T>
void Seqlist<T>::insert(int i, T x)
{
    if (i < 0)
        i = 0;
    if (i > this->n)
        i = this->n;

    T *temp = this->element;
    if (this->n >= this->length)
    {
        int capacity = (this->length > 0) ? this->length * 2 : 1;
        T *grown = new T[capacity];
        // Elements in front of the insertion point keep their index.
        for (int j = 0; j < i; j++)
            grown[j] = temp[j];
        this->element = grown;
        this->length = capacity;
    }
    // Move the tail back to front so no element is overwritten before it is copied.
    for (int j = this->n - 1; j >= i; j--)
        this->element[j + 1] = temp[j];
    if (temp != this->element)
        delete[] temp;
    this->element[i] = x;
    this->n++;
}
// Appends x behind the last element.
template <class T>
void Seqlist<T>::insert(T x)
{
    const int end = this->n;
    insert(end, x);
}
// Releases the element array.
template <class T>
Seqlist<T>::~Seqlist()
{
    delete[] this->element;
}
// Returns a reference to the element at index i.
// Throws out_of_range unless 0 <= i < n.
template <class T>
T& Seqlist<T>::operator[](int i)
{
    if (i < 0 || i >= this->n)
        throw out_of_range("参数i超出下标范围");
    return this->element[i];
}
// Writes the list as "(e0,e1,...)" followed by a newline and returns out.
// This is the definition of the primary function template, so its name
// carries no template-argument list (the former `operator<<<>` spelling is
// not valid in a template definition).
template <class T>
ostream& operator<<(ostream& out, Seqlist<T>& s)
{
    out << "(";
    for (int i = 0; i < s.n; i++)
    {
        if (i > 0)
            out << ",";
        out << s.element[i];
    }
    out << ")" << endl;
    return out;
}
// Returns the number of elements stored in the list.
template <class T>
int Seqlist<T>::count()
{
    const int stored = this->n;
    return stored;
}
#endif
TriElement.h
#ifndef _TriElement_H
#define _TriElement_H
// Node of a binary tree stored in an array: a value plus the indices of the
// parent, left child and right child. An index of -1 means "none".
// leaf() and operator== are const-qualified so they also work on const
// objects and temporaries.
class TriElement
{
public:
    int data, parent, left, right;

    TriElement(int data = 0, int parent = -1, int left = -1, int right = -1)
    {
        this->data = data;
        this->parent = parent;
        this->left = left;
        this->right = right;
    }

    // Writes the node as "(data,parent,left,right)".
    friend std::ostream& operator<<(std::ostream& out, TriElement e)
    {
        out << "(" << e.data << "," << e.parent << "," << e.left << "," << e.right << ")";
        return out;
    }

    // True when the node has neither a left nor a right child.
    bool leaf() const
    {
        return this->left == -1 && this->right == -1;
    }

    // Two nodes compare equal when their data values are equal; the links
    // are not compared.
    bool operator==(const TriElement &e) const
    {
        return this->data == e.data;
    }
};
#endif