//mark一下
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <conio.h>
struct head{
unsigned char b;
long count; /*the frequency*/
long parent,lch,rch;
char bits[256];
}header[512],tmp;
void compress(){
char filename[255],outputfile[255],buf[512];
unsigned char c;
long i,j,m,n,f;//long1,int在本机上均为4字节,不加以区分
long min1,pt1,flength;
FILE *ifp,*ofp;//变量
printf("source filename:");
gets(filename);
ifp=fopen(filename,"rb");
if(ifp==NULL){
printf("source file open error!\n");
return;
}
printf("destination filename:");
gets(outputfile);
ofp=fopen(outputfile,"wb");
if(ofp==NULL)
{
printf("destination file open error!\n");
return;
}//打开文件
flength=0;
while(!feof(ifp))
{
c=fgetc(ifp);
header[c].count++;
flength++;
}
flength--;
header[c].count--;//统计词频
for(i=0;i<512;i++){
if(header[i].count!=0)header[i].b=(unsigned char)i;
else header[i].b=0;
header[i].parent=-1;
header[i].lch=header[i].rch=-1;
}//赋初值
for(i=0;i<256;i++){
for(j=i+1;j<256;j++){
if(header[i].count<header[j].count){
tmp=header[i];
header[i]=header[j];
header[j]=tmp;
}
}
}//排序
for(i=0;i<256;i++)
if(header[i].count==0)break;
n=i;//i为权值大于0的结点数目
m=2*n-1;
for(i=n;i<m;i++){
min1=999999999;
for(j=0;j<i;j++){
if(header[j].parent!=-1)continue;//-1标志根节点
if(min1>header[j].count){
pt1=j;
min1=header[j].count;
}
}//找到权值最小的min1
header[i].count=header[pt1].count;
header[pt1].parent=i;
header[i].lch=pt1;
min1=999999999;
for(j=0;j<i;j++){
if(header[j].parent!=-1)continue;
if(min1>header[j].count){
pt1=j;
min1=header[j].count;
}
}
header[i].count+=header[pt1].count;
header[i].rch=pt1;
header[pt1].parent=i;
}//构建Huffman树
for(i=0;i<n;i++){
f=i;
header[i].bits[0]=0;
while(header[f].parent!=-1){
j=f;
f=header[f].parent;
if(header[f].lch==j){
j=strlen(header[i].bits);
memmove(header[i].bits+1,header[i].bits,j+1);
header[i].bits[0]='0';
}
else{
j=strlen(header[i].bits);
memmove(header[i].bits+1,header[i].bits,j+1);
header[i].bits[0]='1';
}
}
}//编码,有待改进
fseek(ifp,0,SEEK_SET);
fwrite(&flength,sizeof(long),1,ofp);
fseek(ofp,8,SEEK_SET);
buf[0]=0;
f=0;
pt1=8;//与151行结合
while(!feof(ifp)){
c=fgetc(ifp);
f++;
for(i=0;i<n;i++)
if(c==header[i].b) break;
strcat(buf,header[i].bits);
j=strlen(buf);
c=0;
while(j>=8){
for(i=0;i<8;i++){
if(buf[i]=='1')c=(c<<1)|1;
else c=c<<1;
}
fwrite(&c,1,1,ofp);
pt1++;
strcpy(buf,buf+8);
j=strlen(buf);
}
if(f%1000000==0)
printf("\r完成度%.2f",(float)f/flength);
if(f==flength){printf("\r完成度1.00");break;}
}
if(j>0){
strcat(buf,"00000000");
for(i=0;i<8;i++){
if(buf[i]=='1') c=(c<<1)|1;
else c=c<<1;
}
fwrite(&c,1,1,ofp);
pt1++;
}//压缩写入
fseek(ofp,4,SEEK_SET);
fwrite(&pt1,sizeof(long),1,ofp);//写入标记
fseek(ofp,pt1,SEEK_SET);
fwrite(&n,sizeof(long),1,ofp);//从标记处开始写,先写n
for(i=0;i<n;i++){
fwrite(&(header[i].b),1,1,ofp);//原字符
c=strlen(header[i].bits);//编码长
fwrite(&c,1,1,ofp);
j=strlen(header[i].bits);
if(j%8!=0){
for(f=j%8;f<8;f++)
strcat(header[i].bits,"0");//补位
}
while(header[i].bits[0]!=0){
c=0;
for(j=0;j<8;j++){
if(header[i].bits[j]=='1')c=(c<<1)|1;
else c=c<<1;
}
strcpy(header[i].bits,header[i].bits+8);
fwrite(&c,1,1,ofp);//编码的字符表示
}
}
fclose(ifp);
fclose(ofp);
printf("\ncompress successfully!压缩率为%f\n",(float)pt1/flength);
return;
}
void uncompress(){
char filename[255],outputfile[255],buf[255],bx[255];
unsigned char c;
long i,j,m,n,f,p,l;
long flength;
FILE *ifp,*ofp;
printf("source filename:");
gets(filename);
ifp=fopen(filename,"rb");
if(ifp==NULL){
printf("source file open error!\n");
return;
}
printf("destination filename:");
gets(outputfile);
ofp=fopen(outputfile,"wb");
if(ofp==NULL){
printf("destination file open error!\n");
return;
}
fread(&flength,sizeof(long),1,ifp);
fread(&f,sizeof(long),1,ifp);
fseek(ifp,f,SEEK_SET);//找到标记所指的信息位置
fread(&n,sizeof(long),1,ifp);//字符种类数
for(i=0;i<n;i++){
fread(&header[i].b,1,1,ifp);//原字符
fread(&c,1,1,ifp);//编码长度
p=(long)c;
header[i].count=p;//count在此用来存编码长度
header[i].bits[0]=0;
if(p%8>0) m=p/8+1;
else m=p/8;//求字符表示的编码长
for(j=0;j<m;j++){
fread(&c,1,1,ifp);
f=c;
itoa(f,buf,2);
f=strlen(buf);
for(l=8;l>f;l--)strcat(header[i].bits,"0");
//补成8位
strcat(header[i].bits,buf);
}
header[i].bits[p]=0;//多余“0"会写在0之后,准确还原编码
}//读取压缩信息
for(i=0;i<n;i++){
for(j=i+1;j<n;j++){
if(strlen(header[i].bits)>strlen(header[j].bits)){
tmp=header[i];
header[i]=header[j];
header[j]=tmp;
}
}
}//排序
p=strlen(header[n-1].bits);//最长的编码长度
fseek(ifp,8,SEEK_SET);
m=0;
bx[0]=0;
while(1){
while(strlen(bx)<(unsigned int)p){
fread(&c,1,1,ifp);
f=c;
itoa(f,buf,2);
f=strlen(buf);
for(l=8;l>f;l--)strcat(bx,"0");
strcat(bx,buf);
}
for(i=0;i<n;i++)
if(memcmp(header[i].bits,bx,header[i].count)==0)break;
strcpy(bx,bx+header[i].count);
c=header[i].b;
fwrite(&c,1,1,ofp);
m++;
if(m%1000000==0)
printf("\r完成度%.2f",(float)m/flength);
if(m==flength){printf("\r完成度1.00");break;}
}
fclose(ifp);
fclose(ofp);
printf("Uncompress successfully!\n");
return;
}
int main(){
char c;
while(1){
printf("1--Compress file\n");
printf("2--Uncompress file\n");
printf("3--exit\n");
printf("Select 1 or 2 or 3:");
c=getch();
printf("%c\n",c);
if(c=='1')compress();
else if(c=='2')uncompress();
else if(c=='3')return 0;
}
}