// 序列压缩中基于Markov预测模型的Finite Context Model源码实现,还未实现位数非常大时的加减乘除运算,更新中…
#include "stdio.h"
#include "iostream"
#include "map"
#include <string.h>
#define K 5 // order of the context model: how many preceding symbols form the context
using namespace std;

// One dictionary entry: a string of up to K+1 symbols (a K-symbol context
// followed by the symbol that came after it) and the number of times that
// string has been seen.
struct dct
{
public:
    // Creates an empty entry with a zero count.
    dct() : num(0)
    {
        str[0] = '\0';
    }

    // Creates an entry with count x for string y. y is truncated to K+1
    // symbols so the copy can never run past the end of str.
    dct(int x, const char *y) : num(x)
    {
        strncpy(str, y, sizeof(str) - 1);
        str[sizeof(str) - 1] = '\0';
    }

    int num;         // occurrence count
    char str[K + 2]; // K+1 symbols plus the terminating NUL
};

// Global dictionary of observed strings, keyed by insertion number.
map<int, dct> mymap;
// Retained for compatibility; the lookups below use local iterators.
map<int, dct>::iterator it;
// Last key handed out for mymap. Formerly named `index`, which collides with
// the POSIX index() function that <string.h> declares on glibc and other
// POSIX C libraries and therefore fails to compile there.
int next_key = 0;

// Records one occurrence of input1 in the dictionary.
//
// If an entry with exactly this string exists its counter is incremented,
// otherwise a new entry with count 1 is inserted.
// Returns the count that was stored before this call (0 when the string was
// not in the dictionary yet).
int sch1(char *input1)
{
    for (map<int, dct>::iterator entry = mymap.begin(); entry != mymap.end(); ++entry)
    {
        if (strcmp(entry->second.str, input1) == 0)
        {
            const int seen_before = entry->second.num;
            entry->second.num += 1;
            return seen_before;
        }
    }
    mymap.insert(make_pair(++next_key, dct(1, input1)));
    return 0;
}

// Returns the summed count of every dictionary entry whose first K symbols
// equal the first K symbols of input2, i.e. how often that context was seen.
int sch2(char *input2)
{
    int count2 = 0;
    for (map<int, dct>::iterator entry = mymap.begin(); entry != mymap.end(); ++entry)
    {
        // strncmp compares at most K characters and stops at a NUL, so no
        // unterminated scratch copies of the prefixes are needed.
        if (strncmp(entry->second.str, input2, K) == 0)
        {
            count2 += entry->second.num;
        }
    }
    return count2;
}

// Returns the count of the dictionary entry whose string equals input3
// exactly, or 0 when there is no such entry.
int sch3(char *input3)
{
    for (map<int, dct>::iterator entry = mymap.begin(); entry != mymap.end(); ++entry)
    {
        if (strcmp(entry->second.str, input3) == 0)
        {
            return entry->second.num;
        }
    }
    return 0;
}

// Reads the next symbol from f, skipping line breaks so that a trailing
// newline is not encoded as a symbol. Returns EOF at end of input.
static int next_symbol(FILE *f)
{
    int c;
    do
    {
        c = fgetc(f);
    } while (c == '\n' || c == '\r');
    return c;
}

// Selects the {low, high} sub-interval belonging to symbol. Any character
// other than 'A', 'T' or 'C' is coded with the G interval.
static const double *interval_of(char symbol, const double *A, const double *T,
                                 const double *C, const double *G)
{
    if (symbol == 'A')
    {
        return A;
    }
    if (symbol == 'T')
    {
        return T;
    }
    if (symbol == 'C')
    {
        return C;
    }
    return G;
}

// Narrows the current coding interval rng to the sub-interval sym of it.
// Both arrays are {low, high}; sym is expressed relative to [0, 1].
static void narrow_interval(double rng[2], const double sym[2])
{
    const double low = rng[0];
    const double range = rng[1] - rng[0];
    rng[0] = low + range * sym[0];
    rng[1] = low + range * sym[1];
}

// Arithmetic-codes the A/T/C/G sequence in "test.txt" with an order-K finite
// context model and prints the coding interval after every symbol.
//
// The first K symbols are coded with a fixed uniform distribution. Every
// later symbol s following context c is coded with probability
//     (n(c,s) + aofa) / (n(c) + 4 * aofa)
// where the counts come from the dictionary of (K+1)-symbol strings seen so
// far. The counts are read before the current string is recorded.
//
// The interval is held in doubles, so long inputs exhaust its precision.
// Returns 0 on success and 1 when the input file cannot be opened.
int main()
{
    // Sub-intervals of [0, 1] assigned to each symbol; uniform to begin with.
    double A[2] = {0, 0.25};
    double T[2] = {0.25, 0.5};
    double C[2] = {0.5, 0.75};
    double G[2] = {0.75, 1};
    double rng[2] = {0, 1};   // current coding interval
    const double aofa = 0.25; // additive smoothing constant of the estimator

    // Sliding window: K context symbols, the current symbol, and a NUL.
    char buff[K + 2];
    char buff_copy[K + 2];

    FILE *f = fopen("test.txt", "r");
    if (f == NULL)
    {
        fprintf(stderr, "cannot open test.txt\n");
        return 1;
    }

    // Code the first K symbols with the uniform distribution; there is no
    // context for them yet, so nothing is added to the dictionary.
    int filled = 0;
    int c = EOF;
    while (filled < K && (c = next_symbol(f)) != EOF)
    {
        buff[filled] = static_cast<char>(c);
        narrow_interval(rng, interval_of(buff[filled], A, T, C, G));
        ++filled;
        cout << '[' << rng[0] << ',' << rng[1] << ']' << '\n';
    }
    buff[filled] = '\0';

    // Code every remaining symbol with the adaptive model. The loop is
    // skipped when the input held fewer than K symbols.
    while (filled == K && (c = next_symbol(f)) != EOF)
    {
        // Append the new symbol: buff now holds context + symbol.
        buff[K] = static_cast<char>(c);
        buff[K + 1] = '\0';

        // How often the context, and each context + symbol, was seen so far.
        const int n_c = sch2(buff);
        strcpy(buff_copy, buff);
        buff_copy[K] = 'A';
        const int n_A_c = sch3(buff_copy);
        buff_copy[K] = 'T';
        const int n_T_c = sch3(buff_copy);
        buff_copy[K] = 'C';
        const int n_C_c = sch3(buff_copy);

        // Record this occurrence only after the counts have been read.
        sch1(buff);

        // Re-estimate the symbol probabilities for this context. G takes
        // whatever remains of [0, 1], so its own count is not needed.
        const double denom = n_c + aofa * 4;
        const double pA = (n_A_c + aofa) / denom;
        const double pT = (n_T_c + aofa) / denom;
        const double pC = (n_C_c + aofa) / denom;

        // Lay the probabilities out as adjacent sub-intervals of [0, 1].
        A[0] = 0;
        A[1] = pA;
        T[0] = A[1];
        T[1] = pA + pT;
        C[0] = T[1];
        C[1] = pA + pT + pC;
        G[0] = C[1];
        G[1] = 1;

        // Code the current symbol and print the resulting interval.
        narrow_interval(rng, interval_of(buff[K], A, T, C, G));
        cout << "[" << rng[0] << "," << rng[1] << "]" << '\n';

        // Slide the window: the last K symbols become the next context.
        memmove(buff, buff + 1, K);
        buff[K] = '\0';
    }

    fclose(f);
    return 0;
}