#include <fstream>
#include <iostream>
#include <string>
#include <list>
using namespace std;
#define MATCH 0 //字符相同时的代价
#define MISMATCH 3 //字符不同时的代价
#define GAP 2 //补充空格时的代价
//Coordinates结构体保存的是比对路径坐标
struct Coordinates
{
int row,col; //行和列坐标
//重载 < 运算符,在坐标队列排序时调用(list的sort()方法)
bool operator < (Coordinates &b)
{
if (row < b.row)
return true;
else
if (row == b.row)
return col < b.col;
else
return false;
}
//重载 == 运算符,在坐标队列比较时调用(list的unique()方法)
bool operator == (Coordinates &b)
{
return (row == b.row) && (col == b.col);
}
};
//Record记录用Alignment函数比对的结果
struct Record
{
int currow,curcol; //当前的坐标
int identifier; //它的前驱在数组里的位置
};
class SqAlignment
{
private:
string X, Y; //要进行比对的字符串
unsigned int m, n; //X,Y的长度
unsigned int gaps, mismatchs, matchs;
const char *pX, *pY; //字符串转化为数组
list<Coordinates> P; //记录比对坐标的关键队列
long **SEA, **BSEA; //SpaceEfficientAlignment和BackSpaceEfficientAlignment函数用到的数组
Record *record; //记录路径
string sX, sY; //进行比对后的字符串
unsigned long cost; //比对总代价
double similarity; //相似度
public:
SqAlignment(string,string); //构造函数,接受需要比对的字符串
~SqAlignment(); //析构函数
void Alignment(int, int, int, int); //处理比对函数
unsigned long Min(unsigned long, unsigned long, unsigned long);//求最小值
int Match(char,char); //计分规则
void TraceBack(int); //回溯函数,寻找比对路径
void SpaceEfficientAlignment(int, int, int, int); //在DivideandConquerAlignment中处理前一半数据
void BackSpaceEfficientAlignment(int, int, int, int);//在DivideandConquerAlignment中处理后一半数据
void DivideandConquerAlignment(int, int, int, int); //分置函数
void PrintList(); //打印比对坐标队列
void PrintCount() //打印比对统计结果
{
cout << "比对总代价:" << cost
<< "\n比对后的字符串长度:" << sX.size()
<< "\n相同字符的个数:" << matchs
<< "\n空格的个数:" << gaps
<< "\n不同字符的个数:" << mismatchs << endl;
}
bool SaveResultStr(string, string); //保存结果序列
void CreateResultStr(); //根据坐标队列生成结果序列
void PrintResultStr() //打印结果序列
{
cout << sX << '\n' << sY << endl;
}
unsigned int GetXLength()
{
return m;
}
unsigned int GetYLength()
{
return n;
}
double GetSimilarity() //计算相似度
{
return 1.0 - double(cost)/double(sX.size() * MISMATCH);
}
};
void SqAlignment::PrintList()
{
list<Coordinates>::iterator iter;
for (iter = P.begin(); iter != P.end(); iter++)
cout <<static_cast<Coordinates>(*iter).row << '\t'<<static_cast<Coordinates>(*iter).col<<'\n';
}
unsigned long SqAlignment::Min(unsigned long a, unsigned long b, unsigned long c)
{
if(a <= b && a <= c)
return a;
else
if(b <= a && b <= c)
return b;
else
return c;
}
SqAlignment::SqAlignment(string x, string y)
{
X = x;
Y = y;
m = X.size();
n = Y.size();
gaps = matchs = mismatchs = cost = 0;
similarity = 0.0;
pX = X.c_str();
pY = Y.c_str();
SEA = new long *[m+1];
BSEA = new long *[m+2];
for (int i = 0; i <= m; i++)
{
SEA[i] = new long [2];
BSEA[i] = new long [2];
}
BSEA[m+1] = new long [2];
}
SqAlignment::~SqAlignment()
{
delete []SEA;
delete []BSEA;
}
int SqAlignment::Match(char i, char j)
{
if (i == j) //如果两个字符相同,付出0代价
return MATCH;
else
return MISMATCH; //不相同扣MISMATCH分
}
void SqAlignment::Alignment(int xbegin, int xend, int ybegin, int yend)
{
int len1 = xend-xbegin+2;
int len2 = yend-ybegin+2;
record = new Record [len1*len2];
int **s = new int *[len1];
int k = 0;
for (int i = 0; i <= len1-1; i++)
{
s[i] = new int [len2];
s[i][0] = i * GAP;
record[i*len2].identifier = (i-1)*len2;
record[i*len2].currow = i;
record[i*len2].curcol = 0;
}
for (i = 0; i <= len2-1; i++)
{
s[0][i] = i * GAP;
record[i].identifier = i - 1;
record[i].currow = 0;
record[i].curcol = i;
}
for (int j = 1; j <= len2-1; j++)
{
for (int i = 1; i <= len1-1; i++)
{
k = i*len2+j;
unsigned long a = Match(pX[i+xbegin-2], pY[j+ybegin-2]) + s[i-1][j-1];
unsigned long b = GAP + s[i][j-1];
unsigned long c = GAP + s[i-1][j];
record[k].currow = xbegin + i - 1;
record[k].curcol = ybegin + j - 1;
if (a <= b && a <= c)
{
s[i][j] = a;
record[k].identifier = (i-1) * len2 + j - 1;
}
else
if (b <= a && b <= c)
{
s[i][j] = b;
record[k].identifier = i * len2 + j - 1;
}
else
{
s[i][j] = c;
record[k].identifier = (i-1) * len2 + j ;
}
}
}
delete []s;
TraceBack(len1*len2-1);
delete record;
}
void SqAlignment::TraceBack(int num)
{
if ((num > 0) && record[num].currow && record[num].curcol)
{
Coordinates a;
a.row = record[num].currow;
a.col = record[num].curcol;
P.push_back(a);
TraceBack(record[num].identifier);
}
}
bool SqAlignment::SaveResultStr(string fXaddress, string fYaddress)
{
fXaddress.append(".txt");
fYaddress.append(".txt");
ofstream foutX,foutY;
foutX.open(fXaddress.c_str(),ios::out);
foutY.open(fYaddress.c_str(),ios::out);
if (!foutX || !foutY)
return false;
cout << "正在生成文件" << fXaddress << "...\n";
foutX << sX;
cout << "生成" << fXaddress << "成功。\n\n";
cout << "正在生成文件" << fYaddress << "...\n";
foutY << sY;
cout << "生成" << fYaddress << "成功。\n";
foutX.close();
foutY.close();
return true;
}
void SqAlignment::CreateResultStr()
{
P.sort();
P.unique();
int rowidentifier=1,colidentifier=1;
list<Coordinates>::iterator iter,iter1;
iter1 = P.end();
int row,col;
for (iter = P.begin(); iter != P.end(); iter++)
{
row = static_cast<Coordinates>(*iter).row;
col = static_cast<Coordinates>(*iter).col;
while (row > rowidentifier)
{
sX.append(1,pX[rowidentifier-1]);
sY.append(1,'-');
rowidentifier++;
cost = cost + GAP;
gaps++;
}
while (col > colidentifier)
{
sY.append(1,pY[colidentifier-1]);
sX.append(1,'-');
colidentifier++;
cost = cost + GAP;
gaps++;
}
if (row == static_cast<Coordinates>(*iter1).row)
{
sY.append(1,pY[static_cast<Coordinates>(*iter).col-1]);
sX.append(1,'-');
iter1 = iter;
rowidentifier++;
colidentifier++;
cost = cost + GAP;
gaps++;
continue;
}
if(col == static_cast<Coordinates>(*iter1).col)
{
sX.append(1,pX[row-1]);
sY.append(1,'-');
iter1 = iter;
rowidentifier++;
colidentifier++;
cost = cost + GAP;
gaps++;
continue;
}
sX.append(1,pX[row-1]);
sY.append(1,pY[col-1]);
if (MATCH == Match(pX[row-1], pY[col-1]))
matchs++;
else
mismatchs++;
iter1 = iter;
rowidentifier++;
colidentifier++;
cost = cost + Match(pX[row-1], pY[col-1]);
}
}
void SqAlignment::SpaceEfficientAlignment(int xbegin, int xend, int ybegin, int yend)
{
for (int i = xbegin-1; i <= xend; i++)
{
SEA[i][0] = (i-xbegin+1) * GAP;
}
for (int j = ybegin; j <= yend; j++)
{
SEA[xbegin-1][1] = (j-ybegin+1) * GAP;
for (int i = xbegin; i <= xend; i++)
{
SEA[i][1] = Min(Match(pX[i-1], pY[j-1]) + SEA[i-1][0], GAP + SEA[i-1][1], GAP + SEA[i][0]);
}
if (j != yend)
{
for (i = xbegin-1; i <= xend; i++)
SEA[i][0] = SEA[i][1];
}
}
}
void SqAlignment::BackSpaceEfficientAlignment(int xbegin, int xend, int ybegin, int yend)
{
for (int i = xbegin; i <= xend+1; i++)
{
BSEA[i][0] = (xend+1-i) * GAP;
}
for (int j = yend; j >= ybegin; j--)
{
BSEA[xend+1][1] = (yend+1-j) * GAP;
for (int i = xend; i >= xbegin; i--)
{
BSEA[i][1] = Min(Match(pX[i-1], pY[j-1]) + BSEA[i+1][0], GAP + BSEA[i+1][1], GAP + BSEA[i][0]);
}
if (j != ybegin)
{
for (i = xbegin; i <= xend+1; i++)
BSEA[i][0] = BSEA[i][1];
}
}
}
void SqAlignment::DivideandConquerAlignment(int xbegin, int xend, int ybegin, int yend)
{
int len1 = xend - xbegin + 1;
int len2 = yend - ybegin + 1;
if (len1 <= 2 || len2 <=2)
Alignment(xbegin, xend, ybegin, yend);
else
{
SpaceEfficientAlignment(xbegin, xend, ybegin, (yend+ybegin)/2);
BackSpaceEfficientAlignment(xbegin, xend, (yend+ybegin)/2, yend);
int p = xbegin;
for (int i = xbegin; i <= xend; i++)
{
if ((SEA[i][1] + BSEA[i][1]) < (SEA [p][1] + BSEA[p][1]))
p = i;
}
Coordinates a;
a.row = p;
a.col = (yend + ybegin) / 2;
P.push_back(a);
int xe, ye, xb, yb;
unsigned long ca = Match(pX[p-1], pY[(yend + ybegin) / 2-1]) + SEA[p-1][0], cb = GAP + SEA[p-1][1], cc = GAP + SEA[p][0];
if (ca <= cb && ca <= cc)
{
xe = p - 1;
ye = (yend + ybegin) / 2-1;
}
else
if (cb <= ca && cb <= cc)
{
xe = p-1;
ye = (yend + ybegin) / 2;
}
else
{
xe = p;
ye = (yend + ybegin) / 2-1;
}
ca = Match(pX[p-1], pY[(yend + ybegin) / 2-1]) + BSEA[p+1][0], cb = GAP + BSEA[p-1][1], cc = GAP + BSEA[p][0];
if (ca <= cb && ca <= cc)
{
xb = p + 1;
yb = (yend + ybegin) / 2+1;
}
else
if (cb <= ca && cb <= cc)
{
xb = p+1;
yb = (yend + ybegin) / 2;
}
else
{
xb = p;
yb = (yend + ybegin) / 2+1;
}
DivideandConquerAlignment(xbegin, xe, ybegin, ye);
DivideandConquerAlignment(xb, xend, yb, yend);
}
}
int main(void)
{
cout << "***********************************************************\n"
<< "* 序列比对算法 *\n"
<< "* 时间复杂度为O(mn) | 空间复杂度为O(m+n) *\n"
<< "* *\n"
<< "*比对规则: *\n"
<< "* 两字符相同时付出的代价为:0 *\n"
<< "* 补充空格时付出的代价为 :2 *\n"
<< "* 两字符不同是付出的代价为:3 *\n"
<< "***********************************************************\n"
<< endl;
string ReadData(string, char);
char choosein =0;
string X,Y;
string fXaddress="",fYaddress="";
cout << "你是要从文件读入还是输入字符串(输入1从文件读入,否则自己输入字符串):";
cin >> choosein;
cin.clear();
cin.sync();
if ('1' != choosein)
{
cout << "输入字符串X:";
cin >> X;
cout << "输入字符串Y:";
cin >> Y;
}
else
{
char rule;
cout << "输入第一个文件地址:";
cin >> fXaddress;
cout << "输入第二个文件地址:";
cin >> fYaddress;
cout << "是否忽略空格和换行符?y/n:";
cin >> rule;
while (rule != 'y' && rule != 'n' && rule != 'Y' && rule != 'N')
{
cout << "输入的指令不正确,请输入Y(y)/N(n):";
cin >> rule;
}
X = ReadData(fXaddress, rule);
Y = ReadData(fYaddress, rule);
}
if ("" == X || "" == Y)
{
cout << "字符串都不能为空。" << endl;
abort();
}
SqAlignment s (X, Y);
cout << "正在比对字符串序列...\n" << endl;
s.DivideandConquerAlignment(1, X.size(), 1, Y.size());
s.CreateResultStr();
cout << "序列比对结束\n";
cout << "字符串X的长度为" << s.GetXLength() << '\n';
cout << "字符串Y的长度为" << s.GetYLength() << '\n';
s.PrintCount();
cout << "字符串的相似度为:" << s.GetSimilarity() << '\n';
char chooseout = 0;
cout << "打印比对后的字符串还是将其保存为文件?(输入1仅打印,输入2即打印又保存为文件,否则仅保存为文件):";
cin >> chooseout;
switch (chooseout)
{
case '1':
s.PrintResultStr();
break;
case '2':
s.PrintResultStr();
default:
bool identifier = false;
if ('1' != choosein)
identifier = s.SaveResultStr("str1.txt", "str2.txt");
else
identifier = s.SaveResultStr(fXaddress, fYaddress);
if (false == identifier)
{
cout << "文件生成失败" << endl;
abort();
}
}
//s.PrintList();
cin.clear();
cin.sync();
getchar();
return 0;
}
string ReadData(string address, char rule)
{
string str,tmp;
ifstream fin;
fin.open(address.c_str(),ios::in);
if (!fin)
{
cout << "打开文件" << address << "失败。" << endl;
abort();
}
cout << "\n正在读入" << address << "...\n" << endl;
if (rule == 'y' || rule == 'Y')
{
while (fin)
{
fin >> tmp;
str.append(tmp);
}
}
else
{
while (fin)
{
fin >> tmp;
str.append(tmp);
str.append(1,' ');
}
}
cout << "读入" << address << "成功。\n";
fin.close();
return str;
}
序列比对算法
最新推荐文章于 2019-10-24 11:03:52 发布