最近使用C++写了NeedlemanWunsch算法,是生物信息里序列比对的重要算法,具体说明请参考之前用python实现的算法文章《基于numpy的基因序列比对算法NeedlemanWunsch》
这里就直接贴代码了:
#include <iostream>
using namespace std;
string target_str="ATCGATCGTG";
string query_str="ACGACTACGTG";
const int match_score = 1;
const int mismatch_score = -1;
const int gap_score = -1;
const int t_length = target_str.length();
const int q_length = query_str.length();
void init_matrix(int **score_matrix){
for (int i=0; i<=t_length; i++)
{
for (int j=0; j<=q_length; j++)
{
if (i == 0)
{
score_matrix[i][j] = -j;
}
else if (j == 0)
{
score_matrix[i][j] = -i;
}
else
{
score_matrix[i][j] = 0;
}
}
}
}
void print_matrix(int **score_matrix){
int i,j;
cout << "\t\t";
for (j=0; j<=q_length; j++)
{
cout << query_str[j] << "\t";
}
cout << endl;
for (i=0; i<=t_length; i++)
{
for (j=0; j<=q_length; j++)
{
if (j==q_length)
{
cout << score_matrix[i][j] << endl;
}
else if (j==0 && i==0)
{
cout << "\t" << score_matrix[i][j] << "\t";
}
else if (j==0 && i!=0)
{
cout << target_str[i-1] << "\t" << score_matrix[i][j] << "\t";
}
else
{
cout << score_matrix[i][j] << "\t";
}
}
}
}
void cal_score(int **score_matrix){
for (int i=1; i<=t_length; i++)
{
for (int j=1; j<=q_length; j++)
{
if (score_matrix[i-1][j-1]>=score_matrix[i-1][j] && score_matrix[i-1][j-1]>=score_matrix[i][j-1]){
score_matrix[i][j] = score_matrix[i-1][j-1];
if (target_str[i-1] == query_str[j-1]){
score_matrix[i][j]+=match_score;
}
else {
score_matrix[i][j]+=mismatch_score;
}
}
else {
score_matrix[i][j] = (score_matrix[i][j-1]>=score_matrix[i-1][j])?score_matrix[i][j-1]:score_matrix[i-1][j];
score_matrix[i][j]+=gap_score;
}
}
}
}
typedef struct Result
{
string query;
string target;
}Result;
Result back_tracking(int **score_matrix){
Result result;
int i = t_length;
int j = q_length;
while (i>0 && j>0){
if ((score_matrix[i-1][j-1]>=score_matrix[i-1][j]) && (score_matrix[i-1][j-1]>=score_matrix[i][j-1])){
result.target += target_str[i-1];
result.query += query_str[j-1];
i--;
j--;
}
else if ((score_matrix[i][j-1]>score_matrix[i-1][j-1]) && (score_matrix[i][j-1]>score_matrix[i-1][j])){
result.target += "-";
result.query += query_str[j-1];
j--;
}
else{
result.target += target_str[i-1];
result.query += "-";
i--;
}
}
return result;
}
void print_seq(Result result){
for (int i=result.query.size(); i>0; i--){
cout<<result.query[i-1];
}
cout<<endl;
for (int i=result.query.size(); i>0; i--){
if (result.query[i-1] == result.target[i-1]){
cout << "|";
}
else{
cout << " ";
}
}
cout<<endl;
for (int i=result.target.size(); i>0; i--){
cout<<result.target[i-1];
}
cout<<endl;
}
int main()
{
int **score_matrix = new int *[t_length+1];
for (int i=0; i<=t_length; i++){
score_matrix[i] = new int [q_length+1];
}
init_matrix(score_matrix);
cout << "init score matrix" << endl << "--------" << endl;
print_matrix(score_matrix);
cal_score(score_matrix);
cout << "scored matrix" << endl << "--------" << endl;
print_matrix(score_matrix);
Result result = back_tracking(score_matrix);
print_seq(result);
}
代码运行结果如下: