Hirschberg's algorithm to find string alignment

// Based on Hirschberg's algorithm to find the best alignment for two strings.
// This algorithm is proved to run in O(mn) time and O(m + n) space
// reference:
// https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm
// http://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/06DynamicProgrammingII.pdf
// http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Dynamic/Hirsch/




#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <Windows.h>
using namespace std;


vector<int> score(string X, string Y, int alpha, int theta, bool rev)
{
	int m = static_cast<int>(X.length());
	int n = static_cast<int>(Y.length());
	vector<int> ret(n + 1, 0);
	for (int j = 0; j <= n; ret[j] = j * theta, ++j);

	if (rev)
	{
		reverse(X.begin(), X.end());
		reverse(Y.begin(), Y.end());
	}


	for (int i = 1; i <= m; ++i)
	{
		int prev = ret[0];
		ret[0] += theta;
		for (int j = 1; j <= n; ++j)
		{
			int cost = 0;
			

			if (X[i - 1] == Y[j - 1])
				cost = prev;
			else
				cost = prev + alpha;
			cost = min(cost, theta + ret[j]);
			cost = min(cost, theta + ret[j - 1]);
			prev = ret[j];
			ret[j] = cost;
		}
	}
	return ret;
}

pair<string, string> NeedlemanWunsch(string X, string Y, int alpha, int theta)
{
	int m = X.length();
	int n = Y.length();
	vector<vector<int>> M(m + 1, vector<int>(n + 1, 0));
	for (int i = 0; i <= m; ++i)
		M[i][0] = i * theta;
	for (int j = 0; j <= n; ++j)
		M[0][j] = j * theta;

	for (int i = 1; i <= m; ++i)
	{
		for (int j = 1; j <= n; ++j)
		{
			int cost = 0; int kind = 1;
			if (X[i - 1] == Y[j - 1])
				cost = M[i - 1][j - 1];
			else
				cost = M[i - 1][j - 1] + alpha;
			cost = min(cost, theta + M[i - 1][j]);
			cost = min(cost, theta + M[i][j - 1]);
			M[i][j] = cost;
		}
	}

	int i = m, j = n;
	vector<string> align(2);
	// backtracking to get the solution
	while (i >= 1 || j >= 1)
	{
		if ((i > 0 && M[i][j] == theta + M[i - 1][j]) || j == 0)
		{
			align[0].push_back(X[i - 1]);
			align[1].push_back('-');
			--i;
		}
		else if ((j > 0 && M[i][j] == theta + M[i][j - 1]) || i == 0)
		{
			align[0].push_back('-');
			align[1].push_back(Y[j - 1]);
			--j;
		}
		else if (M[i][j] == M[i - 1][j - 1] + alpha || M[i][j] == M[i - 1][j - 1])
		{
			align[0].push_back(X[i - 1]);
			align[1].push_back(Y[j - 1]);
			--i, --j;
		}
	}
	reverse(align[0].begin(), align[0].end());
	reverse(align[1].begin(), align[1].end());

	return {align[0], align[1]};
}

int partitionY(vector<int> &scoreL, vector<int> &scoreR)
{
	int n = static_cast<int>(scoreL.size());
	int minimum = INT_MAX, index = 0;

	reverse(scoreR.begin(), scoreR.end());

	for (int i = 0; i < n; ++i)
	{
		if (scoreL[i] + scoreR[i] < minimum)
		{
			minimum = scoreL[i] + scoreR[i];
			index = i;
		}
	}
	return index;
}

pair<string, string> Hirschberg(string X, string Y, int alpha, int theta)
{
	int m = static_cast<int>(X.length());
	int n = static_cast<int>(Y.length());

	string Z, W;

	if (m == 0)
	{
		return{ string(n, '-'), Y };
	}
	else if (n == 0)
	{
		return{ X, string(m, '-') };
	}
	else if (m == 1 || n == 1)
	{

		return NeedlemanWunsch(X, Y, alpha, theta);
	}
	else
	{
		int xmid = m / 2;

		string X_l = X.substr(0, xmid);
		string X_r = X.substr(xmid, m - xmid);


		int ymid = partitionY(score(X_l, Y, alpha, theta, false), score(X_r, Y, alpha, theta, true));


		string Y_l = Y.substr(0, ymid);
		string Y_r = Y.substr(ymid, n - ymid);


		pair<string, string> ret1 = Hirschberg(X_l, Y_l, alpha, theta);
		pair<string, string> ret2 = Hirschberg(X_r, Y_r, alpha, theta);

		Z = ret1.first + ret2.first;
		W = ret1.second + ret2.second;
	}
	return { Z, W };
}


int main()
{
	string X = "CTACCG";
	string Y = "TACATG";

	string X1 = "AGTACGCA";
	string Y1 = "TATGC";
	
	auto ret1 = Hirschberg(X, Y, 1, 2);
	cout << "X: " << X << endl;
	cout << "Y: " << Y << endl;
	cout << "the alignment for X and Y:" << endl;
	cout << ret1.first << endl;
	cout << ret1.second << endl;

	cout << "--------------------------------------------------" << endl;

	auto ret2 = Hirschberg(X1, Y1, 1, 2);
	cout << "X1: " << X << endl;
	cout << "Y1: " << Y << endl;
	cout << "the alignment for X1 and Y1:" << endl;
	cout << ret2.first << endl;
	cout << ret2.second << endl;

	system("PAUSE");
	return 0;
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值