序列比对算法

#include <fstream>
#include <iostream>
#include <string>
#include <list>
using namespace std;

#define MATCH 0            //字符相同时的代价
#define MISMATCH 3        //字符不同时的代价
#define GAP 2            //补充空格时的代价

//Coordinates结构体保存的是比对路径坐标
struct Coordinates
{
    int row,col;        //行和列坐标

    //重载  <  运算符,在坐标队列排序时调用(list的sort()方法)
    bool operator < (Coordinates &b)
    {
        if (row < b.row)
            return true;
        else
            if (row == b.row)
                return col < b.col;
        else
            return false;
    }
    //重载  ==  运算符,在坐标队列比较时调用(list的unique()方法)
    bool operator == (Coordinates &b)
    {
        return (row == b.row) && (col == b.col);
    }
};

//Record记录用Alignment函数比对的结果
struct Record
{
    int currow,curcol;    //当前的坐标
    int identifier;        //它的前驱在数组里的位置
};
class SqAlignment
{
private:
    string X, Y;                                            //要进行比对的字符串
    unsigned int m, n;                                    //X,Y的长度
    unsigned int gaps, mismatchs, matchs;
    const char *pX, *pY;                                    //字符串转化为数组
    list<Coordinates> P;                                //记录比对坐标的关键队列
    long **SEA, **BSEA;                                    //SpaceEfficientAlignment和BackSpaceEfficientAlignment函数用到的数组
    Record *record;                                        //记录路径
    string sX, sY;                                        //进行比对后的字符串
    unsigned long cost;                                    //比对总代价
    double similarity;                                    //相似度
public:
    SqAlignment(string,string);                            //构造函数,接受需要比对的字符串
    ~SqAlignment();                                        //析构函数
    void Alignment(int, int, int, int);                    //处理比对函数
    unsigned long Min(unsigned long, unsigned long, unsigned long);//求最小值
    int Match(char,char);                                //计分规则
    void TraceBack(int);                                //回溯函数,寻找比对路径
    void SpaceEfficientAlignment(int, int, int, int);    //在DivideandConquerAlignment中处理前一半数据
    void BackSpaceEfficientAlignment(int, int, int, int);//在DivideandConquerAlignment中处理后一半数据
    void DivideandConquerAlignment(int, int, int, int);    //分置函数
    void PrintList();                                    //打印比对坐标队列
    void PrintCount()                            //打印比对统计结果
    {
        cout << "比对总代价:" << cost
             << "\n比对后的字符串长度:" << sX.size()
             << "\n相同字符的个数:" << matchs
             << "\n空格的个数:" << gaps
             << "\n不同字符的个数:" << mismatchs << endl;
    }
    bool SaveResultStr(string, string);                    //保存结果序列
    void CreateResultStr();                                //根据坐标队列生成结果序列
    void PrintResultStr()                                //打印结果序列
    {
        cout << sX << '\n' << sY << endl;
    }
    unsigned int GetXLength()
    {
        return m;
    }
    unsigned int GetYLength()
    {
        return n;
    }
    double GetSimilarity()                                //计算相似度
    {
        return 1.0 - double(cost)/double(sX.size() * MISMATCH);
    }
};

void SqAlignment::PrintList()
{
    list<Coordinates>::iterator iter;
    for (iter = P.begin(); iter != P.end(); iter++)
        cout <<static_cast<Coordinates>(*iter).row << '\t'<<static_cast<Coordinates>(*iter).col<<'\n';
}

unsigned long SqAlignment::Min(unsigned long a, unsigned long b, unsigned long c)
{
    if(a <= b && a <= c)
        return a;
    else
        if(b <= a && b <= c)
            return b;
        else
            return c;
}

SqAlignment::SqAlignment(string x, string y)
{
    X = x;
    Y = y;
    m = X.size();
    n = Y.size();
    gaps = matchs = mismatchs = cost = 0;
    similarity = 0.0;
    pX = X.c_str();
    pY = Y.c_str();
    SEA = new long *[m+1];
    BSEA = new long *[m+2];
    for (int i = 0; i <= m; i++)
    {
        SEA[i] = new long [2];
        BSEA[i] = new long [2];
    }
    BSEA[m+1] = new long [2];

}

SqAlignment::~SqAlignment()
{
    delete []SEA;
    delete []BSEA;
}

int SqAlignment::Match(char i, char j)
{
    if (i == j)                                //如果两个字符相同,付出0代价
        return MATCH;
    else
        return MISMATCH;                    //不相同扣MISMATCH分
}

void SqAlignment::Alignment(int xbegin, int xend, int ybegin, int yend)
{
    int len1 = xend-xbegin+2;
    int len2 = yend-ybegin+2;
    record = new Record [len1*len2];
    int **s = new int *[len1];
    int k = 0;

    for (int i = 0; i <= len1-1; i++)
    {
        s[i] = new int [len2];
        s[i][0] = i * GAP;
        record[i*len2].identifier = (i-1)*len2;
        record[i*len2].currow = i;
        record[i*len2].curcol = 0;
    }
    for (i = 0; i <= len2-1; i++)
    {
        s[0][i] = i * GAP;
        record[i].identifier = i - 1;
        record[i].currow = 0;
        record[i].curcol = i;
    }
    for (int j = 1; j <= len2-1; j++)
    {
        for (int i = 1; i <= len1-1; i++)
        {
            k = i*len2+j;
            unsigned long a = Match(pX[i+xbegin-2], pY[j+ybegin-2]) + s[i-1][j-1];
            unsigned long b = GAP + s[i][j-1];
            unsigned long c = GAP + s[i-1][j];
            record[k].currow = xbegin + i - 1;
            record[k].curcol = ybegin + j - 1;
            if (a <= b && a <= c)
            {
                s[i][j] = a;
                record[k].identifier = (i-1) * len2 + j - 1;
            }
            else
                if (b <= a && b <= c)
                {
                    s[i][j] = b;
                    record[k].identifier = i * len2 + j - 1;
                }
            else
            {
                s[i][j] = c;
                record[k].identifier = (i-1) * len2 + j ;
            }
        }
    }
    delete []s;
    TraceBack(len1*len2-1);
    delete record;
}

void SqAlignment::TraceBack(int num)
{
    if ((num > 0) && record[num].currow && record[num].curcol)
    {
        Coordinates a;
        a.row = record[num].currow;
        a.col = record[num].curcol;
        P.push_back(a);
        TraceBack(record[num].identifier);
    }
}

bool SqAlignment::SaveResultStr(string fXaddress, string fYaddress)
{
    fXaddress.append(".txt");
    fYaddress.append(".txt");

    ofstream foutX,foutY;

    foutX.open(fXaddress.c_str(),ios::out);
    foutY.open(fYaddress.c_str(),ios::out);

    if (!foutX || !foutY)
        return false;

    cout << "正在生成文件" << fXaddress << "...\n";
    foutX << sX;
    cout << "生成" << fXaddress << "成功。\n\n";
    cout << "正在生成文件" << fYaddress << "...\n";
    foutY << sY;
    cout << "生成" << fYaddress << "成功。\n";
    foutX.close();
    foutY.close();

    return true;
}

void SqAlignment::CreateResultStr()
{
    P.sort();
    P.unique();

    int rowidentifier=1,colidentifier=1;
    list<Coordinates>::iterator iter,iter1;
    iter1 = P.end();
    int row,col;

    for (iter = P.begin(); iter != P.end(); iter++)
    {
        row = static_cast<Coordinates>(*iter).row;
        col = static_cast<Coordinates>(*iter).col;
        while (row > rowidentifier)
        {
            sX.append(1,pX[rowidentifier-1]);
            sY.append(1,'-');
            rowidentifier++;
            cost = cost + GAP;
            gaps++;
        }
        while (col > colidentifier)
        {
            sY.append(1,pY[colidentifier-1]);
            sX.append(1,'-');
            colidentifier++;
            cost = cost + GAP;
            gaps++;
        }
        if (row == static_cast<Coordinates>(*iter1).row)
        {
            sY.append(1,pY[static_cast<Coordinates>(*iter).col-1]);
            sX.append(1,'-');
            iter1 = iter;
            rowidentifier++;
            colidentifier++;
            cost = cost + GAP;
            gaps++;
            continue;
        }
        if(col == static_cast<Coordinates>(*iter1).col)
        {
            sX.append(1,pX[row-1]);
            sY.append(1,'-');
            iter1 = iter;
            rowidentifier++;
            colidentifier++;
            cost = cost + GAP;
            gaps++;
            continue;
        }

        sX.append(1,pX[row-1]);
        sY.append(1,pY[col-1]);
        if (MATCH == Match(pX[row-1], pY[col-1]))
            matchs++;
        else
            mismatchs++;
        iter1 = iter;
        rowidentifier++;
        colidentifier++;
        cost = cost + Match(pX[row-1], pY[col-1]);
    }
}

void SqAlignment::SpaceEfficientAlignment(int xbegin, int xend, int ybegin, int yend)
{
    
    for (int i = xbegin-1; i <= xend; i++)
    {
        SEA[i][0] = (i-xbegin+1) * GAP;
    }

    for (int j = ybegin; j <= yend; j++)
    {
        SEA[xbegin-1][1] = (j-ybegin+1) * GAP;
        for (int i = xbegin; i <= xend; i++)
        {
            SEA[i][1] = Min(Match(pX[i-1], pY[j-1]) + SEA[i-1][0], GAP + SEA[i-1][1], GAP + SEA[i][0]);
        }
        if (j != yend)
        {
            for (i = xbegin-1; i <= xend; i++)
                SEA[i][0] = SEA[i][1];
    
        }
    }
}

void SqAlignment::BackSpaceEfficientAlignment(int xbegin, int xend, int ybegin, int yend)
{
    for (int i = xbegin; i <= xend+1; i++)
    {
        BSEA[i][0] = (xend+1-i) * GAP;
    }

    for (int j = yend; j >= ybegin; j--)
    {
        BSEA[xend+1][1] = (yend+1-j) * GAP;
        for (int i = xend; i >= xbegin; i--)
        {
            BSEA[i][1] = Min(Match(pX[i-1], pY[j-1]) + BSEA[i+1][0], GAP + BSEA[i+1][1], GAP + BSEA[i][0]);
        }
        if (j != ybegin)
        {
            for (i = xbegin; i <= xend+1; i++)
                BSEA[i][0] = BSEA[i][1];
    
        }
    }
}

void SqAlignment::DivideandConquerAlignment(int xbegin, int xend, int ybegin, int yend)
{
    int len1 = xend - xbegin + 1;
    int len2 = yend - ybegin + 1;
    if (len1 <= 2 || len2 <=2)
        Alignment(xbegin, xend, ybegin, yend);
    else
    {
        SpaceEfficientAlignment(xbegin, xend, ybegin, (yend+ybegin)/2);
        BackSpaceEfficientAlignment(xbegin, xend, (yend+ybegin)/2, yend);

        int p = xbegin;
        for (int i = xbegin; i <= xend; i++)
        {
            if ((SEA[i][1] + BSEA[i][1]) < (SEA [p][1] + BSEA[p][1]))
                p = i;
        }
        Coordinates a;
        a.row = p;
        a.col = (yend + ybegin) / 2;
        P.push_back(a);
        int xe, ye, xb, yb;
        unsigned long ca = Match(pX[p-1], pY[(yend + ybegin) / 2-1]) + SEA[p-1][0], cb = GAP + SEA[p-1][1], cc = GAP + SEA[p][0];
        if (ca <= cb && ca <= cc)
        {
            xe = p - 1;
            ye = (yend + ybegin) / 2-1;
        }
        else
            if (cb <= ca && cb <= cc)
            {
                xe = p-1;
                ye = (yend + ybegin) / 2;
            }
        else
        {
            xe = p;
            ye = (yend + ybegin) / 2-1;
        }

        ca = Match(pX[p-1], pY[(yend + ybegin) / 2-1]) + BSEA[p+1][0], cb = GAP + BSEA[p-1][1], cc = GAP + BSEA[p][0];
        if (ca <= cb && ca <= cc)
        {
            xb = p + 1;
            yb = (yend + ybegin) / 2+1;
        }
        else
            if (cb <= ca && cb <= cc)
            {
                xb = p+1;
                yb = (yend + ybegin) / 2;
            }
        else
        {
            xb = p;
            yb = (yend + ybegin) / 2+1;
        }
        DivideandConquerAlignment(xbegin, xe, ybegin, ye);
        DivideandConquerAlignment(xb, xend, yb, yend);
    }
}
int main(void)
{
    cout << "***********************************************************\n"
         << "*                     序列比对算法                        *\n"
         << "*         时间复杂度为O(mn) | 空间复杂度为O(m+n)          *\n"
         << "*                                                         *\n"
         << "*比对规则:                                               *\n"
         << "*         两字符相同时付出的代价为:0                     *\n"
         << "*         补充空格时付出的代价为  :2                     *\n"
         << "*         两字符不同是付出的代价为:3                     *\n"
         << "***********************************************************\n"
         << endl;

    string ReadData(string, char);
    char choosein =0;
    string X,Y;
    string fXaddress="",fYaddress="";
    cout << "你是要从文件读入还是输入字符串(输入1从文件读入,否则自己输入字符串):";
    cin >> choosein;
    cin.clear();
    cin.sync();
    if ('1' != choosein)
    {
        cout << "输入字符串X:";
        cin >> X;
        cout << "输入字符串Y:";
        cin >> Y;
    }
    else
    {
        
        char rule;

        cout << "输入第一个文件地址:";
        cin >> fXaddress;
        cout << "输入第二个文件地址:";
        cin >> fYaddress;

        cout << "是否忽略空格和换行符?y/n:";
        cin >> rule;
        while (rule != 'y' && rule != 'n' && rule != 'Y' && rule != 'N')
        {
            cout << "输入的指令不正确,请输入Y(y)/N(n):";
            cin >> rule;
        }
    
        X = ReadData(fXaddress, rule);
        Y = ReadData(fYaddress, rule);
    }
    if ("" == X || "" == Y)
    {
        cout << "字符串都不能为空。" << endl;
        abort();
    }
    SqAlignment s (X, Y);
    cout << "正在比对字符串序列...\n" << endl;
    s.DivideandConquerAlignment(1, X.size(), 1, Y.size());
    s.CreateResultStr();
    cout << "序列比对结束\n";
    cout << "字符串X的长度为" << s.GetXLength() << '\n';
    cout << "字符串Y的长度为" << s.GetYLength() << '\n';
    s.PrintCount();
    cout << "字符串的相似度为:" << s.GetSimilarity() << '\n';
    char chooseout = 0;

    cout << "打印比对后的字符串还是将其保存为文件?(输入1仅打印,输入2即打印又保存为文件,否则仅保存为文件):";
    cin >> chooseout;
    switch (chooseout)
    {
    case '1':
        s.PrintResultStr();
        break;
    case '2':
        s.PrintResultStr();
    default:
        bool identifier = false;
        if ('1' != choosein)
            identifier = s.SaveResultStr("str1.txt", "str2.txt");
        else
            identifier = s.SaveResultStr(fXaddress, fYaddress);
        if (false == identifier)
        {
            cout << "文件生成失败" << endl;
            abort();
        }
    }
    
    //s.PrintList();
    cin.clear();
    cin.sync();
    getchar();
    return 0;
 }

string ReadData(string address, char rule)
{
    string str,tmp;

    ifstream fin;
    fin.open(address.c_str(),ios::in);

    if (!fin)
    {
        cout << "打开文件" << address << "失败。" << endl;
        abort();
    }

    cout << "\n正在读入" << address << "...\n" << endl;
    if (rule == 'y' || rule == 'Y')
    {
        while (fin)
        {
            fin >> tmp;
            str.append(tmp);
        }
    }
    else
    {
        while (fin)
        {
            fin >> tmp;
            str.append(tmp);
            str.append(1,' ');
        }
    }
    cout << "读入" << address << "成功。\n";
    
    fin.close();
    
    return str;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值