c++ 计算引物退火温度

引物(primer)的退火温度(Tm)是指引物与目标DNA序列形成双链DNA的温度。盐浓度、引物长度、引物的碱基序列等都可影响Tm。

本代码参考primer3源码 https://github.com/primer3-org/primer3  

//
//  main.cpp
//  test3
//
//  Created by  zhengxueming on 2024/4/22.
//

#include <iostream>
#include <math.h>
#include <string.h>
#include <map>

//计算引物的退火温度Tm, 引物长度一般为 2 ~ 36 base
using namespace std;

typedef enum tm_method_type {
        breslauer_auto      = 0,
        santalucia_auto     = 1,
} tm_method_type;

typedef enum salt_correction_type {
        schildkraut    = 0,
        santalucia     = 1,
        owczarzy       = 2,
} salt_correction_type;


#define PRIMER_ERROR -1.99
#define T_KELVIN 273.15

/* Table 1 (old parameters):
 * See table 2 in the paper [Breslauer KJ, Frank R, Blˆcker H and
 * Marky LA (1986) "Predicting DNA duplex stability from the base
 * sequence" Proc Natl Acad Sci 83:4746-50
 * http://dx.doi.org/10.1073/pnas.83.11.3746]
 */
const map<std::string, int> getBreslauerMap(){
    const map<std::string, int> breslauer_map = {{"DS_A_A", 222}, {"DS_A_C", 224},{"DS_A_G", 210}, {"DS_A_T", 204}, {"DS_A_N", 224}, {"DS_C_A", 227}, {"DS_C_C", 199}, {"DS_C_G", 272}, {"DS_C_T", 210}, {"DS_C_N", 272},
        {"DS_G_A", 222}, {"DS_G_C", 244}, {"DS_G_G", 199},{"DS_G_T", 224}, {"DS_G_N", 244},
        {"DS_T_A", 213}, {"DS_T_C", 222}, {"DS_T_G", 227},{"DS_T_T", 222}, {"DS_T_N", 227},
        {"DS_N_A", 168}, {"DS_N_C", 210}, {"DS_N_G", 220},{"DS_N_T", 215}, {"DS_N_N", 220},
        {"DH_A_A", 79}, {"DH_A_C", 84}, {"DH_A_G", 78},{"DH_A_T", 72}, {"DH_A_N", 72},
        {"DH_C_A", 85}, {"DH_C_C", 80}, {"DH_C_G", 106},{"DH_C_T", 78}, {"DH_C_N", 78},
        {"DH_G_A", 82}, {"DH_G_C", 98}, {"DH_G_G", 80},{"DH_G_T", 84}, {"DH_G_N", 80},
        {"DH_T_A", 72}, {"DH_T_C", 82}, {"DH_T_G", 85},{"DH_T_T", 79}, {"DH_T_N", 72},
        {"DH_N_A", 72}, {"DH_N_C", 80}, {"DH_N_G", 78},{"DH_N_T", 72}, {"DH_N_N", 72},
        {"DG_A_A", 1000}, {"DG_A_C", 1440}, {"DG_A_G", 1280},{"DG_A_T", 880}, {"DG_A_N", 880},
        {"DG_C_A", 1450}, {"DG_C_C", 1840}, {"DG_C_G", 2170},{"DG_C_T", 1280}, {"DG_C_N", 1450},
        {"DG_G_A", 1300}, {"DG_G_C", 2240}, {"DG_G_G", 1840},{"DG_G_T", 1440}, {"DG_G_N", 1300},
        {"DG_T_A", 580}, {"DG_T_C", 1300}, {"DG_T_G", 1450},{"DG_T_T", 1000}, {"DG_T_N", 580},
        {"DG_N_A", 580}, {"DG_N_C", 1300}, {"DG_N_G", 1280},{"DG_N_T", 880}, {"DG_N_N", 580},
    };
    return breslauer_map;
}


/* Table 2, new parameters:
 * Tables of nearest-neighbor thermodynamics for DNA bases, from the
 * paper [SantaLucia JR (1998) "A unified view of polymer, dumbbell
 * and oligonucleotide DNA nearest-neighbor thermodynamics", Proc Natl
 * Acad Sci 95:1460-65 http://dx.doi.org/10.1073/pnas.95.4.1460]
 */

const map<std::string, int> getSantaluciaMap(){
    const map<std::string, int> santalucia_map = {
        {"S_A_A", 240}, {"S_A_C", 173}, {"S_A_G", 208}, {"S_A_T", 239}, {"S_A_N", 215},
        {"S_C_A", 129}, {"S_C_C", 266}, {"S_C_G", 278}, {"S_C_T", 208}, {"S_C_N", 220},
        {"S_G_A", 135}, {"S_G_C", 267}, {"S_G_G", 266}, {"S_G_T", 173}, {"S_G_N", 210},
        {"S_T_A", 169}, {"S_T_C", 135}, {"S_T_G", 129}, {"S_T_T", 240}, {"S_T_N", 168},
        {"S_N_A", 168}, {"S_N_C", 210}, {"S_N_G", 220}, {"S_N_T", 215}, {"S_N_N", 203},
        {"H_A_A", 91}, {"H_A_C", 65}, {"H_A_G", 78}, {"H_A_T", 86}, {"H_A_N", 80},
        {"H_C_A", 58}, {"H_C_C", 110}, {"H_C_G", 119}, {"H_C_T", 78}, {"H_C_N", 91},
        {"H_G_A", 56}, {"H_G_C", 111}, {"H_G_G", 110}, {"H_G_T", 65}, {"H_G_N", 85},
        {"H_T_A", 60}, {"H_T_C", 56}, {"H_T_G", 58}, {"H_T_T", 91}, {"H_T_N", 66},
        {"H_N_A", 66}, {"H_N_C", 85}, {"H_N_G", 91}, {"H_N_T", 80}, {"H_N_N", 80},
        {"G_A_A", 1900}, {"G_A_C", 1300}, {"G_A_G", 1600}, {"G_A_T", 1500}, {"G_A_N", 1575},
        {"G_C_A", 1900}, {"G_C_C", 3100}, {"G_C_G", 3600}, {"G_C_T", 1600}, {"G_C_N", 2550},
        {"G_G_A", 1600}, {"G_G_C", 3100}, {"G_G_G", 3100}, {"G_G_T", 1300}, {"G_G_N", 2275},
        {"G_T_A", 900}, {"G_T_C", 1600}, {"G_T_G", 1900}, {"G_T_T", 1900}, {"G_T_N", 1575},
        {"G_N_A", 1575}, {"G_N_C", 2275}, {"G_N_G", 2550}, {"G_N_T", 1575}, {"G_N_N", 1994}
    };
    
    return santalucia_map;
}


double divalentToMonovalent(double divalent, double dntp) {
   if(divalent==0) dntp=0;
    if(divalent<0 || dntp<0) return PRIMER_ERROR;
   if(divalent<dntp)
     /* According to theory, melting temperature does not depend on
    divalent cations */
     divalent=dntp;
   return 120*(sqrt(divalent-dntp));
}


/* Return 1 if string is symmetrical, 0 otherwise. */
int symmetry(const string seq) {
    char s;
    char e;
    int i = 0;
    unsigned long seq_len=seq.size();
    
    unsigned long mp = seq_len/2;
    if(seq_len%2==1) {
        return 0;
    }
    
    while(i<mp) {
        i++;
        s=seq[i];
        e=seq[0-i];
        if ((s=='A' && e!='T')
            || (s=='T' && e!='A')
            || (e=='A' && s!='T')
            || (e=='T' && s!='A')) {
            return 0;
        }
        if ((s=='C' && e!='G')
            || (s=='G' && e!='C')
            || (e=='C' && s!='G')
            || (e=='G' && s!='C')) {
            return 0;
        }
    }
    return 1;
}

double calculateGCPercent(const string& sequence) {
    int gc_count = 0;
    int total_count = 0;

    for (char nucleotide : sequence) {
        if (nucleotide == 'G' || nucleotide == 'C') {
            gc_count++;
        }
        total_count++;
    }

    // 计算GC含量的百分比
    double gc_percent = (static_cast<double>(gc_count) / total_count);
    
    
    std::cout << "gc_percent: " << gc_percent << std::endl;
    
    
    return gc_percent;
}

/*
 参数介绍:
 const string seq: The sequence.
 double dna_conc: DNA concentration (nanomolar).

 double salt_conc: Salt concentration (millimolar).

 double divalent_conc: Concentration of divalent cations (millimolar)

 double dntp_conc: Concentration of dNTPs (millimolar)

 tm_method_type tm_method: method to calculate Tm according to paper.
 If tm_method==santalucia_auto, then the table of
 nearest-neighbor thermodynamic parameters and method for Tm
 calculation in the paper [SantaLucia JR (1998) "A unified view of
 polymer, dumbbell and oligonucleotide DNA nearest-neighbor
 thermodynamics", Proc Natl Acad Sci 95:1460-65
 http://dx.doi.org/10.1073/pnas.95.4.1460] is used.
 *THIS IS THE RECOMMENDED VALUE*.

 If tm_method==breslauer_auto, then method for Tm
 calculations in the paper [Rychlik W, Spencer WJ and Rhoads RE
 (1990) "Optimization of the annealing temperature for DNA
 amplification in vitro", Nucleic Acids Res 18:6409-12
 http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=2243783].
 and the thermodynamic parameters in the paper [Breslauer KJ, Frank
 R, Blˆcker H and Marky LA (1986) "Predicting DNA duplex stability
 from the base sequence" Proc Natl Acad Sci 83:4746-50
 http://dx.doi.org/10.1073/pnas.83.11.3746], are is used.  This is
 the method and the table that primer3 used up to and including
 version 1.0.1
 
 salt_correction_type salt_corrections: salt correction method according to paper.
 If salt_corrections==schildkraut, then formula for
 salt correction in the paper [Schildkraut, C, and Lifson, S (1965)
 "Dependence of the melting temperature of DNA on salt
 concentration", Biopolymers 3:195-208 (not available on-line)] is
 used.  This is the formula that primer3 used up to and including
 version 1.0.1.

 If salt_corrections==santalucia, then formula for
 salt correction suggested by the paper [SantaLucia JR (1998) "A
 unified view of polymer, dumbbell and oligonucleotide DNA
 nearest-neighbor thermodynamics", Proc Natl Acad Sci 95:1460-65
 http://dx.doi.org/10.1073/pnas.95.4.1460] is used.

 If salt_corrections==owczarzy, then formula for
 salt correction in the paper [Owczarzy, R., Moreira, B.G., You, Y.,
 Behlke, M.A., and Walder, J.A. (2008) "Predicting stability of DNA
 duplexes in solutions containing magnesium and monovalent cations",
 Biochemistry 47:5336-53 http://dx.doi.org/10.1021/bi702363u] is used.

*/

/*
 CR反应体系中:
 每种dNTP的最终浓度通常在200 μM到500 μM之间。即总浓度可以在800 μM到2000 μM之间。
 钾离子浓度在50 mM(毫摩尔)至100 mM之间
 镁离子(Mg^2+)的浓度通常在1 mM到5 mM之间,但具体浓度可能需要优化。
 模板DNA浓度通常在1 ng/μL到100 ng/μL之间
 */

//tm_method=breslauer_auto
//tm_method=santalucia_auto

//salt_corrections=schildkraut
//salt_corrections=santalucia
//salt_corrections=owczarzy


double calPrimerTm(const string seq, double dna_conc=5, double salt_conc=50, double divalent_conc=1, double dntp_conc=1, tm_method_type tm_method=santalucia_auto, salt_correction_type salt_corrections=owczarzy){
    
    int dh = 0; // 焓变
    int ds = 0; //熵变
    double delta_H, delta_S; //最终的焓变和熵变
    
    double Tm; /* Melting temperature */
    
    double correction;
    int sym;
    unsigned long len = seq.size();
    
    if(divalentToMonovalent(divalent_conc, dntp_conc) == PRIMER_ERROR)
        return PRIMER_ERROR;
    
   /** K_mM = K_mM + divalent_to_monovalent(divalent_conc, dntp_conc); **/
    if (tm_method != breslauer_auto
        && tm_method != santalucia_auto)
        return PRIMER_ERROR;
    if (salt_corrections != schildkraut
        && salt_corrections != santalucia
        && salt_corrections != owczarzy)
        return PRIMER_ERROR;

    sym = symmetry(seq); /*Add symmetry correction if seq is symmetrical*/
    if( tm_method == breslauer_auto)
        ds=108;
    else {
        if(sym == 1)
            ds+=14;
        
        /** Terminal AT penalty **/
        //单引号用于表示字符,双引号用于表示字符串
        if(seq[0] == 'A' || seq[0] == 'T') {
            ds += -41;
            dh += -23;
        } else if (seq[0] == 'C' || seq[0] == 'G') {
            ds += 28;
            dh += -1;
        }
        
        if(seq[len-1] == 'A' || seq[len-1] == 'T') {
            ds += -41;
            dh += -23;
        } else if (seq[len-1] == 'C'  || seq[len-1] == 'G') {
            ds += 28;
            dh += -1;
        }
    }
   
    // 根据nearest-neighbor thermodynamics计算dh和ds
    const map<std::string, int> santalucia_map = getSantaluciaMap();
    const map<std::string, int> breslauer_map = getBreslauerMap();

    for (unsigned long i = 0; i < len-1; i++){
        if (tm_method == santalucia_auto) {
            string s_key = "S_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
            string h_key = "H_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
            //const std::map,你不能使用[]运算符来获取某个键对应的值
            //可以使用at()方法来获取特定键对应的值
            ds += santalucia_map.at(s_key);
            dh += santalucia_map.at(h_key);
        } else {
            string ds_key = "DS_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
            string dh_key = "DH_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
            ds += breslauer_map.at(ds_key);
            dh += breslauer_map.at(dh_key);
        }
    }
    
    
    delta_H = dh * -100.0;  /*
                * Nearest-neighbor thermodynamic values for dh
                * are given in 100 cal/mol of interaction.
                */
    delta_S = ds * -0.1;     /*
                 * Nearest-neighbor thermodynamic values for ds
                 * are in in .1 cal/K per mol of interaction.
                 */
    Tm=0;  /* Melting temperature */
    

    /**********************************************/
    if (salt_corrections == schildkraut) {
        salt_conc = salt_conc + divalentToMonovalent(divalent_conc, dntp_conc);
        correction = 16.6 * log10(salt_conc/1000.0) - T_KELVIN;
        Tm = delta_H / (delta_S + 1.987 * log(dna_conc/4000000000.0)) + correction;
    } else if (salt_corrections== santalucia) {
        salt_conc = salt_conc + divalentToMonovalent(divalent_conc, dntp_conc);
        delta_S = delta_S + 0.368 * (len - 1) * log(salt_conc / 1000.0 );
        if(sym == 1) { // primer is symmetrical
            // Equation A
            Tm = delta_H / (delta_S + 1.987 * log(dna_conc/1000000000.0)) - T_KELVIN;
        } else {
            // Equation B
            Tm = delta_H / (delta_S + 1.987 * log(dna_conc/4000000000.0)) - T_KELVIN;
        }
    } else if (salt_corrections == owczarzy) {
        double gc_percent = calculateGCPercent(seq);
        // conc of divalent cations minus dNTP conc
        double free_divalent;
       
        /**** BEGIN: UPDATED SALT BY OWCZARZY *****/
         /* different salt corrections for monovalent (Owczarzy et al.,2004)
         and divalent cations (Owczarzy et al.,2008)
         */
        /* competition bw magnesium and monovalent cations, see Owczarzy et al., 2008 Figure 9 and Equation 16 */
      
        static const double crossover_point = 0.22; /* depending on the value of div_monov_ratio respect
                          to value of crossover_point Eq 16 (divalent corr, Owczarzy et al., 2008)
                          or Eq 22 (monovalent corr, Owczarzy et al., 2004) should be used */
       double div_monov_ratio;
       if(dntp_conc >= divalent_conc) {
           free_divalent = 0.00000000001; /* to not to get log(0) */
       } else {
           free_divalent = (divalent_conc - dntp_conc)/1000.0;
       }
        
        
       static double a = 0,b = 0,c = 0,d = 0,e = 0,f = 0,g = 0;
       if(salt_conc==0) {
           div_monov_ratio = 6.0;
       } else {
           div_monov_ratio = (sqrt(free_divalent))/(salt_conc/1000); /* if conc of monov cations is provided
                                     a ratio is calculated to further calculate
                                     the _correct_ correction */
       }
       if (div_monov_ratio < crossover_point) {
           /* use only monovalent salt correction, Eq 22 (Owczarzy et al., 2004) */
           correction
              = (((4.29 * gc_percent) - 3.95) * pow(10,-5) * log(salt_conc / 1000.0))
                 + (9.40 * pow(10,-6) * (pow(log(salt_conc / 1000.0),2)));
       } else {
           /* magnesium effects are dominant, Eq 16 (Owczarzy et al., 2008) is used */
           b =- 9.11 * pow(10,-6);
           c = 6.26 * pow(10,-5);
           e =- 4.82 * pow(10,-4);
           f = 5.25 * pow(10,-4);
           a = 3.92 * pow(10,-5);
           d = 1.42 * pow(10,-5);
           g = 8.31 * pow(10,-5);
           if(div_monov_ratio < 6.0) {
           /* in particular ratio of conc of monov and div cations
            *             some parameters of Eq 16 must be corrected (a,d,g) */
               a = 3.92 * pow(10,-5) * (0.843 - (0.352 * sqrt(salt_conc/1000.0) * log(salt_conc/1000.0)));
               d = 1.42 * pow(10,-5) * (1.279 - 4.03 * pow(10,-3) * log(salt_conc/1000.0) - 8.03 * pow(10,-3) * pow(log(salt_conc/1000.0),2));
               g = 8.31 * pow(10,-5) * (0.486 - 0.258 * log(salt_conc/1000.0) + 5.25 * pow(10,-3) * pow(log(salt_conc/1000.0),3));
            }
     
            correction = a + (b * log(free_divalent))
                + gc_percent * (c + (d * log(free_divalent)))
                + (1/(2 * (len - 1))) * (e + (f * log(free_divalent))
                      + g * (pow((log(free_divalent)),2)));
        }
        /**** END: UPDATED SALT BY OWCZARZY *****/
        if (sym == 1) {
            /* primer is symmetrical */
            /* Equation A */
            Tm = 1/((1/(delta_H/
                 (delta_S + 1.9872 * log(dna_conc/1000000000.0)))) + correction) - T_KELVIN;
         } else {
             /* Equation B */
             Tm = 1/((1/(delta_H/
                 (delta_S + 1.9872 * log(dna_conc/4000000000.0)))) + correction) - T_KELVIN;
         }
    } /* END else if (salt_corrections == owczarzy) { */
    
    return Tm;
}
    


int main(int argc, const char * argv[]) {
    const string primer1 = "CTTAATCATGAAATCCATANGTC";
    double tm = calPrimerTm(primer1);
    std::cout << "Tm: "  << tm << std::endl;
    return 0;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值