引物(primer)的退火温度(Tm)是指引物与目标DNA序列形成双链DNA的温度。盐浓度、引物长度、引物的碱基序列等都可影响Tm。
本代码参考primer3源码 https://github.com/primer3-org/primer3
//
// main.cpp
// test3
//
// Created by zhengxueming on 2024/4/22.
//
#include <iostream>
#include <math.h>
#include <string.h>
#include <map>
//计算引物的退火温度Tm, 引物长度一般为 2 ~ 36 base
using namespace std;
typedef enum tm_method_type {
breslauer_auto = 0,
santalucia_auto = 1,
} tm_method_type;
typedef enum salt_correction_type {
schildkraut = 0,
santalucia = 1,
owczarzy = 2,
} salt_correction_type;
#define PRIMER_ERROR -1.99
#define T_KELVIN 273.15
/* Table 1 (old parameters):
* See table 2 in the paper [Breslauer KJ, Frank R, Blˆcker H and
* Marky LA (1986) "Predicting DNA duplex stability from the base
* sequence" Proc Natl Acad Sci 83:4746-50
* http://dx.doi.org/10.1073/pnas.83.11.3746]
*/
const map<std::string, int> getBreslauerMap(){
const map<std::string, int> breslauer_map = {{"DS_A_A", 222}, {"DS_A_C", 224},{"DS_A_G", 210}, {"DS_A_T", 204}, {"DS_A_N", 224}, {"DS_C_A", 227}, {"DS_C_C", 199}, {"DS_C_G", 272}, {"DS_C_T", 210}, {"DS_C_N", 272},
{"DS_G_A", 222}, {"DS_G_C", 244}, {"DS_G_G", 199},{"DS_G_T", 224}, {"DS_G_N", 244},
{"DS_T_A", 213}, {"DS_T_C", 222}, {"DS_T_G", 227},{"DS_T_T", 222}, {"DS_T_N", 227},
{"DS_N_A", 168}, {"DS_N_C", 210}, {"DS_N_G", 220},{"DS_N_T", 215}, {"DS_N_N", 220},
{"DH_A_A", 79}, {"DH_A_C", 84}, {"DH_A_G", 78},{"DH_A_T", 72}, {"DH_A_N", 72},
{"DH_C_A", 85}, {"DH_C_C", 80}, {"DH_C_G", 106},{"DH_C_T", 78}, {"DH_C_N", 78},
{"DH_G_A", 82}, {"DH_G_C", 98}, {"DH_G_G", 80},{"DH_G_T", 84}, {"DH_G_N", 80},
{"DH_T_A", 72}, {"DH_T_C", 82}, {"DH_T_G", 85},{"DH_T_T", 79}, {"DH_T_N", 72},
{"DH_N_A", 72}, {"DH_N_C", 80}, {"DH_N_G", 78},{"DH_N_T", 72}, {"DH_N_N", 72},
{"DG_A_A", 1000}, {"DG_A_C", 1440}, {"DG_A_G", 1280},{"DG_A_T", 880}, {"DG_A_N", 880},
{"DG_C_A", 1450}, {"DG_C_C", 1840}, {"DG_C_G", 2170},{"DG_C_T", 1280}, {"DG_C_N", 1450},
{"DG_G_A", 1300}, {"DG_G_C", 2240}, {"DG_G_G", 1840},{"DG_G_T", 1440}, {"DG_G_N", 1300},
{"DG_T_A", 580}, {"DG_T_C", 1300}, {"DG_T_G", 1450},{"DG_T_T", 1000}, {"DG_T_N", 580},
{"DG_N_A", 580}, {"DG_N_C", 1300}, {"DG_N_G", 1280},{"DG_N_T", 880}, {"DG_N_N", 580},
};
return breslauer_map;
}
/* Table 2, new parameters:
* Tables of nearest-neighbor thermodynamics for DNA bases, from the
* paper [SantaLucia JR (1998) "A unified view of polymer, dumbbell
* and oligonucleotide DNA nearest-neighbor thermodynamics", Proc Natl
* Acad Sci 95:1460-65 http://dx.doi.org/10.1073/pnas.95.4.1460]
*/
const map<std::string, int> getSantaluciaMap(){
const map<std::string, int> santalucia_map = {
{"S_A_A", 240}, {"S_A_C", 173}, {"S_A_G", 208}, {"S_A_T", 239}, {"S_A_N", 215},
{"S_C_A", 129}, {"S_C_C", 266}, {"S_C_G", 278}, {"S_C_T", 208}, {"S_C_N", 220},
{"S_G_A", 135}, {"S_G_C", 267}, {"S_G_G", 266}, {"S_G_T", 173}, {"S_G_N", 210},
{"S_T_A", 169}, {"S_T_C", 135}, {"S_T_G", 129}, {"S_T_T", 240}, {"S_T_N", 168},
{"S_N_A", 168}, {"S_N_C", 210}, {"S_N_G", 220}, {"S_N_T", 215}, {"S_N_N", 203},
{"H_A_A", 91}, {"H_A_C", 65}, {"H_A_G", 78}, {"H_A_T", 86}, {"H_A_N", 80},
{"H_C_A", 58}, {"H_C_C", 110}, {"H_C_G", 119}, {"H_C_T", 78}, {"H_C_N", 91},
{"H_G_A", 56}, {"H_G_C", 111}, {"H_G_G", 110}, {"H_G_T", 65}, {"H_G_N", 85},
{"H_T_A", 60}, {"H_T_C", 56}, {"H_T_G", 58}, {"H_T_T", 91}, {"H_T_N", 66},
{"H_N_A", 66}, {"H_N_C", 85}, {"H_N_G", 91}, {"H_N_T", 80}, {"H_N_N", 80},
{"G_A_A", 1900}, {"G_A_C", 1300}, {"G_A_G", 1600}, {"G_A_T", 1500}, {"G_A_N", 1575},
{"G_C_A", 1900}, {"G_C_C", 3100}, {"G_C_G", 3600}, {"G_C_T", 1600}, {"G_C_N", 2550},
{"G_G_A", 1600}, {"G_G_C", 3100}, {"G_G_G", 3100}, {"G_G_T", 1300}, {"G_G_N", 2275},
{"G_T_A", 900}, {"G_T_C", 1600}, {"G_T_G", 1900}, {"G_T_T", 1900}, {"G_T_N", 1575},
{"G_N_A", 1575}, {"G_N_C", 2275}, {"G_N_G", 2550}, {"G_N_T", 1575}, {"G_N_N", 1994}
};
return santalucia_map;
}
double divalentToMonovalent(double divalent, double dntp) {
if(divalent==0) dntp=0;
if(divalent<0 || dntp<0) return PRIMER_ERROR;
if(divalent<dntp)
/* According to theory, melting temperature does not depend on
divalent cations */
divalent=dntp;
return 120*(sqrt(divalent-dntp));
}
/* Return 1 if string is symmetrical, 0 otherwise. */
int symmetry(const string seq) {
char s;
char e;
int i = 0;
unsigned long seq_len=seq.size();
unsigned long mp = seq_len/2;
if(seq_len%2==1) {
return 0;
}
while(i<mp) {
i++;
s=seq[i];
e=seq[0-i];
if ((s=='A' && e!='T')
|| (s=='T' && e!='A')
|| (e=='A' && s!='T')
|| (e=='T' && s!='A')) {
return 0;
}
if ((s=='C' && e!='G')
|| (s=='G' && e!='C')
|| (e=='C' && s!='G')
|| (e=='G' && s!='C')) {
return 0;
}
}
return 1;
}
double calculateGCPercent(const string& sequence) {
int gc_count = 0;
int total_count = 0;
for (char nucleotide : sequence) {
if (nucleotide == 'G' || nucleotide == 'C') {
gc_count++;
}
total_count++;
}
// 计算GC含量的百分比
double gc_percent = (static_cast<double>(gc_count) / total_count);
std::cout << "gc_percent: " << gc_percent << std::endl;
return gc_percent;
}
/*
参数介绍:
const string seq: The sequence.
double dna_conc: DNA concentration (nanomolar).
double salt_conc: Salt concentration (millimolar).
double divalent_conc: Concentration of divalent cations (millimolar)
double dntp_conc: Concentration of dNTPs (millimolar)
tm_method_type tm_method: method to calculate Tm according to paper.
If tm_method==santalucia_auto, then the table of
nearest-neighbor thermodynamic parameters and method for Tm
calculation in the paper [SantaLucia JR (1998) "A unified view of
polymer, dumbbell and oligonucleotide DNA nearest-neighbor
thermodynamics", Proc Natl Acad Sci 95:1460-65
http://dx.doi.org/10.1073/pnas.95.4.1460] is used.
*THIS IS THE RECOMMENDED VALUE*.
If tm_method==breslauer_auto, then method for Tm
calculations in the paper [Rychlik W, Spencer WJ and Rhoads RE
(1990) "Optimization of the annealing temperature for DNA
amplification in vitro", Nucleic Acids Res 18:6409-12
http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=2243783].
and the thermodynamic parameters in the paper [Breslauer KJ, Frank
R, Blˆcker H and Marky LA (1986) "Predicting DNA duplex stability
from the base sequence" Proc Natl Acad Sci 83:4746-50
http://dx.doi.org/10.1073/pnas.83.11.3746], are is used. This is
the method and the table that primer3 used up to and including
version 1.0.1
salt_correction_type salt_corrections: salt correction method according to paper.
If salt_corrections==schildkraut, then formula for
salt correction in the paper [Schildkraut, C, and Lifson, S (1965)
"Dependence of the melting temperature of DNA on salt
concentration", Biopolymers 3:195-208 (not available on-line)] is
used. This is the formula that primer3 used up to and including
version 1.0.1.
If salt_corrections==santalucia, then formula for
salt correction suggested by the paper [SantaLucia JR (1998) "A
unified view of polymer, dumbbell and oligonucleotide DNA
nearest-neighbor thermodynamics", Proc Natl Acad Sci 95:1460-65
http://dx.doi.org/10.1073/pnas.95.4.1460] is used.
If salt_corrections==owczarzy, then formula for
salt correction in the paper [Owczarzy, R., Moreira, B.G., You, Y.,
Behlke, M.A., and Walder, J.A. (2008) "Predicting stability of DNA
duplexes in solutions containing magnesium and monovalent cations",
Biochemistry 47:5336-53 http://dx.doi.org/10.1021/bi702363u] is used.
*/
/*
CR反应体系中:
每种dNTP的最终浓度通常在200 μM到500 μM之间。即总浓度可以在800 μM到2000 μM之间。
钾离子浓度在50 mM(毫摩尔)至100 mM之间
镁离子(Mg^2+)的浓度通常在1 mM到5 mM之间,但具体浓度可能需要优化。
模板DNA浓度通常在1 ng/μL到100 ng/μL之间
*/
//tm_method=breslauer_auto
//tm_method=santalucia_auto
//salt_corrections=schildkraut
//salt_corrections=santalucia
//salt_corrections=owczarzy
double calPrimerTm(const string seq, double dna_conc=5, double salt_conc=50, double divalent_conc=1, double dntp_conc=1, tm_method_type tm_method=santalucia_auto, salt_correction_type salt_corrections=owczarzy){
int dh = 0; // 焓变
int ds = 0; //熵变
double delta_H, delta_S; //最终的焓变和熵变
double Tm; /* Melting temperature */
double correction;
int sym;
unsigned long len = seq.size();
if(divalentToMonovalent(divalent_conc, dntp_conc) == PRIMER_ERROR)
return PRIMER_ERROR;
/** K_mM = K_mM + divalent_to_monovalent(divalent_conc, dntp_conc); **/
if (tm_method != breslauer_auto
&& tm_method != santalucia_auto)
return PRIMER_ERROR;
if (salt_corrections != schildkraut
&& salt_corrections != santalucia
&& salt_corrections != owczarzy)
return PRIMER_ERROR;
sym = symmetry(seq); /*Add symmetry correction if seq is symmetrical*/
if( tm_method == breslauer_auto)
ds=108;
else {
if(sym == 1)
ds+=14;
/** Terminal AT penalty **/
//单引号用于表示字符,双引号用于表示字符串
if(seq[0] == 'A' || seq[0] == 'T') {
ds += -41;
dh += -23;
} else if (seq[0] == 'C' || seq[0] == 'G') {
ds += 28;
dh += -1;
}
if(seq[len-1] == 'A' || seq[len-1] == 'T') {
ds += -41;
dh += -23;
} else if (seq[len-1] == 'C' || seq[len-1] == 'G') {
ds += 28;
dh += -1;
}
}
// 根据nearest-neighbor thermodynamics计算dh和ds
const map<std::string, int> santalucia_map = getSantaluciaMap();
const map<std::string, int> breslauer_map = getBreslauerMap();
for (unsigned long i = 0; i < len-1; i++){
if (tm_method == santalucia_auto) {
string s_key = "S_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
string h_key = "H_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
//const std::map,你不能使用[]运算符来获取某个键对应的值
//可以使用at()方法来获取特定键对应的值
ds += santalucia_map.at(s_key);
dh += santalucia_map.at(h_key);
} else {
string ds_key = "DS_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
string dh_key = "DH_" + seq.substr(i, 1) + "_" + seq.substr(i+1, 1);
ds += breslauer_map.at(ds_key);
dh += breslauer_map.at(dh_key);
}
}
delta_H = dh * -100.0; /*
* Nearest-neighbor thermodynamic values for dh
* are given in 100 cal/mol of interaction.
*/
delta_S = ds * -0.1; /*
* Nearest-neighbor thermodynamic values for ds
* are in in .1 cal/K per mol of interaction.
*/
Tm=0; /* Melting temperature */
/**********************************************/
if (salt_corrections == schildkraut) {
salt_conc = salt_conc + divalentToMonovalent(divalent_conc, dntp_conc);
correction = 16.6 * log10(salt_conc/1000.0) - T_KELVIN;
Tm = delta_H / (delta_S + 1.987 * log(dna_conc/4000000000.0)) + correction;
} else if (salt_corrections== santalucia) {
salt_conc = salt_conc + divalentToMonovalent(divalent_conc, dntp_conc);
delta_S = delta_S + 0.368 * (len - 1) * log(salt_conc / 1000.0 );
if(sym == 1) { // primer is symmetrical
// Equation A
Tm = delta_H / (delta_S + 1.987 * log(dna_conc/1000000000.0)) - T_KELVIN;
} else {
// Equation B
Tm = delta_H / (delta_S + 1.987 * log(dna_conc/4000000000.0)) - T_KELVIN;
}
} else if (salt_corrections == owczarzy) {
double gc_percent = calculateGCPercent(seq);
// conc of divalent cations minus dNTP conc
double free_divalent;
/**** BEGIN: UPDATED SALT BY OWCZARZY *****/
/* different salt corrections for monovalent (Owczarzy et al.,2004)
and divalent cations (Owczarzy et al.,2008)
*/
/* competition bw magnesium and monovalent cations, see Owczarzy et al., 2008 Figure 9 and Equation 16 */
static const double crossover_point = 0.22; /* depending on the value of div_monov_ratio respect
to value of crossover_point Eq 16 (divalent corr, Owczarzy et al., 2008)
or Eq 22 (monovalent corr, Owczarzy et al., 2004) should be used */
double div_monov_ratio;
if(dntp_conc >= divalent_conc) {
free_divalent = 0.00000000001; /* to not to get log(0) */
} else {
free_divalent = (divalent_conc - dntp_conc)/1000.0;
}
static double a = 0,b = 0,c = 0,d = 0,e = 0,f = 0,g = 0;
if(salt_conc==0) {
div_monov_ratio = 6.0;
} else {
div_monov_ratio = (sqrt(free_divalent))/(salt_conc/1000); /* if conc of monov cations is provided
a ratio is calculated to further calculate
the _correct_ correction */
}
if (div_monov_ratio < crossover_point) {
/* use only monovalent salt correction, Eq 22 (Owczarzy et al., 2004) */
correction
= (((4.29 * gc_percent) - 3.95) * pow(10,-5) * log(salt_conc / 1000.0))
+ (9.40 * pow(10,-6) * (pow(log(salt_conc / 1000.0),2)));
} else {
/* magnesium effects are dominant, Eq 16 (Owczarzy et al., 2008) is used */
b =- 9.11 * pow(10,-6);
c = 6.26 * pow(10,-5);
e =- 4.82 * pow(10,-4);
f = 5.25 * pow(10,-4);
a = 3.92 * pow(10,-5);
d = 1.42 * pow(10,-5);
g = 8.31 * pow(10,-5);
if(div_monov_ratio < 6.0) {
/* in particular ratio of conc of monov and div cations
* some parameters of Eq 16 must be corrected (a,d,g) */
a = 3.92 * pow(10,-5) * (0.843 - (0.352 * sqrt(salt_conc/1000.0) * log(salt_conc/1000.0)));
d = 1.42 * pow(10,-5) * (1.279 - 4.03 * pow(10,-3) * log(salt_conc/1000.0) - 8.03 * pow(10,-3) * pow(log(salt_conc/1000.0),2));
g = 8.31 * pow(10,-5) * (0.486 - 0.258 * log(salt_conc/1000.0) + 5.25 * pow(10,-3) * pow(log(salt_conc/1000.0),3));
}
correction = a + (b * log(free_divalent))
+ gc_percent * (c + (d * log(free_divalent)))
+ (1/(2 * (len - 1))) * (e + (f * log(free_divalent))
+ g * (pow((log(free_divalent)),2)));
}
/**** END: UPDATED SALT BY OWCZARZY *****/
if (sym == 1) {
/* primer is symmetrical */
/* Equation A */
Tm = 1/((1/(delta_H/
(delta_S + 1.9872 * log(dna_conc/1000000000.0)))) + correction) - T_KELVIN;
} else {
/* Equation B */
Tm = 1/((1/(delta_H/
(delta_S + 1.9872 * log(dna_conc/4000000000.0)))) + correction) - T_KELVIN;
}
} /* END else if (salt_corrections == owczarzy) { */
return Tm;
}
int main(int argc, const char * argv[]) {
const string primer1 = "CTTAATCATGAAATCCATANGTC";
double tm = calPrimerTm(primer1);
std::cout << "Tm: " << tm << std::endl;
return 0;
}