探针和靶序列之间的杂交热力学参数可以通过计算热力学性质,如熔解温度(Tm)、热力学自由能(ΔG)、焓变(ΔH)和熵变(ΔS)来获得。这些参数可以帮助预测探针和靶序列之间的杂交稳定性。
1. Tm(熔解温度)的计算
Tm是探针与靶序列形成双链DNA并开始解离的温度。可以使用Nearest-neighbor法等方法计算Tm。常见的计算方法包括Wallace等人提出的近似公式。
2. ΔG(热力学自由能)的计算
热力学自由能表示反应的自发性,可以通过下面的公式计算: ΔG=ΔH−TΔSΔG=ΔH−TΔS 其中,ΔH是焓变,ΔS是熵变,T是温度(单位:开尔文)。
3. ΔH(焓变)和 ΔS(熵变)的计算
焓变和熵变可以通过实验测量获得,或者可以使用计算模型进行估算。例如,可以使用近似公式或生物信息学软件(如NUPACK)来估算这些参数。
通过计算这些热力学参数,可以评估探针和靶序列之间的相互作用强度和稳定性。这对于设计和优化实验,如引物设计、核酸杂交等,具有重要意义。
python 代码:
ambiguous_dna_complement = {
"a": "t",
"c": "g",
"g": "c",
"t": "a",
"A": "T",
"C": "G",
"G": "C",
"T": "A",
"M": "K",
"m": "k",
"R": "Y",
"r": "y",
"W": "W",
"w": "w",
"S": "S",
"s": "s",
"Y": "R",
"y": "r",
"K": "M",
"k": "m",
"V": "B",
"v": "b",
"H": "D",
"h": "d",
"D": "H",
"d": "h",
"B": "V",
"b": "v",
"X": "X",
"x": "x",
"N": "N",
"n": "n",
"-": "-",
}
def complement(seq):
'''Return the complement sequence'''
return ''.join([ambiguous_dna_complement[base] for base in seq])
def reverse(seq):
"""Reverses a string given to it."""
return seq[::-1]
def rev_com(seq):
'''Return the reverse_complement sequence'''
com = complement(seq)
rev_com = reverse(com)
return rev_com
dH_full={
'AATT' : -7.9, 'TTAA' : -7.9,
'ATTA' : -7.2, 'TAAT' : -7.2,
'CAGT' : -8.5, 'TGAC' : -8.5,
'GTCA' : -8.4, 'ACTG' : -8.4,
'CTGA' : -7.8, 'AGTC' : -7.8,
'GACT' : -8.2, 'TCAG' : -8.2,
'CGGC' : -10.6, 'GCCG' : -9.8,
'GGCC' : -8.0, 'CCGG' : -8.0,
'initCG' : 0.1, 'initGC' : 0.1,
'initAT' : 2.3, 'initTA' : 2.3,
# Like pair mismatches
'AATA' : 1.2, 'ATAA' : 1.2,
'CAGA' : -0.9, 'AGAC' : -0.9,
'GACA' : -2.9, 'ACAG' : -2.9,
'TAAA' : 4.7, 'AAAT' : 4.7,
'ACTC' : 0.0, 'CTCA' : 0.0,
'CCGC' : -1.5, 'CGCC' : -1.5,
'GCCC' : 3.6, 'CCCG' : 3.6,
'TCAC' : 6.1, 'CACT' : 6.1,
'AGTG' : -3.1, 'GTGA' : -3.1,
'CGGG' : -4.9, 'GGGC' : -4.9,
'GGCG' : -6.0, 'GCGG' : -6.0,
'TGAG' : 1.6, 'GAGT' : 1.6,
'ATTT' : -2.7, 'TTTA' : -2.7,
'CTGT' : -5.0, 'TGTC' : -5.0,
'GTCT' : -2.2, 'TCTG' : -2.2,
'TTAT' : 0.2, 'TATT' : 0.2,
# G.T mismatches
'AGTT' : 1.0, 'TTGA' : 1.0,
'ATTG' : -2.5, 'GTTA' : -2.5,
'CGGT' : -4.1, 'TGGC' : -4.1,
'CTGG' : -2.8, 'GGTC' : -2.8,
'GGCT' : 3.3, 'TCGG' : 3.3,
'GGTT' : 5.8, 'TTGG' : 5.8,
'GTCG' : -4.4, 'GCTG' : -4.4,
'GTTG' : 4.1, 'GTTG' : 4.1,
'TGAT' : -0.1, 'TAGT' : -0.1,
'TGGT' : -1.4, 'TGGT' : -1.4,
'TTAG' : -1.3, 'GATT' : -1.3,
#G.A mismatches
'AATG' : -0.6, 'GTAA' : -0.6,
'AGTA' : -0.7, 'ATGA' : -0.7,
'CAGG' : -0.7, 'GGAC' : -0.7,
'CGGA' : -4.0, 'AGGC' : -4.0,
'GACG' : -0.6, 'GCAG' : -0.6,
'GGCA' : 0.5, 'ACGG' : 0.5,
'TAAG' : 0.7, 'GAAT' : 0.7,
'TGAA' : 3.0, 'AAGT' : 3.0,
#C.T mismatches
'ACTT' : 0.7, 'TTCA' : 0.7,
'ATTC' : -1.2, 'CTTA' : -1.2,
'CCGT' : -0.8, 'TGCC' : -0.8,
'CTGC' : -1.5, 'CGTC' : -1.5,
'GCCT' : 2.3, 'TCCG' : 2.3,
'GTCC' : 5.2, 'CCTG' : 5.2,
'TCAT' : 1.2, 'TACT' : 1.2,
'TTAC' : 1.0, 'CATT' : 1.0,
#A.C mismatches
'AATC' : 2.3, 'CTAA':2.3,
'ACTA' : 5.3, 'ATCA':5.3,
'CAGC' : 1.9, 'CGAC':1.9,
'CCGA' : 0.6, 'AGCC':0.6,
'GACC' : 5.2, 'CCAG':5.2,
'GCCA' : -0.7, 'ACCG':-0.7,
'TAAC' : 3.4, 'CAAT':3.4,
'TCAA' : 7.6, 'AACT':7.6
}
#--------------------#
# deltaS (cal/K.mol) #
#--------------------#
dS_full={
'AATT' : -22.2, 'TTAA':-22.2,
'ATTA' : -20.4, 'TAAT':-21.3,
'CAGT' : -22.7, 'TGAC':-22.7,
'GTCA' : -22.4, 'ACTG':-22.4,
'CTGA' : -21.0, 'AGTC':-21.0,
'GACT' : -22.2, 'TCAG':-22.2,
'CGGC' : -27.2, 'GCCG':-24.4,
'GGCC' : -19.9, 'CCGG':-19.9,
'initCG' : -2.8, 'initGC':-2.8,
'initAT' : 4.1, 'initTA':4.1,
'sym' : -1.4,
# : Like:pair:mismatches
'AATA' : 1.7, 'ATAA':1.7,
'CAGA' : -4.2, 'AGAC':-4.2,
'GACA' : -9.8, 'ACAG':-9.8,
'TAAA' : 12.9, 'AAAT':12.9,
'ACTC' : -4.4, 'CTCA':-4.4,
'CCGC' : -7.2, 'CGCC':-7.2,
'GCCC' : 8.9, 'CCCG':8.9,
'TCAC' : 16.4, 'CACT':16.4,
'AGTG' : -9.5, 'GTGA':-9.5,
'CGGG' : -15.3, 'GGGC':-15.3,
'GGCG' : -15.8, 'GCGG':-15.8,
'TGAG' : 3.6, 'GAGT':3.6,
'ATTT' : -10.8, 'TTTA':-10.8,
'CTGT' : -15.8, 'TGTC':-15.8,
'GTCT' : -8.4, 'TCTG':-8.4,
'TTAT' : -1.5, 'TATT':-1.5,
# : G.T:mismatches
'AGTT' : 0.9, 'TTGA':0.9,
'ATTG' : -8.3, 'GTTA':-8.3,
'CGGT' : -11.7, 'TGGC':-11.7,
'CTGG' : -8.0, 'GGTC':-8.0,
'GGCT' : 10.4, 'TCGG':10.4,
'GGTT' : 16.3, 'TTGG':16.3,
'GTCG' : -12.3, 'GCTG':-12.3,
'GTTG' : 9.5, 'GTTG':9.5,
'TGAT' : -1.7, 'TAGT':-1.7,
'TGGT' : -6.2, 'TGGT':-6.2,
'TTAG' : -5.3, 'GATT':-5.3,
# G.A mismatches
'AATG' : -2.3, 'GTAA' : -2.3,
'AGTA' : -2.3, 'ATGA' : -2.3,
'CAGG' : -2.3, 'GGAC' : -2.3,
'CGGA' : -13.2, 'AGGC' : -13.2,
'GACG' : -1.0, 'GCAG' : -1.0,
'GGCA' : 3.2, 'ACGG' : 3.2,
'TAAG' : 0.7, 'GAAT' : 0.7,
'TGAA' : 7.4, 'AAGT' : 7.4,
# C.T mismatches
'ACTT' : 0.2, 'TTCA' : 0.2,
'ATTC' : -6.2, 'CTTA' : -6.2,
'CCGT' : -4.5, 'TGCC' : -4.5,
'CTGC' : -6.1, 'CGTC' : -6.1,
'GCCT' : 5.4, 'TCCG' : 5.4,
'GTCC' : 13.5, 'CCTG' : 13.5,
'TCAT' : 0.7, 'TACT' : 0.7,
'TTAC' : 0.7, 'CATT' : 0.7,
# A.C mismatches
'AATC' : 4.6, 'CTAA' : 4.6,
'ACTA' : 14.6, 'ATCA' : 14.6,
'CAGC' : 3.7, 'CGAC' : 3.7,
'CCGA' : -0.6, 'AGCC' : -0.6,
'GACC' : 14.2, 'CCAG' : 14.2,
'GCCA' : -3.8, 'ACCG' : -3.8,
'TAAC' : 8.0, 'CAAT' : 8.0,
'TCAA' : 20.2, 'AACT' : 20.2
}
import math
def calDeltaHS(qseq, sseq):
""" Calculate deltaH and deltaS """
#qseq = re.sub('[atcgn]+', '', qseq)
#sseq = re.sub('[atcgn]+', '', sseq)
init_begin = 'init%s%s' % (qseq[0], sseq[0])
init_end = 'init%s%s' % (qseq[-1], sseq[-1])
if init_begin in dH_full and init_end in dH_full:
deltaH = dH_full[init_begin] + dH_full[init_end]
deltaS = dS_full[init_begin] + dS_full[init_end]
else:
deltaH = 0
deltaS = 0
for i in range(len(qseq) - 1):
dinuc = '%s%s' % (qseq[i : (i+2)], sseq[i : (i+2)])
if dinuc in dH_full and dinuc in dS_full:
deltaH += dH_full[dinuc]
deltaS += dS_full[dinuc]
return deltaH, deltaS
def calDeltaG(qseq, sseq, mono_conc=50, diva_conc=1.5, dntp_conc=0.25, deltaH=None, deltaS=None):
""" Calculate the free Gibbs energy """
mono_conc = float(mono_conc)
diva_conc = float(diva_conc)
dntp_conc = float(dntp_conc)
if not (deltaH and deltaS):
deltaH, deltaS = calDeltaHS(qseq, sseq)
# Calculate the free Gibbs energy
tao = 273.15 + 37 # Constant temperature tao in Kelvin
# Many thanks for the anonymous referee who help me fix the bug in last version.
mono_conc = mono_conc + divalent2monovalent(diva_conc, dntp_conc)
mono_conc = mono_conc / 1000
deltaS_adjust = deltaS + 0.368 * (len(sseq) - 1) * math.log(mono_conc, math.e)
deltaG = (deltaH * 1000 - tao * deltaS_adjust) / 1000
return deltaG
def calTm(qseq, sseq, mono_conc=50, diva_conc=1.5, oligo_conc=50, dntp_conc=0.25, deltaH=None, deltaS=None):
""" Calculate Tm value of amplicon"""
mono_conc = float(mono_conc)
diva_conc = float(diva_conc)
oligo_conc = float(oligo_conc)
dntp_conc = float(dntp_conc)
if not (deltaH and deltaS):
deltaH, deltaS = calDeltaHS(qseq, sseq)
deltaH = deltaH * 1000
oligo_conc = oligo_conc / 1000000000
# Many thanks for the anonymous referee who help me fix the bug in last version.
mono_conc = mono_conc + divalent2monovalent(diva_conc, dntp_conc)
mono_conc = mono_conc / 1000
deltaS = deltaS + 0.368 * (len(qseq) - 1) * math.log(mono_conc, math.e)
Tm = deltaH / (deltaS + 1.987 * math.log(oligo_conc / 4, math.e)) - 273.15
return Tm
def divalent2monovalent(divalent, dntp):
'''Divalent to monovalent'''
if divalent==0:
dntp = 0
if divalent < 0 or dntp <0:
print >> sys.stderr, 'Error: divalent2monovalent, please contact Wubin Qu <quwubin@gmail.com>.'
exit()
if divalent < dntp:
# According to the theory, melting temperature doesn't depend on divalent cations
divalent = dntp
return 120 * (math.sqrt(divalent - dntp))
class Cal():
def __init__(self, qseq, sseq, mono_conc=50, diva_conc=1.5, oligo_conc=50, dntp_conc=0.25):
deltaH, deltaS = calDeltaHS(qseq, sseq)
self.Tm = calTm(qseq, sseq, mono_conc=mono_conc, diva_conc=diva_conc, oligo_conc=oligo_conc, dntp_conc=dntp_conc,deltaH=deltaH, deltaS=deltaS)
self.DeltaG = calDeltaG(qseq, sseq, mono_conc=mono_conc, diva_conc=diva_conc, dntp_conc=dntp_conc,deltaH=deltaH, deltaS=deltaS)
def main():
qseq = 'TTAACTTCTCAGAATTTATCTTCTAGAGGGTAACTTCTGTTTCCCGATTTTGCTCTAGGAAGTAATTTAAGTGCACAAGACATTGGTCAAAATGGTTTCCAAAAGAAGACTGTCAAAATC'
sseq = complement(qseq)
print(sseq)
sseq = 'AAGGGAAGAATCTTAAATAGAAGATCTGGCATCGCTGACAAAGGGCTAACGCGAGATCCTTCATTAAATTCACGTGTTCTGTAACCAGNNNNACCAAAGGGGGGCTTCTGACAGTTTTAG'
mono = 50
diva = 1.5
oligo = 50
dntp = 0.25
seq = Cal(qseq, sseq, mono_conc=50, diva_conc=1, oligo_conc=50, dntp_conc=1)
print ('Tm: ', seq.Tm)
print ('DeltaG: ', seq.DeltaG)
if __name__ == '__main__':
main()
参考: