primer3 支持批量设计引物,话不多说,直接放脚本!
不过,在运行之前请先确保 Python 环境中已安装 pandas、primer3-py 和 biopython 这 3 个包(例如:pip install pandas primer3-py biopython)
import primer3
import pandas as pd
import sys
from Bio.Seq import Seq
## Primer3 global design settings; tune these values to the experiment at hand.
global_args = {
    # --- how many candidates to report ---
    'PRIMER_NUM_RETURN': 5,  # number of primer pairs to return

    # --- primer length (nt) ---
    'PRIMER_OPT_SIZE': 21,  # optimal primer length
    'PRIMER_MIN_SIZE': 18,  # minimum primer length
    'PRIMER_MAX_SIZE': 24,  # maximum primer length

    # --- melting temperature ---
    'PRIMER_OPT_TM': 60.0,  # optimal Tm
    'PRIMER_MIN_TM': 57.0,  # minimum Tm
    'PRIMER_MAX_TM': 63.0,  # maximum Tm

    # --- GC content (%) ---
    'PRIMER_MIN_GC': 40.0,  # minimum GC content
    'PRIMER_MAX_GC': 60.0,  # maximum GC content

    # 1 = use the thermodynamic model when scoring hairpins and dimers
    'PRIMER_THERMODYNAMIC_OLIGO_ALIGNMENT': 1,

    # --- sequence composition ---
    'PRIMER_MAX_POLY_X': 5,  # longest allowed mononucleotide run in a primer
    'PRIMER_INTERNAL_MAX_POLY_X': 5,  # same limit for the internal oligo

    # --- reaction conditions ---
    'PRIMER_SALT_MONOVALENT': 50.0,  # monovalent salt concentration (mM)
    'PRIMER_DNA_CONC': 50.0,  # annealing oligo concentration (nM per the Primer3 manual)

    'PRIMER_MAX_NS_ACCEPTED': 0,  # number of N bases tolerated inside a primer

    # --- complementarity limits ---
    'PRIMER_MAX_SELF_ANY': 8,  # self-complementarity of a single primer
    'PRIMER_MAX_SELF_END': 3,  # 3'-end self-complementarity (primer-dimer risk)
    'PRIMER_PAIR_MAX_COMPL_ANY': 8,  # complementarity between left and right primer
    'PRIMER_PAIR_MAX_COMPL_END': 3,  # 3'-end complementarity between left and right primer

    # --- amplicon ---
    'PRIMER_PRODUCT_SIZE_RANGE': [400, 600],  # accepted product length range
    # required number of consecutive G/C bases at the 3' end of both primers
    'PRIMER_GC_CLAMP': 1
}
## function of read fasta
def readfasta(lines):
    """Parse FASTA-formatted lines into parallel lists of IDs and sequences.

    Args:
        lines: iterable of text lines (trailing newlines are stripped).

    Returns:
        A tuple ``(index, seq)``. ``index[i]`` is the header text after the
        leading ``>`` and ``seq[i]`` is the concatenation of all sequence
        lines that follow that header. The two lists always have the same
        length: a header with no sequence lines yields an empty string, so
        later records are never paired with the wrong ID. Blank lines are
        skipped, and sequence text that appears before the first header is
        ignored because it has no ID to attach to.
    """
    index = []
    fragments = []  # fragments[i] collects the sequence lines belonging to index[i]
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            # Open a new record; its fragment list starts empty so that the
            # two lists stay aligned even if no sequence line follows.
            index.append(line[1:])
            fragments.append([])
        elif fragments:
            fragments[-1].append(line)
    seq = ["".join(parts) for parts in fragments]
    return index, seq
### Read the FASTA, design primers for every record, and write both result tables.
# Expected command line: <input.fasta> <primer detail TSV> <primer/amplicon TSV>
if len(sys.argv) < 4:
    sys.exit("usage: python3 primer.py <input.fasta> <primer_details.tsv> <primers.tsv>")

# The context manager closes the input even if parsing raises.
with open(sys.argv[1], 'r') as f:
    (index, seq) = readfasta(f.readlines())
if len(index) != len(seq):
    # IDs and sequences are paired by position below; refuse to continue if
    # the parser returned lists that cannot be paired one-to-one.
    sys.exit("FASTA parse error: %d headers but %d sequences" % (len(index), len(seq)))

## primer finder: one primer3 run per record, reshaped dict -> DataFrame
frames = []          # one DataFrame per record that produced primer pairs
row_templates = []   # template sequence for every row of primer_df, in row order
for gene_id, template in zip(index, seq):
    gene_id = str(gene_id)
    template = str(template)
    if not template:
        print("skipping %s: empty sequence" % gene_id, file=sys.stderr)
        continue
    seq_args = {
        'SEQUENCE_ID': gene_id,
        'SEQUENCE_TEMPLATE': template,
        # Primer3 reads this as [start, length]; the full length keeps the
        # last base of the template available for primer placement.
        'SEQUENCE_INCLUDED_REGION': [0, len(template)],
    }
    primer3_result = primer3.bindings.designPrimers(seq_args, global_args)
    pair_count = primer3_result["PRIMER_PAIR_NUM_RETURNED"]
    if not pair_count:
        continue

    ## Collapse the per-rank keys (e.g. PRIMER_LEFT_0_TM, PRIMER_LEFT_1_TM)
    ## into one column per tag (PRIMER_LEFT_TM) holding one value per rank.
    columns = {}
    for rank in range(pair_count):
        rank_token = str(rank)
        for key, value in primer3_result.items():
            parts = key.split("_")
            # Compare whole "_"-separated tokens so that rank 1 does not
            # also pick up the keys of rank 10, 11, ...
            if rank_token in parts:
                parts.remove(rank_token)
                columns.setdefault("_".join(parts), []).append(value)

    ## Row labels are "<record id>_<1-based rank>".
    labels = [gene_id + "_" + str(rank + 1) for rank in range(pair_count)]
    frames.append(pd.DataFrame(columns, index=labels))
    row_templates.extend([template] * pair_count)

# Concatenate once instead of growing the frame inside the loop.
primer_df = pd.concat(frames) if frames else pd.DataFrame()
primer_df.to_csv(sys.argv[2], sep='\t')  # detailed primer table

## Build the compact table: primers, primers extended by 6 nt, and the amplicon.
result_columns = ['F', 'F-6', 'R', 'R-6', 'seq']
records = []
if not primer_df.empty:
    target_df = primer_df.loc[:, ["PRIMER_LEFT_SEQUENCE", "PRIMER_RIGHT_SEQUENCE",
                                  "PRIMER_LEFT", "PRIMER_RIGHT"]].copy()
    # PRIMER_LEFT / PRIMER_RIGHT hold (position, length); keep the position only.
    target_df['PRIMER_LEFT'] = target_df['PRIMER_LEFT'].apply(lambda x: x[0])
    target_df['PRIMER_RIGHT'] = target_df['PRIMER_RIGHT'].apply(lambda x: x[0])

    # Rows are matched to their template by position rather than by parsing
    # the row label, so record IDs that contain "_" are handled correctly.
    for (_, row), template in zip(target_df.iterrows(), row_templates):
        left_start = row['PRIMER_LEFT']
        right_end = row['PRIMER_RIGHT']
        # Slice from the left primer's first base through the right primer's position.
        sequence = template[left_start:right_end + 1]
        # Lengths of the primers extended by 6 nt into the amplicon.
        f_extended_len = len(row['PRIMER_LEFT_SEQUENCE']) + 6
        r_extended_len = len(row['PRIMER_RIGHT_SEQUENCE']) + 6
        # The reverse-side window is the reverse complement of the amplicon tail;
        # store it as a plain string rather than a Bio.Seq object.
        r_extended = str(Seq(sequence[-r_extended_len:]).reverse_complement())
        records.append({'F': row['PRIMER_LEFT_SEQUENCE'],
                        'F-6': sequence[:f_extended_len],
                        'R': row['PRIMER_RIGHT_SEQUENCE'],
                        'R-6': r_extended,
                        'seq': sequence})

result_df = pd.DataFrame(records, columns=result_columns, index=primer_df.index)
# Write the compact primer table.
result_df.to_csv(sys.argv[3], sep='\t')
运行命令为:python3 primer.py {输入fasta文件} {详细引物文件} {引物文件}