import time
import csv
import time
# Wall-clock reference; main() reports the elapsed time relative to this.
start = time.time()

# Load the reference sequence. The context manager closes the file handle,
# which the previous bare open() never did.
with open("GM18.txt", "r") as genome_file:
    txt = genome_file.read()

# Drop the literal ">Gm18" header. str.strip(">Gm18") treats its argument as
# a *set of characters* and trims them from BOTH ends, so it could also eat
# trailing "G" bases of the sequence; only the exact leading prefix is
# removed here.
_HEADER = ">Gm18"
if txt.startswith(_HEADER):
    txt = txt[len(_HEADER):]

# Join the sequence into one continuous string (removes LF and CR).
txt = txt.replace("\n", "").replace("\r", "")


def _read_probe_list(path):
    """Return one entry per line of *path*, with newline and tab characters removed."""
    with open(path, "r") as fin:
        return [line.replace("\n", "").replace("\t", "") for line in fin]


pie5 = _read_probe_list("5pie.list")
pie3 = _read_probe_list("3pie.list")

# Entries are paired by line number, one [5' entry, 3' entry] pair per line of
# 3pie.list. A shorter 5pie.list is still an IndexError, as before, but now
# with a message that says what is wrong.
if len(pie5) < len(pie3):
    raise IndexError(
        "5pie.list has {} entries but 3pie.list has {}".format(len(pie5), len(pie3))
    )
pie_all = [[five, three] for five, three in zip(pie5, pie3)]
print(pie_all[:5])
def indexstr(str1, str2):
    """Return the start index of every non-overlapping occurrence of str2 in str1.

    The search resumes immediately after the end of each match, so
    overlapping occurrences are not reported (``indexstr("aaaa", "aa")``
    gives ``[0, 2]``).

    Args:
        str1: The string to search in.
        str2: The substring to look for.

    Returns:
        A list of 0-based start positions in ascending order. The list is
        empty when str2 does not occur in str1 or when str2 is empty.
    """
    # An empty needle "matches" everywhere without advancing the cursor, so
    # the search would never terminate; treat it as having no occurrences.
    if not str2:
        return []

    step = len(str2)
    positions = []
    # str.find takes a start offset, so no slice copy of the remaining text
    # is made per match (the previous `str2 in str1[i:]` test copied it on
    # every iteration).
    found = str1.find(str2)
    while found != -1:
        positions.append(found)
        found = str1.find(str2, found + step)
    return positions
def main():
    """Find every probe pair in the reference and write flanking sequences to csv.csv.

    For each ``[5' probe, 3' probe]`` pair in the module-level ``pie_all``,
    all match positions in the module-level ``txt`` are collected with
    ``indexstr``. Up to 300 characters before and after the 5' probe, and up
    to 300 characters after the 3' probe, are sliced out of ``txt``. When a
    probe matches more than once, the slices of the last match are kept.
    Only probes of length 44 or 50 produce slices; for any other length the
    sequence columns stay empty.

    One row per pair is written to ``csv.csv`` with the columns
    ``5_pos, 3_pos, 5_front, 5_end, 3_end``; the position columns hold the
    full list of match positions. Progress and the elapsed time since the
    module-level ``start`` are printed to stdout.
    """
    pie_result = []
    for probe5, probe3 in pie_all:
        pie5_pos = indexstr(txt, probe5)
        sequence1 = ""
        sequence2 = ""
        for number in pie5_pos:
            # Slice starts are clamped at 0: a negative start would wrap
            # around to the end of txt and return the wrong (usually empty)
            # slice for matches near the beginning of the sequence.
            if len(probe5) == 44:
                # NOTE(review): 44-character probes leave a 2-character gap
                # between the upstream window and the match; kept as in the
                # original, confirm this is intended.
                sequence1 = txt[max(number - 302, 0):max(number - 2, 0)]
                sequence2 = txt[number + 44:number + 344]
            elif len(probe5) == 50:
                sequence1 = txt[max(number - 300, 0):number]
                sequence2 = txt[number + 50:number + 350]
        print("由以上条件我们可以得知前300个碱基为 {} ,后300个碱基为 {} ".format(sequence1, sequence2))

        pie3_pos = indexstr(txt, probe3)
        sequence3 = ''
        for number in pie3_pos:
            if len(probe3) == 44:
                sequence3 = txt[number + 44:number + 344]
            elif len(probe3) == 50:
                sequence3 = txt[number + 50:number + 350]
        # The original repeated the 5' message here and never showed the 3'
        # result; report the sequence that was just computed instead.
        print("由以上条件我们可以得知3'端后300个碱基为 {} ".format(sequence3))

        pie_result.append([pie5_pos, pie3_pos, sequence1, sequence2, sequence3])

    print("此次查找花费的时间是:{:.5f} 秒".format(time.time() - start))

    # newline='' is required by the csv module; without it every row is
    # followed by a blank line on platforms that translate "\n" to "\r\n".
    with open('csv.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['5_pos', '3_pos', '5_front', '5_end', '3_end'])
        writer.writerows(pie_result)
# Run the search only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
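# Background:
# For each SNP site, find the 300 bases before and the 300 bases after the
# 5' probe, plus the 300 bases after the 3' probe, and save them to a CSV file.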