def findplace(str, seq):
    """Return the spans of all non-overlapping matches of ``seq`` in ``str``.

    Args:
        str: The text to scan. The name shadows the builtin, but it is kept
            so that existing keyword callers keep working.
        seq: A regular-expression pattern, e.g. ``"N+"``.

    Returns:
        A list of ``(start, end)`` tuples, as produced by ``Match.span()``,
        in left-to-right order. The list is empty when nothing matches.
    """
    import re

    # finditer steps past zero-width matches on its own. A manual loop that
    # restarts the search at the previous match end never advances on an
    # empty match and would spin forever for patterns such as "N*".
    return [match.span() for match in re.compile(seq).finditer(str)]
使用方式:
position = findplace(seq1,"N+")
seq1为字符串。本脚本的目的是找出一个fasta序列中所有连续N区段(例如NNN)所在的位置信息。
现将完整脚本列出如下:
#find the place of N
import sys,re
# Read the FASTA file named on the command line and concatenate every
# sequence line into one string; lines starting with ">" are headers and
# are skipped.
with open(sys.argv[1], 'r') as f1:
    # The with-block closes the file even if reading fails, and join()
    # builds the sequence in one pass instead of re-copying the growing
    # string for every line.
    seq1 = "".join(i.strip("\n") for i in f1 if not i.startswith(">"))
#print(seq1[97865:97983])
def findplace(str, seq):
    """Return the spans of all non-overlapping matches of ``seq`` in ``str``.

    Args:
        str: The text to scan. The name shadows the builtin, but it is kept
            so that existing keyword callers keep working.
        seq: A regular-expression pattern, e.g. ``"N+"``.

    Returns:
        A list of ``(start, end)`` tuples, as produced by ``Match.span()``,
        in left-to-right order. The list is empty when nothing matches.
    """
    import re

    # finditer steps past zero-width matches on its own. A manual loop that
    # restarts the search at the previous match end never advances on an
    # empty match and would spin forever for patterns such as "N*".
    return [match.span() for match in re.compile(seq).finditer(str)]
# Locate every run of N in the concatenated sequence.
position = findplace(seq1,"N+")
# Print one tab-separated row per run: a "loc<k>" label, the start
# converted to 1-based, the inclusive end, and the run length.
for num, (begin, stop) in enumerate(position, start=1):
    st = begin + 1
    ed = stop
    le = ed - st + 1
    print("loc{}\t{}\t{}\t{}".format(num, st, ed, le))
使用脚本的命令为(注意:输出是以制表符分隔的纯文本,虽然重定向到 .xlsx 文件,但并不是真正的 Excel 格式):
python find_N.py data\out.masked.fasta > position.xlsx
输出的结果为: