import re
# Default input: a FASTA file containing one or more records.
FASTA_PATH = "../examples/ros_bio5_GC.txt"


def parse_fasta(lines):
    """Parse FASTA-formatted lines into a ``{header: sequence}`` dict.

    Args:
        lines: Iterable of text lines (an open file works). A line that
            starts with ``>`` begins a new record; every following line up
            to the next header is appended to that record's sequence.

    Returns:
        Dict mapping each full header line (leading ``>`` included, trailing
        newline removed) to its concatenated sequence, in file order. If a
        header occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If non-blank data appears before the first header.
    """
    chunks = {}
    title = None
    for raw in lines:
        line = raw.rstrip("\n")
        if line.startswith(">"):
            title = line
            chunks[title] = []  # a repeated header restarts its record
        elif title is not None:
            chunks[title].append(line)
        elif line.strip():
            # Sequence data with no header to attach it to.
            raise ValueError(
                "sequence data found before the first '>' header: %r" % line
            )
    # Join once per record instead of growing a string line by line.
    return {header: "".join(parts) for header, parts in chunks.items()}


def gc_fraction(sequence):
    """Return the fraction (0.0 to 1.0) of ``G`` and ``C`` in *sequence*.

    Only upper-case ``G`` and ``C`` are counted. An empty sequence yields
    0.0 rather than dividing by zero.
    """
    if not sequence:
        return 0.0
    return (sequence.count("G") + sequence.count("C")) / len(sequence)


def highest_gc(content):
    """Return ``(header, gc_fraction)`` for the record with the highest GC.

    Args:
        content: Dict mapping header to GC fraction.

    Returns:
        The header with the largest value and that value. When several
        records tie, the first one in the dict's iteration order is returned.

    Raises:
        ValueError: If *content* is empty.
    """
    if not content:
        raise ValueError("no FASTA records found")
    best = max(content, key=content.get)
    return best, content[best]


def main(path=FASTA_PATH):
    """Read the FASTA file at *path* and print the record with the highest GC.

    Prints the parsed ``{header: sequence}`` dict, then the winning header
    and its GC content as a percentage with six decimal places, each on its
    own line.
    """
    # Read the FASTA file into a dictionary.
    with open(path) as f:
        fasta = parse_fasta(f)
    print(fasta)

    # Compute the GC content of every record.
    content = {key: gc_fraction(seq) for key, seq in fasta.items()}
    name, maxgc = highest_gc(content)
    print(name, '{:.6f}'.format(maxgc * 100), sep="\n")


# Run only when executed as a script, so importing this module does no I/O.
if __name__ == "__main__":
    main()
# List-based version — author: 未琢, https://www.bilibili.com/read/cv1986543 (source: bilibili)
'''
with open("../examples/ros_bio5_GC.txt") as f:
lines = f.readlines()
seq = []
index = []
seqplast = ""
numlines = 0
for i in lines:
if '>' in i:  # decide whether this is a header line or a sequence line
index.append(i.replace("\n", "").replace(">", ""))
seq.append(seqplast.replace("\n", ""))
seqplast = ""
numlines += 1
else:
seqplast = seqplast + i.replace("\n", "")  # join the wrapped sequence lines into one string
numlines += 1
if numlines == len(lines):
seq.append(seqplast.replace("\n", ""))
seq = seq[1:]
print(index, seq)
'''
'''
Failed attempt!!
import re
title = []
i = 0
list = []
sequence = []
with open("../examples/ros_bio5_GC.txt") as f:
fasta = f.read().rstrip()
print(fasta)
for line in fasta:
match = re.match(r'^>.*', line)
if match:
title.append(match.group())
i += 1
continue
else:
list.append(line)
dna = "".join(list)
list = []
sequence.insert(i, dna)
print(sequence)
#print(dna)
print(title)
#fasta = f.readline().rstrip()
pattern = re.compile(r'^>.*')
title = pattern.match(fasta)
print(title.group(0))
for line in fasta:
print(line)
title = re.search(r'^>.*', fasta)
#print(title.group())
'''
# ros_bio5_GC
# (scraped page footer) Latest recommended article published on 2023-11-09 14:25:21