ros_bio5_GC

最新推荐文章于 2023-11-09 14:25:21 发布

他城她糖i

最新推荐文章于 2023-11-09 14:25:21 发布

阅读量117

点赞数

分类专栏：ROSALIND答案　文章标签：生物信息学

本文链接：https://blog.csdn.net/qq_45380519/article/details/118937474

版权

ROSALIND答案专栏收录该内容

35 篇文章 7 订阅

订阅专栏

如果第一次阅读，请先查看《写在前面》一文。

import re
# Default input: the ROSALIND "GC" dataset, relative to the working directory.
FASTA_PATH = "../examples/ros_bio5_GC.txt"


def read_fasta(path):
    """Parse the FASTA file at *path* into a ``{header: sequence}`` dict.

    Headers are stored exactly as they appear in the file (including the
    leading ``>``).  Sequence lines belonging to one record are concatenated.
    A header that appears twice restarts its sequence, so the last
    occurrence wins.

    Raises:
        ValueError: if sequence data appears before the first header line.
    """
    chunks = {}
    title = None
    with open(path) as handle:
        for raw in handle:
            line = raw.rstrip("\n")
            if line.startswith(">"):
                title = line
                chunks[title] = []
            elif not line.strip():
                # Blank lines carry no sequence data; skipping them also
                # tolerates leading blank lines before the first record.
                continue
            elif title is None:
                raise ValueError(
                    "sequence data found before the first FASTA header"
                )
            else:
                chunks[title].append(line)
    # Join once per record instead of growing a string line by line.
    return {header: "".join(parts) for header, parts in chunks.items()}


def gc_content(sequence):
    """Return the fraction (0.0 - 1.0) of 'G' and 'C' characters in *sequence*.

    An empty sequence has no bases, so its GC content is reported as 0.0
    rather than dividing by zero.
    """
    if not sequence:
        return 0.0
    return (sequence.count("G") + sequence.count("C")) / len(sequence)


def highest_gc(records):
    """Return ``(header, gc_fraction)`` for the record with the highest GC content.

    When several records tie for the maximum, the one that appears first in
    *records* is returned.

    Raises:
        ValueError: if *records* is empty.
    """
    if not records:
        raise ValueError("no FASTA records found")
    content = {header: gc_content(seq) for header, seq in records.items()}
    best = max(content, key=content.get)
    return best, content[best]


def main(path=FASTA_PATH):
    """Print the parsed records, then the top-GC header and its percentage."""
    fasta = read_fasta(path)
    # The dictionary dump is kept so the script's output stays the same.
    print(fasta)
    name, fraction = highest_gc(fasta)
    print(name, '{:.6f}'.format(fraction * 100), sep="\n")


if __name__ == "__main__":
    main()



# List-based variant — author: 未琢 https://www.bilibili.com/read/cv1986543 (source: bilibili)
# The code below is disabled: it sits inside a bare triple-quoted string, so
# it is never executed. It reads the same FASTA file into two parallel lists,
# `index` (header lines with ">" and the newline removed) and `seq` (the
# concatenated sequence of each record), then prints both.
'''
with open("../examples/ros_bio5_GC.txt") as f:
    lines = f.readlines()
seq = []
index = []
seqplast = ""
numlines = 0
for i in lines:

    if '>' in i:# 判断是序列行还是说明行

        index.append(i.replace("\n", "").replace(">", ""))

        seq.append(seqplast.replace("\n", ""))

        seqplast = ""

        numlines += 1

    else:
        seqplast = seqplast + i.replace("\n", "")# 把分行的序列拼接成一个字符串
        numlines += 1
    if numlines == len(lines):

        seq.append(seqplast.replace("\n", ""))

seq = seq[1:]
print(index, seq)
'''



# Disabled earlier attempt, kept inside a bare triple-quoted string so it is
# never executed. The author's own first line in the string ("失败！！") marks
# it as failed.
# NOTE(review): `fasta` is the result of `f.read()`, i.e. one string, so
# `for line in fasta` iterates over single characters rather than lines —
# presumably the reason the attempt did not work; confirm before reviving it.
'''
失败！！
import re
title = []
i = 0
list = []
sequence = []
with open("../examples/ros_bio5_GC.txt") as f:
    fasta = f.read().rstrip()
    print(fasta)
    for line in fasta:
        match = re.match(r'^>.*', line)
        if match:
            title.append(match.group())
            i += 1
            continue
        else:
            list.append(line)
            dna = "".join(list)
            list = []
        sequence.insert(i, dna)
    print(sequence)
    #print(dna)
    print(title)
    #fasta = f.readline().rstrip()


pattern = re.compile(r'^>.*')
title = pattern.match(fasta)
print(title.group(0))

for line in fasta:
    print(line)
    title = re.search(r'^>.*', fasta)
    #print(title.group())
'''