2.1 端粒蛋白质序列中氨基酸出现的频率
# Protein sequence to analyse, stored one 70-residue row per line.
# NOTE(review): the variable is called `insulin`, but the section heading
# describes this as a telomere protein sequence -- confirm the intended name.
insulin = '''
MPRAPRCRAVRSLLRSHYREVLPLATFVRRLGPQGWRLVQRGDPAAFRALVAQCLVCVPWDARPPPAAPS
FRQVSCLKELVARVLQRLCERGAKNVLAFGFALLDGARGGPPEAFTTSVRSYLPNTVTDALRGSGAWGLL
LRRVGDDVLVHLLARCALFVLVAPSCAYQVCGPPLYQLGAATQARPPPHASGPRRRLGCERAWNHSVREA
GVPLGLPAPGARRRGGSASRSLPLPKRPRRGAAPEPERTPVGQGSWAHPGRTRGPSDRGFCVVSPARPAE
EATSLEGALSGTRHSHPSVGRQHHAGPPSTSRPPRPWDTPCPPVYAETKHFLYSSGDKEQLRPSFLLSSL
RPSLTGARRLVETIFLGSRPWMPGTPRRLPRLPQRYWQMRPLFLELLGNHAQCPYGVLLKTHCPLRAAVT
PAAGVCAREKPQGSVAAPEEEDTDPRRLVQLLRQHSSPWQVYGFVRACLRRLVPPGLWGSRHNERRFLRN
TKKFISLGKHAKLSLQELTWKMSVRDCAWLRRSPGVGCVPAAEHRLREEILAKFLHWLMSVYVVELLRSF
FYVTETTFQKNRLFFYRKSVWSKLQSIGIRQHLKRVQLRELSEAEVRQHREARPALLTSRLRFIPKPDGL
RPIVNMDYVVGARTFRREKRAERLTSRVKALFSVLNYERARRPGLLGASVLGLDDIHRAWRTFVLRVRAQ
DPPPELYFVKVDVTGAYDTIPQDRLTEVIASIIKPQNTYCVRRYAVVQKAAHGHVRKAFKSHVSTLTDLQ
PYMRQFVAHLQETSPLRDAVVIEQSSSLNEASSGLFDVFLRFMCHHAVRIRGKSYVQCQGIPQGSILSTL
LCSLCYGDMENKLFAGIRRDGLLLRLVDDFLLVTPHLTHAKTFLRTLVRGVPEYGCVVNLRKTVVNFPVE
DEALGGTAFVQMPAHGLFPWCGLLLDTRTLEVQSDYSSYARTSIRASLTFNRGFKAGRNMRRKLFGVLRL
KCHSLFLDLQVNSLQTVCTNIYKILLLQAYRFHACVLQLPFHQQVWKNPTFFLRVISDTASLCYSILKAK
NAGMSLGAKGAAGPLPSEAVQWLCHQAFLLKLTRHRVTYVPLLGSLRTAQTQLSRKLPGTTLTALEAAAN
PALPSDFKTILD'''

# Count how often each of the 20 one-letter amino-acid codes occurs.
# Only these letters are counted, so the newline characters embedded in the
# triple-quoted literal do not affect the result.
dict0 = {
    amino_acid: insulin.count(amino_acid)
    for amino_acid in "ACDEFGHIKLMNPQRSTVWY"
}

# Order the (residue, count) pairs by count, largest first.  The key picks
# element [1] of each pair (the count, not the letter); reverse=True gives
# descending order.  The sort is stable, so residues with equal counts keep
# the alphabetical order in which they were inserted above.
ls = sorted(dict0.items(), key=lambda pair: pair[1], reverse=True)

# The first pair is now the most frequent residue.
most_common_residue, most_common_count = ls[0]
print('氨基酸{}出现最频繁,共出现{}次'.format(most_common_residue, most_common_count))
结果
氨基酸L出现最频繁,共出现147次
2.2 DNA序列中核苷酸碱基出现的频率
# DNA sequence whose base composition is analysed.
amid = 'AAAACCCGGT'

# Map each nucleotide base to the number of times it occurs in `amid`.
# (Named `base_counts` rather than `dict` so the builtin `dict` type is not
# shadowed for the rest of the file.)
base_counts = {base: amid.count(base) for base in "ATGC"}

# Order the (base, count) pairs by count, largest first.  The sort is stable,
# so bases with equal counts keep their "ATGC" insertion order.
ls = sorted(base_counts.items(), key=lambda pair: pair[1], reverse=True)

# The first pair is the most frequent base.
print('核苷酸出现最频繁的是{},出现次数为{}'.format(ls[0][0], ls[0][1]))
结果
核苷酸出现最频繁的是A,出现次数为4
2.3 一次一个残基地输出氨基酸序列
insulin = "GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGFFYTPKT"

# Print every prefix of the sequence, one residue longer on each line:
# G, GI, GIV, ... up to the complete sequence.
# `end` is the exclusive slice bound.  range(1, n + 1) yields 1, 2, ..., n
# (a plain range(n) would yield 0 .. n-1 and need an extra "+ 1" in the
# slice), so the final iteration prints the whole sequence.
for end in range(1, len(insulin) + 1):
    print(insulin[:end])
结果
G
GI
GIV
GIVE
GIVEQ
GIVEQC
GIVEQCC
GIVEQCCT
GIVEQCCTS
GIVEQCCTSI
GIVEQCCTSIC
GIVEQCCTSICS
GIVEQCCTSICSL
GIVEQCCTSICSLY
GIVEQCCTSICSLYQ
GIVEQCCTSICSLYQL
GIVEQCCTSICSLYQLE
GIVEQCCTSICSLYQLEN
GIVEQCCTSICSLYQLENY
GIVEQCCTSICSLYQLENYC
GIVEQCCTSICSLYQLENYCN
GIVEQCCTSICSLYQLENYCNF
GIVEQCCTSICSLYQLENYCNFV
GIVEQCCTSICSLYQLENYCNFVN
GIVEQCCTSICSLYQLENYCNFVNQ
GIVEQCCTSICSLYQLENYCNFVNQH
GIVEQCCTSICSLYQLENYCNFVNQHL
GIVEQCCTSICSLYQLENYCNFVNQHLC
GIVEQCCTSICSLYQLENYCNFVNQHLCG
GIVEQCCTSICSLYQLENYCNFVNQHLCGS
GIVEQCCTSICSLYQLENYCNFVNQHLCGSH
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHL
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLV
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVE
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEA
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEAL
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALY
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYL
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLV
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVC
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCG
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGE
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGER
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERG
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGF
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGFF
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGFFY
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGFFYT
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGFFYTP
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGFFYTPK
GIVEQCCTSICSLYQLENYCNFVNQHLCGSHLVEALYLVCGERGFFYTPKT