def filter_fasta(src_path, dst_path, keyword="Homo sapiens"):
    """Copy the FASTA records whose header contains *keyword*.

    A record is a header line (starting with '>') followed by the sequence
    lines up to the next header.  Matching records are written to *dst_path*
    exactly as they appear in *src_path* (header line plus sequence lines,
    line breaks untouched).

    Args:
        src_path: Path of the FASTA file to read.
        dst_path: Path of the FASTA file to create or overwrite.
        keyword: Substring that must occur in the header line for the
            record to be kept.

    Notes:
        * Lines that appear before the first header are ignored.
        * A header that is immediately followed by another header (no
          sequence lines) is dropped; a header with no sequence at the very
          end of the file is still written if it matches.
        * An empty input file produces an empty output file.
    """
    # `with` guarantees both files are closed, even if an error occurs.
    with open(src_path, 'r') as file_r, open(dst_path, 'w') as file_w:
        header = None      # header line of the record being collected
        seq_lines = []     # sequence lines collected for that record

        for line in file_r:
            if not line.startswith('>'):
                # Sequence line: attach it to the current record.
                seq_lines.append(line)
                continue

            # A new header starts here.  If the previous record has a
            # sequence, it is complete: write it when it matches.
            if seq_lines:
                if header is not None and keyword in header:
                    file_w.write(header + ''.join(seq_lines))
                seq_lines = []
            # Start collecting the next record.
            header = line

        # The last record is not followed by another header, so it has to
        # be flushed after the loop.
        if header is not None and keyword in header:
            file_w.write(header + ''.join(seq_lines))


if __name__ == "__main__":
    filter_fasta('SwissProt.fasta', 'SwissProtHomo.fasta')
结果(脚本写入 SwissProtHomo.fasta 的内容):
>sp|Q66K14|TBC9B_HUMAN TBC1 domain family member 9B OS=Homo sapiens OX=9606 GN=TBC1D9B PE=1 SV=3
MWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTLDVVLDSSARVAPYR
ILHQTQDSQVYWTVACGSSRKEITKHWEWLENNLLQTLSIFDSEEDITTFVKGKIHGIIA
EENKNLQPQGDEDPGKFKEAELKMRKQFGMPEGEKLVNYYSCSYWKGRVPRQGWLYLTVN
HLCFYSFLLGKEVSLVVQWVDITRLEKNATLLFPESIRVDTRDQELFFSMFLNIGETFKL
MEQLANLAMRQLLDSEGFLEDKALPRPIRPHRNISALKRDLDARAKNECYRATFRLPRDE
RLDGHTSCTLWTPFNKLHIPGQMFISNNYICFASKEEDACHLIIPLREVTIVEKADSSSV
LPSPLSISTKSKMTFLFANLKDRDFLVQRISDFLQKTPSKQPGSIGSRKASVVDPSTESS
PAPQEGSEQPASPASPLSSRQSFCAQEAPTASQGLLKLFQKNSPMEDLGAKGAKEKMKEE
SWHIHFFEYGRGVCMYRTAKTRALVLKGIPESLRGELWLLFSGAWNEMVTHPGYYAELVE
KSTGKYSLATEEIERDLHRSMPEHPAFQNELGIAALRRVLTAYAFRNPTIGYCQAMNIVT
SVLLLYGSEEEAFWLLVALCERMLPDYYNTRVVGALVDQGIFEELTRDFLPQLSEKMQDL
GVISSISLSWFLTLFLSVMPFESAVVIVDCFFYEGIKVILQVALAVLDANMEQLLGCSDE
GEAMTMLGRYLDNVVNKQSVSPPIPHLRALLSSSDDPPAEVDIFELLKVSYEKFSSLRAE
DIEQMRFKQRLKVIQSLEDTAKRSVVRAIPVDIGFSIEELEDLYMVFKAKHLASQYWGCS
RTMAGRRDPSLPYLEQYRIDASQFRELFASLTPWACGSHTPLLAGRMFRLLDENKDSLIN
FKEFVTGMSGMYHGDLTEKLKVLYKLHLPPALSPEEAESALEAAHYFTEDSSSEASPLAS
DLDLFLPWEAQEALPQEEQEGSGSEERGEEKGTSSPDYRHYLRMWAKEKEAQKETIKDLP
KMNQEQFIELCKTLYNMFSEDPMEQDLYHAIATVASLLLRIGEVGKKFSARTGRKPRDCA
TEEDEPPAPELHQDAARELQPPAAGDPQAKAGGDTHLGKAPQESQVVVEGGSGEGQGSPS
QLLSDDETKDDMSMSSYSVVSTGSLQCEDLADDTVLVGGEACSPTARIGGTVDTDWCISF
EQILASILTESVLVNFFEKRVDIGLKIKDQKKVERQFSTASDHEQPGVSG
>sp|Q66K14|TBC9B_HUMAN TBC1 domain family member 9B OS=Homo sapiens OX=9606 GN=TBC1D9B PE=1 SV=3
MWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTLDVVLDSSARVAPYR
ILHQTQDSQVYWTVACGSSRKEITKHWEWLENNLLQTLSIFDSEEDITTFVKGKIHGIIA
EENKNLQPQGDEDPGKFKEAELKMRKQFGMPEGEKLVNYYSCSYWKGRVPRQGWLYLTVN
HLCFYSFLLGKEVSLVVQWVDITRLEKNATLLFPESIRVDTRDQELFFSMFLNIGETFKL
MEQLANLAMRQLLDSEGFLEDKALPRPIRPHRNISALKRDLDARAKNECYRATFRLPRDE
RLDGHTSCTLWTPFNKLHIPGQMFISNNYICFASKEEDACHLIIPLREVTIVEKADSSSV
LPSPLSISTKSKMTFLFANLKDRDFLVQRISDFLQKTPSKQPGSIGSRKASVVDPSTESS
PAPQEGSEQPASPASPLSSRQSFCAQEAPTASQGLLKLFQKNSPMEDLGAKGAKEKMKEE
SWHIHFFEYGRGVCMYRTAKTRALVLKGIPESLRGELWLLFSGAWNEMVTHPGYYAELVE
KSTGKYSLATEEIERDLHRSMPEHPAFQNELGIAALRRVLTAYAFRNPTIGYCQAMNIVT
SVLLLYGSEEEAFWLLVALCERMLPDYYNTRVVGALVDQGIFEELTRDFLPQLSEKMQDL
GVISSISLSWFLTLFLSVMPFESAVVIVDCFFYEGIKVILQVALAVLDANMEQLLGCSDE
GEAMTMLGRYLDNVVNKQSVSPPIPHLRALLSSSDDPPAEVDIFELLKVSYEKFSSLRAE
DIEQMRFKQRLKVIQSLEDTAKRSVVRAIPVDIGFSIEELEDLYMVFKAKHLASQYWGCS
RTMAGRRDPSLPYLEQYRIDASQFRELFASLTPWACGSHTPLLAGRMFRLLDENKDSLIN
FKEFVTGMSGMYHGDLTEKLKVLYKLHLPPALSPEEAESALEAAHYFTEDSSSEASPLAS
DLDLFLPWEAQEALPQEEQEGSGSEERGEEKGTSSPDYRHYLRMWAKEKEAQKETIKDLP
KMNQEQFIELCKTLYNMFSEDPMEQDLYHAIATVASLLLRIGEVGKKFSARTGRKPRDCA
TEEDEPPAPELHQDAARELQPPAAGDPQAKAGGDTHLGKAPQESQVVVEGGSGEGQGSPS
QLLSDDETKDDMSMSSYSVVSTGSLQCEDLADDTVLVGGEACSPTARIGGTVDTDWCISF
EQILASILTESVLVNFFEKRVDIGLKIKDQKKVERQFSTASDHEQPGVSG
示例输入(FASTA 格式):
>sp|Q664P8|TAUB_YERPS Taurine import ATP-binding protein TauB OS=Yersinia pseudotuberculosis serotype I (strain IP32953) OX=273123 GN=tauB PE=3 SV=1
MLNVSGLWAEYQGKPALQDVSLQIASGQLVVVLGPSGCGKTTLLNLIAGFMTPSAGVITL
DNIPVSGPSAERGVVFQNEGLLPWRDVVSNVEFGLQLAGMSKEQRRVTALKMLNRVGLAG
FEHHFIWQLSGGMRQRVGIARALAVDPRLLLLDEPFGALDAFTREQMQELLLTIWRDTGK
QILLITHDIEEAVFLASELLLLSPGPGQVVERLSLNFGQRYAEGEPCRAIKSDPEFIARR
EYVLGKVFQQREVLI
>sp|Q66K14|TBC9B_HUMAN TBC1 domain family member 9B OS=Homo sapiens OX=9606 GN=TBC1D9B PE=1 SV=3
MWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTLDVVLDSSARVAPYR
ILHQTQDSQVYWTVACGSSRKEITKHWEWLENNLLQTLSIFDSEEDITTFVKGKIHGIIA
EENKNLQPQGDEDPGKFKEAELKMRKQFGMPEGEKLVNYYSCSYWKGRVPRQGWLYLTVN
HLCFYSFLLGKEVSLVVQWVDITRLEKNATLLFPESIRVDTRDQELFFSMFLNIGETFKL
MEQLANLAMRQLLDSEGFLEDKALPRPIRPHRNISALKRDLDARAKNECYRATFRLPRDE
RLDGHTSCTLWTPFNKLHIPGQMFISNNYICFASKEEDACHLIIPLREVTIVEKADSSSV
LPSPLSISTKSKMTFLFANLKDRDFLVQRISDFLQKTPSKQPGSIGSRKASVVDPSTESS
PAPQEGSEQPASPASPLSSRQSFCAQEAPTASQGLLKLFQKNSPMEDLGAKGAKEKMKEE
SWHIHFFEYGRGVCMYRTAKTRALVLKGIPESLRGELWLLFSGAWNEMVTHPGYYAELVE
KSTGKYSLATEEIERDLHRSMPEHPAFQNELGIAALRRVLTAYAFRNPTIGYCQAMNIVT
SVLLLYGSEEEAFWLLVALCERMLPDYYNTRVVGALVDQGIFEELTRDFLPQLSEKMQDL
GVISSISLSWFLTLFLSVMPFESAVVIVDCFFYEGIKVILQVALAVLDANMEQLLGCSDE
GEAMTMLGRYLDNVVNKQSVSPPIPHLRALLSSSDDPPAEVDIFELLKVSYEKFSSLRAE
DIEQMRFKQRLKVIQSLEDTAKRSVVRAIPVDIGFSIEELEDLYMVFKAKHLASQYWGCS
RTMAGRRDPSLPYLEQYRIDASQFRELFASLTPWACGSHTPLLAGRMFRLLDENKDSLIN
FKEFVTGMSGMYHGDLTEKLKVLYKLHLPPALSPEEAESALEAAHYFTEDSSSEASPLAS
DLDLFLPWEAQEALPQEEQEGSGSEERGEEKGTSSPDYRHYLRMWAKEKEAQKETIKDLP
KMNQEQFIELCKTLYNMFSEDPMEQDLYHAIATVASLLLRIGEVGKKFSARTGRKPRDCA
TEEDEPPAPELHQDAARELQPPAAGDPQAKAGGDTHLGKAPQESQVVVEGGSGEGQGSPS
QLLSDDETKDDMSMSSYSVVSTGSLQCEDLADDTVLVGGEACSPTARIGGTVDTDWCISF
EQILASILTESVLVNFFEKRVDIGLKIKDQKKVERQFSTASDHEQPGVSG
>sp|Q8K9I1|SYV_BUCAP Valine--tRNA ligase OS=Buchnera aphidicola subsp. Schizaphis graminum (strain Sg) OX=198804 GN=valS PE=3 SV=1
MKKNYNPKDIEEHLYNFWEKNGFFKPNNNLNKPAFCIMMPPPNITGNLHMGHAFQQTIMD
ILIRYNRMQGKNTLWQVGTDHAGIATQILIERQIFSEERKTKKDYSRNDFIKKIWKWKKK
SNFSVKKQMKRLGNSVDWDREKFTLDPDISNSVKEAFIILYKNNLIYQKKRLVHWDSKLE
TVISDLEVEHRLIKSKKWFIRYPIIKNIKNINIEYLLVATTRPETLLGDTALAINPKDDK
YNHLIGQSVICPIVNRIIPIIADHYADMNKDTGCVKITPGHDFNDYEVGQRHKLPMINIF
TFNGKIKSNFSIYDYQGSKSNFYDSSIPTEFQNLDILSARKKIIYEIEKLGLLEKIEECN
FFTPYSERSGVIIQPMLTNQWYLKTSHLSQSAIDVVREKKIKFIPNQYKSMYLSWMNNIE
DWCISRQLWWGHQIPVWYDDKKNIYVGHSEKKIREEYNISDDMILNQDNDVLDTWFSSGL
WTFSTLGWPEKTEFLKIFHSTDVLVSGFDIIFFWIARMIMLTMYLVKDSYGNPQIPFKDV
YITGLIRDEEGKKMSKSKGNVIDPIDMIDGISLNELIEKRTSNLLQPHLSQKIRYHTIKQ
FPNGISATGTDALRFTFSALASNTRDIQWDMNRLKGYRNFCNKLWNASRFVLKNTKDHDY
FNFSVNDNMLLINKWILIKFNNTVKSYRNSLDSYRFDIAANILYDFIWNVFCDWYLEFVK
SVIKSGSYQDIYFTKNVLIHVLELLLRLSHPIMPFITEAIWQRVKIIKHIKDRTIMLQSF
PEYNDQLFDKSTLSNINWIKKIIIFIRNTRSKMNISSTKLLSLFLKNINSEKKKVIQENK
FILKNIASLEKISILSKQDDEPCLSLKEIIDGVDILVPVLKAIDKEIELKRLNKEIEKIK
SKMLISEKKMSNQDFLSYAPKNIIDKEIKKLKSLNEIYLTLSQQLESLHDAFCKKNKIFN
>sp|Q664P8|TAUB_YERPS Taurine import ATP-binding protein TauB OS=Yersinia pseudotuberculosis serotype I (strain IP32953) OX=273123 GN=tauB PE=3 SV=1
MLNVSGLWAEYQGKPALQDVSLQIASGQLVVVLGPSGCGKTTLLNLIAGFMTPSAGVITL
DNIPVSGPSAERGVVFQNEGLLPWRDVVSNVEFGLQLAGMSKEQRRVTALKMLNRVGLAG
FEHHFIWQLSGGMRQRVGIARALAVDPRLLLLDEPFGALDAFTREQMQELLLTIWRDTGK
QILLITHDIEEAVFLASELLLLSPGPGQVVERLSLNFGQRYAEGEPCRAIKSDPEFIARR
EYVLGKVFQQREVLI
>sp|Q66K14|TBC9B_HUMAN TBC1 domain family member 9B OS=Homo sapiens OX=9606 GN=TBC1D9B PE=1 SV=3
MWLSPEEVLVANALWVTERANPFFVLQRRRGHGRGGGLTGLLVGTLDVVLDSSARVAPYR
ILHQTQDSQVYWTVACGSSRKEITKHWEWLENNLLQTLSIFDSEEDITTFVKGKIHGIIA
EENKNLQPQGDEDPGKFKEAELKMRKQFGMPEGEKLVNYYSCSYWKGRVPRQGWLYLTVN
HLCFYSFLLGKEVSLVVQWVDITRLEKNATLLFPESIRVDTRDQELFFSMFLNIGETFKL
MEQLANLAMRQLLDSEGFLEDKALPRPIRPHRNISALKRDLDARAKNECYRATFRLPRDE
RLDGHTSCTLWTPFNKLHIPGQMFISNNYICFASKEEDACHLIIPLREVTIVEKADSSSV
LPSPLSISTKSKMTFLFANLKDRDFLVQRISDFLQKTPSKQPGSIGSRKASVVDPSTESS
PAPQEGSEQPASPASPLSSRQSFCAQEAPTASQGLLKLFQKNSPMEDLGAKGAKEKMKEE
SWHIHFFEYGRGVCMYRTAKTRALVLKGIPESLRGELWLLFSGAWNEMVTHPGYYAELVE
KSTGKYSLATEEIERDLHRSMPEHPAFQNELGIAALRRVLTAYAFRNPTIGYCQAMNIVT
SVLLLYGSEEEAFWLLVALCERMLPDYYNTRVVGALVDQGIFEELTRDFLPQLSEKMQDL
GVISSISLSWFLTLFLSVMPFESAVVIVDCFFYEGIKVILQVALAVLDANMEQLLGCSDE
GEAMTMLGRYLDNVVNKQSVSPPIPHLRALLSSSDDPPAEVDIFELLKVSYEKFSSLRAE
DIEQMRFKQRLKVIQSLEDTAKRSVVRAIPVDIGFSIEELEDLYMVFKAKHLASQYWGCS
RTMAGRRDPSLPYLEQYRIDASQFRELFASLTPWACGSHTPLLAGRMFRLLDENKDSLIN
FKEFVTGMSGMYHGDLTEKLKVLYKLHLPPALSPEEAESALEAAHYFTEDSSSEASPLAS
DLDLFLPWEAQEALPQEEQEGSGSEERGEEKGTSSPDYRHYLRMWAKEKEAQKETIKDLP
KMNQEQFIELCKTLYNMFSEDPMEQDLYHAIATVASLLLRIGEVGKKFSARTGRKPRDCA
TEEDEPPAPELHQDAARELQPPAAGDPQAKAGGDTHLGKAPQESQVVVEGGSGEGQGSPS
QLLSDDETKDDMSMSSYSVVSTGSLQCEDLADDTVLVGGEACSPTARIGGTVDTDWCISF
EQILASILTESVLVNFFEKRVDIGLKIKDQKKVERQFSTASDHEQPGVSG
>sp|Q8E4B4|TARI_STRA3 Ribitol-5-phosphate cytidylyltransferase OS=Streptococcus agalactiae serotype III (strain NEM316) OX=211110 GN=tarI PE=3 SV=1
MNIGVIFAGGVGRRMNTKGKPKQFLEVHGKPIIVHTIDIFQNTEAIDAVVVVCVSDWLDY
MNNLVERFNLTKVKAVVAGGETGQMSIFKGLEAAEQLATDDAVVLIHDGVRPLINEEVIN
ANIKSVKETGSAVTSVRAKETVVLVNDSSKISEVVDRTRSFIAKAPQSFYLSDILSVERD
AISKGITDAIDSSTLMGMYNRELTIVEGPYENIKITTPDDFYMFKALYDARENEQIYGM
>sp|B3CQ06|SYS_WOLPP Serine--tRNA ligase OS=Wolbachia pipientis subsp. Culex pipiens (strain wPip) OX=570417 GN=serS PE=3 SV=1
MHDIEHIRKNPKGFEKAIKSRGVKEFTAKEILEIDHKKRSLTTKLQALNKQRNEVTEEIK
RLKMNKSPCEEQVKLSKSITSEIETISLKEQTEKNKLVDILSNLPNISAQNVPIGEDESS
NVEIRKYGKKRKFDFTPKFHYELGERLGLMDFEQAAKISGSRFTILKGQLAKLGRALINF
MLETHVNEFAYTEVYHPALVKNEAMYNVGQLPKFSDDSYLTTDKLRLIPTSEVVLTNLVA
DKIIEEKELPIRFTAYSECFRKEAGSAGRDTRGMIRQHQFGKVELVSITTEDQSKDELER
MTNAAEEILKKLELPYRIMLLCSGDMGFAAQKTYDIEVWLPEQNKYREISSCSNCGTFQA
RRMNTKYFLETDRKTKKYVHTLNGSALAIGRTIVAIMENYQNSDGSVTIPNVLQRYMSND
TVISKQ
>sp|Q9ATB4|TAD2B_ARATH Transcriptional adapter ADA2b OS=Arabidopsis thaliana OX=3702 GN=ADA2B PE=1 SV=1
MGRSRGNFQNFEDPTQRTRKKKNAANVENFESTSLVPGAEGGGKYNCDYCQKDITGKIRI
KCAVCPDFDLCIECMSVGAEITPHKCDHPYRVMGNLTFPLICPDWSADDEMLLLEGLEIY
GLGNWAEVAEHVGTKSKEQCLEHYRNIYLNSPFFPLPDMSHVAGKNRKELQAMAKGRIDD
KKAEQNMKEEYPFSPPKVKVEDTQKESFVDRSFGGKKPVSTSVNNSLVELSNYNQKREEF
DPEYDNDAEQLLAEMEFKENDTPEEHELKLRVLRIYSKRLDERKRRKEFIIERNLLYPNP
FEKDLSQEEKVQCRRLDVFMRFHSKEEHDELLRNVVSEYRMVKRLKDLKEAQVAGCRSTA
EAERYLGRKRKRENEEGMNRGKESGQFGQIAGEMGSRPPVQASSSYVNDLDLIGFTESQL
LSESEKRLCSEVKLVPPVYLQMQQVMSHEIFKGNVTKKSDAYSLFKIDPTKVDRVYDMLV
KKGIAQL
>sp|Q83JA5|SYW_SHIFL Tryptophan--tRNA ligase OS=Shigella flexneri OX=623 GN=trpS PE=3 SV=1
MTKPIVFSGAQPSGELTIGNYMGALRQWVNMQDDYHCIYCIVDQHAITVRQDAQKLRKAT
LDTLALYLACGIDPEKSTIFVQSHVPEHAQLGWALNCYTYFGELSRMTQFKDKSARYAEN
INAGLFGYPVLMAADILLYQTNLVPVGEDQKQHLELSRDIAQRFNALYGEIFKVPEPFIP
KSGARVMSLLEPTKKMSKSDDNRNNVIGLLEDPKSVVKKIKRAVTDSDEPPVVRYDVQNK
AGVSNLLDILSAVTGQSIPELEKQFEGKMYGHLKGEVADAVSGMLTELQERYHRFRNDEA
FLQQVMKDGAEKASVHASRTLKAVYEAIGFVAKP
>sp|P14213|TAC1_TACTR Tachyplesin-1 OS=Tachypleus tridentatus OX=6853 PE=1 SV=2
MKKLVIALCLMMVLAVMVEEAEAKWCFRVCYRGICYRRCRGKRNEVRQYRDRGYDVRAIP
EETFFTRQDEDEDDDEE
>sp|Q7SZM9|TB1RA_XENLA F-box-like/WD repeat-containing protein TBL1XR1-A OS=Xenopus laevis OX=8355 GN=tbl1xr1-a PE=1 SV=1
MSISSDEVNFLVYRYLQESGFSHSAFTFGIESHISQSNINGALAPPAALISIIQKGLQYV
EAEVSINEDGTLFDGRPIESLSLIDAVMPDVVQTRQQAYRDKLAQQQTAAAAAAAAAAAA
TPNNQQPPAKNGENTANGEENGGHALANNHTDMMEVDGDVEIPSSKAVVLRGHESEVFIC
AWNPVSDLLASGSGDSTARIWNLSENSTSGSTQLVLRHCIREGGQDVPSNKDVTSLDWNS
EGTLLATGSYDGFARIWTKDGNLASTLGQHKGPIFALKWNKKGNFILSAGVDKTTIIWDA
HTGEAKQQFPFHSAPALDVDWQSNNTFASCSTDMCIHVCKLGQDRPIKTFQGHTNEVNAI
KWDPTGNLLASCSDDMTLKIWSMKHDTCVHDLQAHNKEIYTIKWSPTGPGTNNPNANLML
ASASFDSTVRLWDVDRGICIHTLTKHQEPVYSVAFSPDGRYLASGSFDKCVHIWNTQTGA
LVHSYRGTGGIFEVCWNAAGDKVGASASDGSVCVLDLRK
>sp|Q9FGE9|TBL12_ARATH Protein trichome birefringence-like 12 OS=Arabidopsis thaliana OX=3702 GN=TBL12 PE=2 SV=1
MELGSRRIYTTMPSKLRSSSSLLPRILLLSLLLLLFYSLILRRPITSNIASPPPCDLFSG
RWVFNPETPKPLYDETCPFHRNAWNCLRNKRDNMDVINSWRWEPNGCGLSRIDPTRFLGM
MRNKNVGFVGDSLNENFLVSFLCILRVADPSAIKWKKKKAWRGAYFPKFNVTVAYHRAVL
LAKYQWQARSSAEANQDGVKGTYRVDVDVPANEWINVTSFYDVLIFNSGHWWGYDKFPKE
TPLVFYRKGKPINPPLDILPGFELVLQNMVSYIQREVPAKTLKFWRLQSPRHFYGGDWNQ
NGSCLLDKPLEENQLDLWFDPRNNGVNKEARKINQIIKNELQTTKIKLLDLTHLSEFRAD
AHPAIWLGKQDAVAIWGQDCMHWCLPGVPDTWVDILAELILTNLKTE