import pandas as pd
import os
import requests
# df1=pd.read_csv(r'不同物种cds的引物序列设计\引物设计表.txt',sep='\t',encoding='gbk')
# print(df1)
#
# # with open(r'不同物种cds的引物序列设计\\引物设计表.xlsx','r',encoding='unicode_escape') as f:
# # f.readline()
# # for line in f:
# # line=line.strip('\n')
# # list=line.split('\t')
# # print(list)
#
# # for i in range(len(df1)):
# # if df1[i,'exist']=='0':
# #
# df1=df1[df1['exist']==0].reset_index(drop=True)
# print(df1)
# list2=df1['英文名'].tolist()
# print(list2)
#
# df_url=pd.read_csv(r'不同物种cds的引物序列设计\assembly_summary_refseq.txt',sep='\t',skiprows=2,header=None)
# print(df_url)
# dict_url={}
# # print(df_url.loc[0,:])
# for i in range(len(df_url)):
# dict_url[df_url.loc[i,7]]=df_url.loc[i,19]
# print(dict_url)
#
# dict_url1={}
# for i in list2:
# for j in dict_url:
# if 'Human bocavirus' in i and 'Human bocavirus 4' not in i:
# if i == j:
# dict_url1[j]=dict_url[j]
# elif 'Human bocavirus 4' in i:
# if i in j:
# dict_url1[j]=dict_url[j]
# elif 'Human adenovirus' in i:
# if i == j:
# dict_url1[j]=dict_url[j]
# elif 'Norovirus' in i:
# if i == j:
# dict_url1[j]=dict_url[j]
# elif 'subtype' in i:
# if i.split(' ')[0] in j:
# dict_url1[j]=dict_url[j]
#
# else:
# if i in j:
# dict_url1[j] = dict_url[j]
# print(dict_url1)
# print(len(dict_url1))
#
# Directory that holds the URL table and receives the downloaded archives.
DATA_DIR = '不同物种cds的引物序列设计'
URL_TABLE = os.path.join(DATA_DIR, 'url_exist=0.txt')

# NOTE(review): dict_url1 is only assigned in the commented-out step above
# (presumably organism name -> RefSeq assembly directory URL -- confirm); it
# must be defined before this point is reached.
# All rows are built BEFORE the table is opened for writing, so a failure here
# (e.g. dict_url1 missing) no longer truncates an existing URL table.
url_rows = [
    name + '\t' + base_url + '/' + base_url.split('/')[-1]
    + '_translated_cds.faa.gz' + '\n'
    for name, base_url in dict_url1.items()
]
# Explicit UTF-8 so the file is read back with the same encoding it was
# written with, independent of the platform's locale default.
with open(URL_TABLE, 'w', encoding='utf-8') as f:
    f.writelines(url_rows)

# Column 0: name used for the output file; column 1: full download URL.
df_url2 = pd.read_csv(URL_TABLE, sep='\t', header=None, encoding='utf-8')
print(df_url2)

for organism, download_address in zip(df_url2.iloc[:, 0], df_url2.iloc[:, 1]):
    # Keep only the part after the last '/' so the name cannot escape DATA_DIR
    # through a path separator.
    target = os.path.join(DATA_DIR, str(organism).split('/')[-1] + '_fa.gz')
    try:
        # A timeout prevents one stalled connection from hanging the whole run.
        response = requests.get(download_address, timeout=60)
        # Do not save an HTTP error page as if it were a gzip archive.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the failure and continue with the next entry.
        print('download failed for %s: %s' % (download_address, exc))
        continue
    with open(target, 'wb') as code:
        code.write(response.content)
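# Process the RefSeq database: download the genome CDS sequences and write them out (2022-03-31).
# (Source page footer: latest recommended article published 2024-09-02 13:29:44.)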