处理refseq数据库,下载基因组的CDS序列输出-20220331

import pandas as pd
import os
import requests

# df1=pd.read_csv(r'不同物种cds的引物序列设计\引物设计表.txt',sep='\t',encoding='gbk')
# print(df1)
#
# # with open(r'不同物种cds的引物序列设计\\引物设计表.xlsx','r',encoding='unicode_escape') as f:
# #     f.readline()
# #     for line in f:
# #         line=line.strip('\n')
# #         list=line.split('\t')
# #         print(list)
#
# # for i in range(len(df1)):
# #     if df1[i,'exist']=='0':
# #
# df1=df1[df1['exist']==0].reset_index(drop=True)
# print(df1)
# list2=df1['英文名'].tolist()
# print(list2)
#
# df_url=pd.read_csv(r'不同物种cds的引物序列设计\assembly_summary_refseq.txt',sep='\t',skiprows=2,header=None)
# print(df_url)
# dict_url={}
# # print(df_url.loc[0,:])
# for i in range(len(df_url)):
#     dict_url[df_url.loc[i,7]]=df_url.loc[i,19]
# print(dict_url)
#
# dict_url1={}
# for i in list2:
#     for j in dict_url:
#         if 'Human bocavirus' in i and 'Human bocavirus 4' not in i:
#             if i == j:
#                dict_url1[j]=dict_url[j]
#         elif 'Human bocavirus 4' in i:
#             if i in j:
#                dict_url1[j]=dict_url[j]
#         elif 'Human adenovirus' in i:
#             if i == j:
#                dict_url1[j]=dict_url[j]
#         elif 'Norovirus' in i:
#             if i == j:
#                dict_url1[j]=dict_url[j]
#         elif 'subtype' in i:
#             if i.split(' ')[0] in j:
#                dict_url1[j]=dict_url[j]
#
#         else:
#             if i in j:
#                dict_url1[j] = dict_url[j]
# print(dict_url1)
# print(len(dict_url1))
#
# Second stage of the pipeline: persist the "organism -> archive URL" list,
# read it back, and download every listed archive next to it.
url_list_path = r'不同物种cds的引物序列设计\url_exist=0.txt'

# dict_url1 (organism name -> assembly directory URL) is built by the matching
# stage, which is currently commented out earlier in this script.  Resolve it
# BEFORE opening the list file: opening in 'w' mode first would truncate a
# previously generated list and then crash with NameError.
url_map = globals().get('dict_url1')
if url_map is not None:
    # Explicit UTF-8 so the file round-trips through pd.read_csv (which
    # defaults to UTF-8) regardless of the platform's locale encoding.
    with open(url_list_path, 'w', encoding='utf-8') as url_file:
        for organism_name, base_url in url_map.items():
            # The archive is named after the last path component of the
            # assembly directory URL.
            # NOTE(review): "_translated_cds.faa.gz" looks like the protein
            # translation file; if nucleotide CDS is what primer design needs,
            # confirm whether "_cds_from_genomic.fna.gz" is the right suffix.
            archive_name = base_url.split('/')[-1] + '_translated_cds.faa.gz'
            url_file.write(organism_name + '\t' + base_url + '/' + archive_name + '\n')
else:
    print('dict_url1 is not defined; reusing the existing list file %s' % url_list_path)

# Two tab-separated columns, no header: 0 = organism name, 1 = download URL.
df_url2 = pd.read_csv(url_list_path, sep='\t', header=None)
print(df_url2)

for organism, download_address in zip(df_url2.iloc[:, 0], df_url2.iloc[:, 1]):
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(download_address, timeout=60)
    if not response.ok:
        # Do not save an HTTP error page under a .gz name; report and move on
        # so the remaining downloads still happen.
        print('skipped %s: HTTP %s for %s' % (organism, response.status_code, download_address))
        continue

    # Keep only the text after the last '/' of the organism name so the name
    # cannot introduce extra path components.
    out_name = str(organism).split('/')[-1] + '_fa.gz'
    with open(r'不同物种cds的引物序列设计\%s' % out_name, 'wb') as code:
        code.write(response.content)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值