from_accession_to_taxid(match)_202111

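# Method 1 (works, but slow): for every BLAST hit, scan the accession table
# row by row until the matching accession.version is found, then copy its taxid.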
# import pandas as pd
#
# # accession_file='./prot.accession2taxid'
# accession_file1=r'2021-10\sample_prot.accession2taxid'
# df_accession=pd.read_csv(accession_file1,sep='\t')
# # print(df1)
# df_accession1=df_accession.iloc[:,1:3]
# print(df_accession1)
#
# search_file1=r"2021-10\SRR1370913_nt_virus_blastn.m8"
# df_search=pd.read_csv(search_file1,sep='\t',header=None)
# df_search1=df_search.iloc[:,0:2]
#
# df_search1.columns =['query seq','accession.version']
#
# df_search1.loc[10]=['NODE_1_length_5513_cov_27.529529','A0A0H2ZI72.1']
# df_search1.loc[11]=['NODE_1_length_5513_cov_27.529529','A0A009IHW8.1']
# # df_match=pd.merge(right=df2,left=df_search1,how='right')
# # print(df_match)
# df_search1['taxid']=''
# print(df_search1)
# for j in range(len(df_search1)):
#     for i in range(len(df_accession1)):
#         if df_search1.iloc[j,1] == df_accession1.iloc[i,0] :
#             df_search1.iloc[j,2]=df_accession1.iloc[i,1]
#         else:
#             i=i+1
# print(df_search1)
# df_search1.to_csv(r'2021-10\accession_match_taxid',index=False,sep='\t',header=None)
# # df_search2=df_search.iloc[:,0:2]
# # df_search2.columns=['query seq','identity accession',]
# # print(df_search2)

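# Method 2 (does NOT work): F2_d is keyed by the accession string (row[1]), but the
# lookup tests tuple(match1) -- a whole BLAST row -- so no hit ever matches;
# F2_d[match1] would also fail because match1 is a list (unhashable), and
# open("File_3", "w") writes to a file literally named "File_3" instead of the File_3 path.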
# File_1=r"2021-10\SRR1370913_nt_virus_blastn.m8"
# File_2=r'2021-10\sample_prot.accession2taxid'
# File_3=r'2021-10\accession_match_taxid1'
#
# import csv
# with open(File_1, "r") as F1:
#     F1_d = sorted(csv.reader(F1, delimiter='\t'))
# with open(File_2, "r") as F2:
#     F2_d = {(row[1]): row for row in csv.reader(F2, delimiter='\t')}
#
# with open("File_3", "w") as F3:
#     for match1 in F1_d:
#         if tuple(match1) in F2_d:
#             F3.write(' '.join(F2_d[match1]) + '\n')

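# Method 3 (faster than method 1): build an accession.version -> taxid dict once,
# then look every BLAST hit up in it instead of rescanning the table per hit.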
import pandas as pd

# Annotate BLAST tabular (.m8) hits with the taxid of their subject accession.
#
# Reads the accession table and the BLAST hits from the fixed paths below,
# appends a 'taxid' column to the hits and writes the result, tab-separated,
# to r'2021-10\accession_match_taxid'.

accession_file1 = r'2021-10\sample_prot.accession2taxid'
df_accession = pd.read_csv(accession_file1, sep='\t')
# Keep only the columns at positions 1 and 2; they are addressed below by
# their header names 'accession.version' and 'taxid'.
df_accession1 = df_accession.iloc[:, 1:3]
print(df_accession1)

# One-off lookup table: accession.version -> taxid.  If an accession occurs
# more than once in the table, the last row wins (dict semantics).
accession_to_taxid_dict = dict(
    zip(df_accession1['accession.version'], df_accession1['taxid'])
)
print(accession_to_taxid_dict)

search_file1 = r"2021-10\SRR1370913_nt_virus_blastn.m8"
df_search = pd.read_csv(search_file1, sep='\t', header=None)
# The file is read without a header row, so label its 12 columns here.
# 'accession.version' holds the BLAST subject id; it is named this way so it
# lines up with the accession table.
df_search.columns = [
    'Query id', 'accession.version', '% identity', 'alignment length',
    'mismatches', 'gap openings', 'q. start', 'q. end', 's. start', 's. end',
    'e-value', 'bit score',
]

# Series.map with a dict leaves NaN for accessions that are not in the table,
# whereas indexing the dict directly raised KeyError on the first unmatched
# hit and aborted the whole run.
df_search['taxid'] = df_search['accession.version'].map(accession_to_taxid_dict)

unmatched = df_search['taxid'].isna()
if unmatched.any():
    print(f"{int(unmatched.sum())} of {len(df_search)} hits have no taxid in {accession_file1}")
    # A NaN forces the column to float; switch to a nullable dtype so matched
    # ids are still written as e.g. "9606" rather than "9606.0".  Unmatched
    # hits end up as an empty field in the output file.
    df_search['taxid'] = df_search['taxid'].convert_dtypes()

# header=None is kept exactly as before.
# NOTE(review): presumably this suppresses the header row in the output -- confirm.
df_search.to_csv(r'2021-10\accession_match_taxid', index=False, sep='\t', header=None)
print(df_search)

  • 7
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值