# 1. Process kraken2 per-read output (对kraken2处理)
inputpath1 = r'22-12373O.5.10000.kraken2'


def unambiguous_kmer_hits(line):
    """Return the non-ambiguous hit tokens of one kraken2 output line.

    The line is split on tabs. Only rows whose first field is ``'C'`` are
    considered; their fifth field is split on single spaces into tokens.

    Args:
        line: One raw line of the kraken2 per-read output file.

    Returns:
        ``None`` when the first field is not ``'C'`` or the row has fewer
        than five tab-separated fields. Otherwise the list of tokens from
        the fifth field, excluding the ``'|:|'`` token and every token whose
        part before the first ``':'`` is ``'A'``.
    """
    fields = line.strip('\n').split('\t')
    # Short or blank rows are skipped instead of raising IndexError.
    if fields[0] != 'C' or len(fields) < 5:
        return None
    # Filtering '|:|' (rather than list.remove) avoids a ValueError on rows
    # that do not contain it. Per the Kraken 2 manual, '|:|' separates the
    # two mates of a paired read and 'A' marks ambiguous-nucleotide k-mers,
    # so single-end rows never carry the separator.
    return [
        token
        for token in fields[4].split(' ')
        if token != '|:|' and token.split(':')[0] != 'A'
    ]


if __name__ == '__main__':
    with open(inputpath1, 'r') as f1:
        for line in f1:
            list_last = unambiguous_kmer_hits(line)
            # NOTE(review): the original snippet stopped here with a bare
            # `sum` expression, so no aggregation of `list_last` was ever
            # implemented. TODO: confirm what should be summed.
# 2. deal_k2report_to_xls
#!/usr/bin/env python3
import os, sys, re
# Command-line form kept for reference:
# ms, infile_name, outfile_name = sys.argv
infile_name=r'C:\Users\Administrator\Desktop\自己测试捕获\20230619\第三张图-last\捕获\20-9263L.2.kreport2'
outfile_name=r'C:\Users\Administrator\Desktop\自己测试捕获\20230619\第三张图-last\捕获\20-9263L.2.kreport2.trans.xls'

# Rank codes that are written to the output, ordered from highest to lowest.
RANK_CODES = ("D", "K", "P", "C", "O", "F", "G", "S")
# Maps a rank code to its position in the lineage.
RANK_DEPTH = {code: depth for depth, code in enumerate(RANK_CODES)}
# Placeholder for a rank the current lineage does not define. Kingdom is
# optional while Domain is required, so Kingdom often takes this value.
MISSING_RANK = "NG"


def expand_lineage(report_lines):
    """Yield report rows whose name column holds the full ';'-joined lineage.

    Each input row is stripped and split on tabs. Column index 3 is the rank
    code and column index 5 is the name (leading spaces are removed). Only
    rows whose rank code is exactly one of ``RANK_CODES`` are emitted; any
    other rank code, and rows with fewer than six columns, are skipped.

    For an emitted row the name column is replaced by the names of all ranks
    from Domain down to the row's own rank, joined with ``';'``. A Domain row
    therefore keeps just its own name.

    When a rank is seen, every lower rank is reset to ``MISSING_RANK``. This
    keeps a taxon that skips intermediate ranks from inheriting names left
    over from the previous lineage.

    Args:
        report_lines: Iterable of raw report lines (e.g. an open text file).

    Yields:
        Tab-joined output rows without a trailing newline.
    """
    # Domain starts empty; everything below it starts as "missing".
    lineage = [""] + [MISSING_RANK] * (len(RANK_CODES) - 1)
    for raw_line in report_lines:
        fields = raw_line.strip().split("\t")
        if len(fields) < 6:
            continue  # blank or malformed row
        # Names are indented with spaces in the report; drop that indent.
        fields[5] = fields[5].lstrip(" ")
        depth = RANK_DEPTH.get(fields[3])
        if depth is None:
            continue  # rank codes outside RANK_CODES are not written
        lineage[depth] = fields[5]
        # Invalidate all lower ranks: they belonged to the previous lineage.
        for lower in range(depth + 1, len(lineage)):
            lineage[lower] = MISSING_RANK
        fields[5] = ";".join(lineage[:depth + 1])
        yield "\t".join(fields)


if __name__ == "__main__":
    with open(infile_name, 'r') as infile, open(outfile_name, 'w') as o:
        for row in expand_lineage(infile):
            o.write(row + "\n")
# 3. Export species rows from the kraken2 report (对kraken2的report进行输出)
import os
import click
@click.command()
@click.option('-i1','--inputpath1',help='inputpath1(nanorv-crpm002-double-cap.report.out.xls)|...',required=True)
@click.option('-o1','--outputpath1',help='outputpath1(nanorv-crpm002-double-cap.classify.xls)|...',required=True)
# Example paths used during development:
# inputpath1=r'C:\Users\Administrator\Desktop\20230327需要查看的\nanorv-crpm002-double-cap.report.out.xls'
# outputpath1=r'C:\Users\Administrator\Desktop\20230327需要查看的\nanorv-crpm002-double-cap.classify.xls'
def classify_kraken2(inputpath1,outputpath1):
    """Write the species-level rows of a tab-separated report to a new table.

    The output starts with the header ``Ename<TAB>taxid<TAB>reads/all_reads(%)``.
    For every input row whose column index 3 starts with ``'S'`` and whose
    column index 2 is a non-zero integer, one line is written containing the
    last column (stripped), the second-to-last column and the first column.

    Rows with fewer than four columns (e.g. blank lines) are skipped.

    Args:
        inputpath1: Path of the tab-separated report to read.
        outputpath1: Path of the table to write (overwritten if it exists).

    Raises:
        ValueError: If column index 2 of a matching row is not an integer.
    """
    with open(inputpath1,'r') as f1,open(outputpath1,'w') as f2:
        f2.write('Ename\ttaxid\treads/all_reads(%)\n')
        for line in f1:
            fields = line.strip('\n').split('\t')
            # Guard against blank/short rows, which would otherwise raise
            # IndexError on the rank column below.
            if len(fields) < 4:
                continue
            # The name column may carry indentation; remove it.
            fields[-1] = fields[-1].strip()
            # startswith also accepts codes such as 'S1', not only 'S'.
            if fields[3].startswith('S') and int(fields[2]) != 0:
                f2.write('{}\t{}\t{}\n'.format(fields[-1], fields[-2], fields[0]))


if __name__=='__main__':
    classify_kraken2()