-
clusterfinder分析软件文件输入格式
-
PfamScan分析软件输出文件格式
需要提取的内容:
chain gene genestart genestop pfamstart pfamstop pfamID
import re
def deal_file(input_file, out_file):
    """
    Convert PfamScan output lines into ClusterFinder input rows.

    Every data line of ``input_file`` is split on runs of space characters
    and one tab-separated row is appended to ``out_file``:

        chain  gene  genestart  genestop  pfamstart  pfamstop  pfamID
        [0]    "##"  [1]        [2]       [8]        [9]       [5]

    The bracketed numbers are the indices of the split input fields. The
    gene column is not taken from the input; it is always the placeholder
    string ``"##"``.

    Blank lines and lines whose first non-blank character is ``#``
    (comment/header lines) are skipped instead of being parsed as data.

    Args:
        input_file: Path of the text file to read.
        out_file: Path of the file to append rows to; it is created when it
            does not exist, and existing content is kept.

    Returns:
        None. The result is the content appended to ``out_file``.

    Raises:
        IndexError: If a data line has fewer than 10 space-separated fields.
    """
    # Split on runs of spaces only (tabs are deliberately not separators).
    field_sep = re.compile(r"[ ]+")

    # The input is only read, so plain "r" is enough and also works on
    # read-only files. It is read completely before the output is opened.
    with open(input_file, "r") as ip_file:
        lines = ip_file.readlines()

    # The context manager guarantees the output is flushed and closed,
    # even when a malformed line raises part-way through.
    with open(out_file, "a") as ot_file:
        for raw_line in lines:
            stripped = raw_line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            fields = field_sep.split(stripped)
            cen_list = [
                fields[0],  # chain
                "##",       # gene: placeholder, named by the user later
                fields[1],  # genestart
                fields[2],  # genestop
                fields[8],  # pfamstart
                fields[9],  # pfamstop
                fields[5],  # pfamID
            ]
            ot_file.write("\t".join(cen_list) + "\n")
if __name__ == "__main__":
    # Script entry point: convert the fixed input file into the fixed
    # output file in the current working directory.
    # NOTE(review): the input name contains a space before ".txt"; it is
    # kept exactly as written - confirm this matches the real file name.
    source_path = "results .txt"
    target_path = "out_file.txt"
    deal_file(input_file=source_path, out_file=target_path)
满足要求的文件格式: