接之前算出的inchikey结果,统计并制作韦恩图.
import numpy as np
import pandas as pd
import os
import re
def cross_info(dose, uni_num):
    """Export the filtered InChIKey list of each source database to a CSV file.

    For each of the three sources ('pubchem', 'binding_DB', 'chembl') the
    folder ``D:\\SMALL_MOLECULAR_AIDS\\<source>\\outcome\\<dose>`` is walked
    recursively. Every file whose path contains ``<digits>_<digits>`` is read
    as CSV, rows with ``Uniprots_num >= uni_num`` are kept, and their
    ``InChI Key`` values are concatenated. The result is written, without
    header or index, to
    ``D:\\SMALL_MOLECULAR_AIDS\\cross_info\\Cross_info(<source>)\\inchikey\\``
    under the name ``inchikey(<N>uM,<uni_num>).csv``.

    Args:
        dose: Name of the dose sub-folder; must contain a ``<digits>uM``
            token (e.g. ``'1uM(less)'``), which is reused in the output name.
        uni_num: Minimum ``Uniprots_num`` value a row needs to be kept.

    Returns:
        None. The results are written to disk and progress is printed.

    Raises:
        IndexError: If ``dose`` contains no ``<digits>uM`` token.
    """
    # Raw strings: '\d' inside a normal literal is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
    data_file_pattern = re.compile(r'\d+_\d+')

    def iter_files(path):
        """Return the path of every file found anywhere below ``path``."""
        return [
            os.path.join(root, file)
            for root, _dirs, files in os.walk(path)
            for file in files
        ]

    def collect_inchikeys(path):
        """Gather the 'InChI Key' values of the qualifying rows below ``path``."""
        keys = []
        for file in iter_files(path):
            # The pattern is tested against the whole path, not just the
            # file name, so a matching folder name also selects its files.
            if data_file_pattern.search(file):
                df = pd.read_csv(file)
                keys.extend(df.loc[df['Uniprots_num'] >= uni_num, 'InChI Key'])
        return pd.DataFrame({'Inchikey': keys})

    # Indexing [0] deliberately raises IndexError when no '<digits>uM' token
    # is present, before any file is read or written.
    dose_tag = re.findall(r'\d+uM', dose)[0]
    file_name = 'inchikey({},{}).csv'.format(dose_tag, uni_num)

    for source in ('pubchem', 'binding_DB', 'chembl'):
        path = r"D:\SMALL_MOLECULAR_AIDS\{}\outcome\{}".format(source, dose)
        df = collect_inchikeys(path)
        print(df.head())

        out_path = r"D:\SMALL_MOLECULAR_AIDS\cross_info\Cross_info({})\inchikey\{}".format(source, file_name)
        # to_csv does not create missing folders, so make sure the target
        # exists. dirname is empty when the path has no directory component.
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        df.to_csv(out_path, index=False, header=False)
        print('{}-{} finished'.format(source, file_name))
# Driver: export the InChIKey lists of the '1uM(less)' dose folder once for
# each Uniprots_num threshold from 5 to 10 inclusive.
dose = '1uM(less)'
for uni_num in (5, 6, 7, 8, 9, 10):
    cross_info(dose=dose, uni_num=uni_num)
之后通过网页上传文件制作韦恩图:
http://bioinformatics.psb.ugent.be/webtools/Venn/
传入处理过的inchikey文件,输出结果: