数据统计并制作韦恩图

接着之前计算得到的 InChIKey 结果,进行统计并制作韦恩图。

import numpy as np
import pandas as pd
import os
import re

def _iter_files(path):
    """Return the full path of every file under *path*, including sub-folders.

    A non-existent *path* yields an empty list (``os.walk`` produces nothing).
    """
    paths = []
    for root, _dirs, names in os.walk(path):
        paths.extend(os.path.join(root, name) for name in names)
    return paths


def _collect_inchikeys(path, uni_num):
    """Gather the 'InChI Key' values of all qualifying rows under *path*.

    Every file whose path contains ``<digits>_<digits>`` is read as CSV, and
    rows whose 'Uniprots_num' is at least *uni_num* contribute their
    'InChI Key' value. Duplicates are kept, in file-walk order.

    Returns a single-column DataFrame named 'Inchikey'.
    """
    keys = []
    for file in _iter_files(path):
        # NOTE(review): the pattern is searched in the whole path, not only the
        # file name, so a matching folder name selects every file inside it.
        # Kept as in the original script -- confirm this is intended.
        if not re.search(r'\d+_\d+', file):
            continue
        df = pd.read_csv(file)
        keys.extend(df.loc[df['Uniprots_num'] >= uni_num, 'InChI Key'].tolist())
    return pd.DataFrame({'Inchikey': keys})


def cross_info(dose, uni_num, base_dir=r"D:\SMALL_MOLECULAR_AIDS"):
    """Export the filtered InChI keys of each data source to one CSV per source.

    For each of 'pubchem', 'binding_DB' and 'chembl', the CSV files under
    ``<base_dir>/<source>/outcome/<dose>`` are scanned, rows with
    'Uniprots_num' >= *uni_num* are kept, and their 'InChI Key' values are
    written (no header, no index) to
    ``<base_dir>/cross_info/Cross_info(<source>)/inchikey/inchikey(<N>uM,<uni_num>).csv``.
    The output folder is created when it does not exist yet.

    Args:
        dose: Name of the dose folder, e.g. '1uM(less)'. It must contain a
            ``<digits>uM`` fragment, which is used in the output file name.
        uni_num: Minimum 'Uniprots_num' a row needs in order to be exported.
        base_dir: Root folder holding the per-source folders and the
            'cross_info' output tree.

    Raises:
        IndexError: If *dose* contains no ``<digits>uM`` fragment.
    """
    dose_tag = re.findall(r'\d+uM', dose)[0]
    file_name = 'inchikey({},{}).csv'.format(dose_tag, uni_num)

    for source in ('pubchem', 'binding_DB', 'chembl'):
        in_dir = os.path.join(base_dir, source, 'outcome', dose)
        df = _collect_inchikeys(in_dir, uni_num)
        print(df.head())

        out_dir = os.path.join(
            base_dir, 'cross_info', 'Cross_info({})'.format(source), 'inchikey'
        )
        # to_csv does not create missing folders, so make sure the target exists.
        os.makedirs(out_dir, exist_ok=True)
        df.to_csv(os.path.join(out_dir, file_name), index=False, header=False)
        print('{}-{} finished'.format(source, file_name))
        
        
# Run the export for the '1uM(less)' dose folder once per Uniprots_num
# threshold, from 5 up to and including 10.
dose = '1uM(less)'
for uni_num in range(5, 10 + 1):
    cross_info(dose, uni_num)

之后通过网页上传文件来制作韦恩图:
http://bioinformatics.psb.ugent.be/webtools/Venn/
传入处理过的 inchikey 文件后,输出结果如下:
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值