import pandas as pd
import numpy as np
import os
import sys
import requests
def download(url, file_name):
    """Fetch *url* with HTTP GET and append the response body to *file_name*.

    The body is streamed in 512-byte chunks and written in binary append
    mode, so repeated calls with the same *file_name* concatenate the
    downloaded payloads into a single file.

    The HTTP status code is not checked: whatever the server returns
    (including an error body) is appended as-is.

    Args:
        url: Address to request.
        file_name: Path of the file to append to; created if it is missing.

    Returns:
        None.
    """
    # Context managers release the streamed connection and the session's
    # connection pool even when the download or the file write fails.
    with requests.Session() as session:
        with session.get(url, stream=True) as response:
            with open(file_name, 'ab') as f:
                for item in response.iter_content(chunk_size=512):
                    if item:  # skip empty chunks
                        f.write(item)
def getCIDfromFILE(file):
    """Return the PubChem compound IDs stored in a CSV file.

    Args:
        file: Path (or file-like object) of a CSV that has a
            ``PUBCHEM_CID`` column.

    Returns:
        A list with one ``int`` per non-missing ``PUBCHEM_CID`` cell, in
        file order.
    """
    cid_column = pd.read_csv(file)['PUBCHEM_CID']
    # Missing cells are dropped first; int() then guarantees whole-number IDs.
    return [int(value) for value in cid_column.dropna()]
def getASSAYSUMMARYfromCID(file):
    """Download the PubChem assay summary for every CID listed in *file*.

    One PUG REST ``assaysummary/CSV`` request is made per CID, and each
    response is appended to the local file ``assaysummary_CSV`` in the
    working directory. That file is then parsed as a single CSV.

    NOTE: the responses are concatenated verbatim, and an already existing
    ``assaysummary_CSV`` is appended to rather than replaced, so the
    returned frame may contain rows that are not assay records.

    Args:
        file: CSV path passed on to ``getCIDfromFILE``.

    Returns:
        A ``pandas.DataFrame`` read from ``assaysummary_CSV``.
    """
    url_template = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/assaysummary/CSV'
    targets = [url_template.format(cid) for cid in getCIDfromFILE(file)]
    for target in targets:
        download(target, 'assaysummary_CSV')
        # Progress trace: shows whether the procedure is still running.
        print('{} finished'.format(target))
    return pd.read_csv('assaysummary_CSV', sep=',')
# Placeholder paths -- change both of them to suit your environment.
file = 'file.csv'  # input CSV; must contain a PUBCHEM_CID column
outcome_file = 'outcome_file.csv'  # destination of the cleaned table

df = getASSAYSUMMARYfromCID(file)
# Keep only the rows whose AID consists of digits. Per the original note
# this erases 404 responses and the repeated column-header lines that end
# up in the concatenated download.
# `.loc` replaces `.ix`, which was removed in pandas 1.0; casting to str
# first keeps the check from failing on non-string cells such as NaN.
df = df[df.loc[:, 'AID'].astype(str).str.isdigit()]
df.to_csv(outcome_file)
利用含PUBCHEM_CID的csv文件获取assaysummary数据
最新推荐文章于 2023-06-05 11:11:54 发布