# 1. 导入依赖库 (Import dependencies)
from matplotlib.pyplot import figure, imshow, axis
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import DataStructs
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from math import sqrt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from math import log10
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
from IPython.display import display, Markdown, HTML
%matplotlib inline
# 2. 载入数据 (Load the data)
# Name of the SDF input file. Only the file name is stored here; nothing is
# read from disk by this statement.
data = "CHEMBL952131_EGFR.sdf"
# 3. 定义描述符计算函数 (Define the descriptor-calculation function)
def sdf_to_desc(data):
fps = []
targets = []
n