数据集的训练集和测试集划分
from sklearn.model_selection import train_test_split
其中,patientdata 为待划分的完整数据集。
# Randomly hold out 10% of patientdata as the test set; the remaining 90%
# becomes the training set. No random_state is passed, so the partition
# differs from run to run.
traindata, testdata = train_test_split(
    patientdata,
    test_size=0.1,
)
对数据进行标准化,以减小量纲的影响
import numpy as np
# Z-score standardization: rescale every column to zero mean and unit sample
# standard deviation so that differing units do not dominate later steps.
#
# The reductions are taken column-wise through the DataFrame's own methods.
# np.mean(df) / np.std(df) forward axis=None to pandas, and on pandas >= 2.0
# DataFrame.mean(axis=None) collapses the whole frame to a single scalar,
# which would subtract one global mean from every column.
#
# NOTE(review): the statistics come from the full patientdata rather than
# from traindata, so the held-out rows influence the scaling -- confirm this
# is intended.
arr_mean = patientdata.mean(axis=0)  # per-column mean
arr_std = patientdata.std(axis=0, ddof=1)  # per-column sample std (n - 1)
newdata = (patientdata - arr_mean) / arr_std  # standardize all columns
单独提取出目标变量 Y(住院天数),其余各列默认作为特征 X
# Split the standardized frame: every column except '住院天数' forms X,
# and the '住院天数' column on its own is Y.
# Both are taken from newdata, so Y holds standardized values as well.
X = newdata.drop(columns='住院天数')
Y = newdata['住院天数']
微信:realtimedata