pipeline管道机制,顾名思义就像水管一样,数据就像水一样,在管道之间流动,pipeline的每一步就像一节节水管,数据经由这节水管流到下一节水管,流向下一节水管的就是经由上一节水管处理后的数据。
例子:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Load the iris dataset and pull out the feature matrix and the labels.
data = load_iris()
x, y = data.data, data.target

# Split into train/test sets (test_size=0.3), stratifying on the labels.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, stratify=y
)

# The processing steps are written into a Pipeline, so what used to be
# model.fit is replaced by pipeline.fit. Steps, in order:
#   'scaler'                - data standardization
#   'pca'                   - PCA (constructed with its default arguments)
#   'randomforestclassifer' - random forest classification
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("pca", PCA()),
        ("randomforestclassifer", RandomForestClassifier()),
    ]
)

# Fit every step on the training split, then report the score on the
# held-out test split.
pipeline.fit(x_train, y_train)
test_score = pipeline.score(x_test, y_test)
print(test_score)