先导入一些常用的包
import pandas as pd
import numpy as np
import sklearn
从网上下载一些公开的数据,sklearn的一些内置数据集,如鸢尾花数据集、波士顿房价,糖尿病数据集、手写数据集等
from sklearn import datasets

# Load the bundled Boston housing dataset; the returned object exposes the
# feature matrix as ``.data`` and the column labels as ``.feature_names``.
# NOTE(review): ``load_boston`` was deprecated in scikit-learn 1.0 and removed
# in 1.2 -- confirm the scikit-learn version this script is pinned to.
boston = datasets.load_boston()

# Wrap the feature matrix in a DataFrame so that each column carries its name.
bostondf = pd.DataFrame(data=boston.data, columns=boston.feature_names)
from sklearn import preprocessing
# Scaler that is fitted and applied to the Boston data further down.
std = preprocessing.StandardScaler()
from sklearn import linear_model
# The estimator class is spelled ``LinearRegression`` (capital "L"); the
# lowercase ``linearRegression`` attribute does not exist in
# ``sklearn.linear_model`` and raised AttributeError here.
reg = linear_model.LinearRegression()
from sklearn import decomposition
# PCA with default parameters.
dec = decomposition.PCA()
# Inspect the scaler's parameters; calling set_params() with no keyword
# arguments leaves them unchanged.
std.get_params()
std.set_params()

# Fit on the raw array, then again on the DataFrame; the second fit
# replaces whatever the first one learned.
std.fit(boston.data)
std.fit(bostondf)

# Statistics learned by the most recent fit.
std.mean_
std.scale_

# Transform the full frame, then only its first three rows. The subset is
# still scaled with the mean and std learned by std.fit, not recomputed.
std.transform(bostondf)
std.transform(bostondf.head(3))
'''
持久化
from sklearn.externals import joblib
joblib.dump(std,path)
joblib.dump(reg,path)
reg2 = joblib.load(path)
'