import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the iris dataset and separate the feature matrix from the labels.
# The bare expressions in this script (e.g. ``data.keys()``) are
# notebook-style inspections; their values are discarded when run as a script.
data = datasets.load_iris()
data.keys()
data_x, data_y = data.data, data.target

# Hold out 20% of the samples for evaluation. No random_state is passed,
# so the split is not pinned to a fixed seed.
xtrain, xtest, ytrain, ytest = train_test_split(data_x, data_y, test_size=0.2)

# Learn the per-feature mean and scale from the training split only, then
# apply that same transformation to both splits.
standard_scale = StandardScaler().fit(xtrain)
standard_scale.mean_
standard_scale.scale_
xtrain, xtest = standard_scale.transform(xtrain), standard_scale.transform(xtest)
from sklearn.neighbors import KNeighborsClassifier
# Train a k-nearest-neighbours classifier (default hyper-parameters) on the
# standardised training split; ``fit`` returns the estimator itself.
knn = KNeighborsClassifier().fit(xtrain, ytrain)
# Score the classifier on the standardised held-out split (value is
# discarded when run as a script).
knn.score(xtest, ytest)
# Hand-written implementation of a simple StandardScaler-like class
class Standardscale:
    """Minimal hand-written standardiser (zero mean, unit variance per column).

    Attributes:
        mean_: 1-D array with the mean of each column seen by ``fit``,
            or ``None`` before ``fit`` has been called.
        scale_: 1-D array with the population standard deviation of each
            column seen by ``fit`` (zeros replaced by 1.0), or ``None``
            before ``fit`` has been called.
    """

    def __init__(self):
        # Both stay None until fit() runs; transform() checks for that.
        self.mean_ = None
        self.scale_ = None

    def fit(self, x):
        """Compute the per-column mean and standard deviation of ``x``.

        Args:
            x: 2-D array-like of shape (n_samples, n_features).

        Returns:
            This instance, so calls can be chained (``fit(x).transform(x)``).
        """
        x = np.asarray(x)
        self.mean_ = np.mean(x, axis=0)
        scale = np.std(x, axis=0)
        # A constant column has zero spread; dividing by it would yield
        # NaN/inf, so such columns are only centred (scale of 1.0).
        self.scale_ = np.where(scale == 0, 1.0, scale)
        return self

    def transform(self, x):
        """Return a standardised float copy of ``x``; ``x`` is not modified.

        Args:
            x: 2-D array-like with the same number of columns as the data
                passed to ``fit``.

        Returns:
            A new float array of the same shape as ``x`` holding
            ``(x - mean_) / scale_`` column by column.

        Raises:
            AssertionError: if ``fit`` has not been called, or if the number
                of columns of ``x`` differs from the fitted data.
        """
        # Raised explicitly (rather than via ``assert``) so the checks still
        # run under ``python -O``; the exception type is unchanged.
        if self.mean_ is None or self.scale_ is None:
            raise AssertionError('error')
        x = np.asarray(x, dtype=float)
        if x.shape[1] != len(self.mean_):
            raise AssertionError(
                f"x has {x.shape[1]} columns but the scaler was fitted "
                f"with {len(self.mean_)}"
            )
        # Broadcasting applies each column's mean/scale to every row and
        # allocates a fresh result array.
        return (x - self.mean_) / self.scale_
# StandardScaler in scikit-learn (mean-variance normalisation)
# Latest recommended article published on 2024-08-05 11:28:49