# 数据归一化
import numpy as np
import matplotlib.pyplot as plt
# 最值归一化
# 生成 100 个取值范围为 [0, 100) 的随机整数
# Min-max normalization: linearly rescale a 1-D vector into [0, 1].
# Sample data: 100 random integers drawn from [0, 100).
x = np.random.randint(0, 100, size=100)
print(x)
# Bind the scaled vector to a name and print it; a bare expression is
# evaluated and then thrown away when this file runs as a script.
x_min, x_max = np.min(x), np.max(x)
x_scaled = (x - x_min) / (x_max - x_min)
print(x_scaled)
# Min-max normalization of a 2-D sample matrix, one feature (column) at a time.
X = np.random.randint(0, 100, (50, 2))
print(X[:10, :])
# Convert to float first: writing the scaled values back into an integer
# array would truncate them.
X = X.astype(float)
# Per-column minimum and range; broadcasting applies them to every row.
col_min = X.min(axis=0)
col_range = X.max(axis=0) - col_min
X = (X - col_min) / col_range
print(X[:10, :])
plt.scatter(X[:, 0], X[:, 1])  # both axes now span [0, 1]
plt.show()  # required to actually display the figure outside a notebook
# Print the statistics of the first column instead of evaluating and
# discarding them.
print(np.mean(X[:, 0]))
print(np.std(X[:, 0]))
# 均值方差归一化
# Standardization (z-score): shift each column to zero mean and scale it to
# unit standard deviation.
X2 = np.random.randint(0, 100, size=(50, 2)).astype(float)
X2[:, 0] = (X2[:, 0] - X2[:, 0].mean()) / X2[:, 0].std()
X2[:, 1] = (X2[:, 1] - X2[:, 1].mean()) / X2[:, 1].std()
# Sanity check on the first column: mean and standard deviation after scaling.
print(X2[:, 0].mean())
print(X2[:, 0].std())
# 示例输出(数据随机生成,具体数值每次不同;均值约为 0,标准差约为 1):
# -1.1546319456101628e-16
# 0.9999999999999999
# 最值归一化适用于有明确边界的数据,例如学生成绩(0–100)、图像像素(0–255);
# 不适用于没有明确边界的数据,例如收入。
# 对于没有明确边界或存在极端值的数据,应使用均值方差归一化。