原文链接:
下面在原文代码中添加了注释,帮助初学者更好地理解代码。
此示例展示如何使用多输出估计器来补全图像。目标是在给定人脸上半部分的情况下,预测其下半部分。
图像的第一列显示真实人脸。接下来的各列展示极端随机树(extremely randomized trees)、k 近邻、线性回归和岭回归如何补全这些人脸的下半部分。
# Print this module's docstring (prints None if the module has no docstring).
print(__doc__)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils.validation import check_random_state
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
# Overview of the script: the upper halves of the 300 training faces are the
# inputs (train X) and their lower halves are the targets (train y). Five
# faces are then picked from the 100 held-out test faces, and every estimator
# predicts the lower half of each of them.

# Load the faces dataset.
data = fetch_olivetti_faces()
targets = data.target
# fetch_olivetti_faces() returns a Bunch, a container whose entries can also
# be read as attributes:
#     >>> x = Bunch(a="1", b="2", c="3")
#     >>> x.a
#     '1'
#     >>> x.b
#     '2'
# (Tip: in PyCharm, Ctrl-click a function name to open the file defining it.)
#
# Array shapes, per the scikit-learn dataset documentation:
#     targets     : (400,)         person id of each image
#     data.images : (400, 64, 64)  one 64x64 image per sample
#     data1       : (400, 4096)    each image flattened into one row

# Flatten every image into a single row of pixels.
data1 = data.images.reshape(data.images.shape[0], -1)

# Split by person id so that the test faces belong to people that never
# appear in the training set. Indexing with a boolean mask keeps the rows
# whose mask entry is True:
#     >>> a = numpy.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]])
#     >>> b = numpy.array([0, 1, 2])
#     >>> b < 2
#     array([ True,  True, False])
#     >>> a[b < 2]
#     array([[0, 0, 0],
#            [1, 1, 1]])
is_train = targets < 30
train = data1[is_train]   # 300 images used for training
test = data1[~is_train]   # 100 images of independent people used for testing
# Test on a subset of people: draw n_faces row indices into the test set.
#
# numpy.random.randint(low, high=None, size=None, dtype='l') returns integers
# that are >= low and < high; when high is None the values are drawn from
# [0, low) instead.
n_faces = 5
rng = check_random_state(4)  # fixed seed: the same faces are picked each run
face_ids = rng.randint(0, len(test), size=n_faces)
# test1 holds the n_faces pseudo-randomly chosen test faces, one per row.
test1 = test[face_ids]
n_pixels = data1.shape[1]
# The images were flattened row by row, so the first half of every pixel
# vector is the upper half of the face and the second half is the lower half.
upper_half = slice(None, (n_pixels + 1) // 2)
lower_half = slice(n_pixels // 2, None)
# Upper half of the faces: the input data X.
X_train, X_test = train[:, upper_half], test1[:, upper_half]
# Lower half of the faces: the label y that the estimators must predict.
y_train, y_test = train[:, lower_half], test1[:, lower_half]
# Fit estimators. The keys are the display names later used as column titles.
ESTIMATORS = {
    "Extra trees": ExtraTreesRegressor(
        n_estimators=10, max_features=32, random_state=0
    ),
    "K-nn": KNeighborsRegressor(),
    "Linear regression": LinearRegression(),
    "Ridge": RidgeCV(),
}

# Map each estimator name to its predicted lower halves for the test faces.
y_test_predict = {}
for name, estimator in ESTIMATORS.items():
    # fit() returns the fitted estimator itself, so predict() can be chained.
    y_test_predict[name] = estimator.fit(X_train, y_train).predict(X_test)
# Plot the completed faces: one row per test face, with the true face in the
# first column followed by one column per estimator (in sorted name order).
image_shape = data.images.shape[1:]  # (height, width) of a single face image
n_cols = 1 + len(ESTIMATORS)
plt.figure(figsize=(2. * n_cols, 2.26 * n_faces))
plt.suptitle("Face completion with multi-output estimators", size=16)


def _show_face(position, pixels, title=None):
    """Draw one flattened face in cell ``position`` of the subplot grid.

    ``position`` is the 1-based subplot index in the n_faces x n_cols grid,
    ``pixels`` is the flattened image (reshaped to ``image_shape`` before
    drawing) and ``title``, when given, is written above the cell.
    """
    sub = plt.subplot(n_faces, n_cols, position)
    if title is not None:
        sub.set_title(title)
    sub.axis("off")
    sub.imshow(pixels.reshape(image_shape),
               cmap=plt.cm.gray,
               interpolation="nearest")


for i in range(n_faces):
    first_row = i == 0  # column titles are only written above the first row
    row_start = i * n_cols + 1  # subplot index of this row's first column

    # For 1-D arrays np.hstack joins them end to end, which glues the upper
    # half of the face back onto its lower half.
    true_face = np.hstack((X_test[i], y_test[i]))
    _show_face(row_start, true_face, "true faces" if first_row else None)

    for j, est in enumerate(sorted(ESTIMATORS), start=1):
        # Same upper half, but with the lower half predicted by `est`.
        completed_face = np.hstack((X_test[i], y_test_predict[est][i]))
        _show_face(row_start + j, completed_face, est if first_row else None)

plt.show()