1. 交叉验证
在训练模型时,通常会采用一种叫做交叉验证(cross-validation)的方法:将训练数据分成 5 份(也可以是其他份数),依次把其中每一份作为验证集(测试集),其余的数据作为训练集。这样,同一份数据就可以被利用 5 次,提高了数据的利用率。
下面是代码
# k-fold cross-validation: for every candidate k, hold out each fold once as
# the validation set, train on the remaining folds, and record the accuracy.

# Split the data into num_folds parts (np.array_split returns a list of
# arrays and tolerates sizes that do not divide evenly).
X_train_folds = np.array_split(X_train, num_folds)
Y_train_folds = np.array_split(y_train, num_folds)

# Maps each k to the list of accuracies measured on the individual folds.
k_to_accuracies = {}
for k in k_choices:
    k_to_accuracies[k] = []
    for i in range(num_folds):
        # Fold i is the held-out validation set for this round.
        X_val = X_train_folds[i]
        Y_val = Y_train_folds[i]

        # vstack/hstack are the key step: the fold lists are concatenated
        # with `+` (skipping fold i) and then merged back into one array.
        # Fold-local names are used on purpose so the full X_train / y_train
        # are not overwritten while iterating.
        # assumes samples are rows of a 2-D X_train and labels are 1-D — TODO confirm
        X_fold_train = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        Y_fold_train = np.hstack(Y_train_folds[:i] + Y_train_folds[i + 1:])

        # Train on the labels that belong to the stacked folds; passing the
        # full y_train here would mismatch the number of training rows.
        classifier.train(X_fold_train, Y_fold_train)
        # NOTE(review): the trailing 0 is forwarded unchanged; its meaning is
        # defined by classifier.predict — confirm against that signature.
        y_pred = classifier.predict(X_val, k, 0)

        num_correct = np.sum(y_pred == Y_val)
        accuracy = float(num_correct) / len(Y_val)
        k_to_accuracies[k].append(accuracy)
2. 图像的去均值(零中心化)预处理
# Compute the mean over axis 0 of the training set only, then subtract that
# same mean from every split in place.
mean_image = np.mean(X_train, axis=0)
np.subtract(X_train, mean_image, out=X_train)
np.subtract(X_val, mean_image, out=X_val)
np.subtract(X_test, mean_image, out=X_test)
np.subtract(X_dev, mean_image, out=X_dev)
3. 将 Wx+b 变为 Wx(给每个样本追加一个恒为 1 的维度,把偏置 b 并入权重矩阵 W)
def _append_bias_column(data):
    """Return `data` with one extra trailing column filled with ones."""
    ones_column = np.ones((data.shape[0], 1))
    return np.hstack([data, ones_column])


# Add the bias dimension to every split so the bias term can be handled as
# one more weight column.
X_train = _append_bias_column(X_train)
X_val = _append_bias_column(X_val)
X_test = _append_bias_column(X_test)
X_dev = _append_bias_column(X_dev)
4. 将彩色图片向量化(把每张图片展平成一个行向量)
# Preprocessing: flatten every sample into a single row. The first axis
# (number of samples) is kept; all remaining axes are collapsed into one.
X_train, X_val, X_test, X_dev = (
    np.reshape(split, (split.shape[0], -1))
    for split in (X_train, X_val, X_test, X_dev)
)