使用的数据集链接: https://pan.baidu.com/s/1yBmG9IoZ2cqZlq3lpsQ5ZA 密码: 3ao1
一、爬取百度图片数据集
import requests
import json
# Baidu image-search JSON API URL for the query "猫" (cat, URL-encoded as
# %E7%8C%AB); the single {} placeholder takes the result offset ("pn" param),
# and each request returns rn=30 results.
base_url = "http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=" \
"result&queryWord=%E7%8C%AB&cl=2&lm=&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&" \
"copyright=&word=%E7%8C%AB&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&" \
"force=&pn={}&rn=30&gsm=3c&1586780971519="
# Referer of the human-facing search page; Baidu rejects image downloads that
# lack a plausible Referer.  BUG FIX: the original contained "©right=" —
# the "&copy" of "&copyright=" had been decoded as the © HTML entity,
# corrupting the URL query string.
referer = "http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=&st=-1&fm=result&" \
"fr=&sf=1&fmq=1586774219869_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&" \
"height=&face=0&istype=2&ie=utf-8&sid=&word=%E7%8C%AB"
# Desktop-Chrome User-Agent plus the Referer above, sent with every request.
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/80.0.3987.106 Safari/537.36", "Referer":referer}
# Crawl 15 result pages (30 thumbnails each) and save every image that
# downloads successfully as cat_<index>.jpg.
index = 1
for page in range(1, 16):
    url = base_url.format(str(page * 30))
    # BUG FIX: the search-API request was sent without headers; send the same
    # browser headers as the image requests so Baidu does not reject it.
    list_response = requests.get(url, headers=header, timeout=10)
    js_data = json.loads(list_response.text)
    # ROBUSTNESS: the last entry of "data" is often an empty dict, which made
    # js_data["data"][j]["thumbURL"] raise KeyError — skip entries without a URL.
    for item in js_data.get("data", [])[:30]:
        img_url = item.get("thumbURL")
        if not img_url:
            continue
        print(img_url, index, sep=" ")
        img_response = requests.get(img_url, headers=header, timeout=10)
        if img_response.status_code == 200:
            # The with-block closes the file; no explicit close() needed.
            with open("/home/yan/jupyter_notebook_file/Neural_Network/baidu_image_cat_dataset/cat_" + str(index) + ".jpg",
                      'wb') as f:
                f.write(img_response.content)
            index += 1
        else:
            print("Warning:403")
二、制作h5数据集
功能
-
前提:准备好若干图片样本,文件名格式为[0, 1]_index.jpg, 文件名首位为1为正例,0为反例
-
图片转换为numpy数组,获取正例反例数num_of_positive、num_of_negative, 生成标签数组
-
打乱X、Y数组内部顺序
-
按照7 / 3划分为训练集、测试集
-
保存为train_catvnoncat.h5、test_catvnoncat.h5
import numpy as np
import h5py
from skimage.io import imread
from PIL import Image
import os
# Build X (images) / Y (labels) arrays from files named {tag}_{index}.jpg
# (tag 0 = negative / non-cat, tag 1 = positive / cat), shuffle them in the
# same order, split 70/30, and save train/test h5 datasets.
base_path = "/home/yan/jupyter_notebook_file/Neural_Network/baidu_image_cat_dataset/{tag}_{index}.jpg"
px = 64  # every image is resized to px x px
dataset_x = []
num_of_negatives = 0
for tag in range(2):
    index = 1
    # Indices are assumed contiguous from 1; stop at the first missing file.
    while os.path.exists(base_path.format(tag=tag, index=index)):
        img_ary = imread(base_path.format(tag=tag, index=index))
        dataset_x.append(np.array(Image.fromarray(img_ary).resize((px, px))))
        index += 1
    if tag == 0:
        num_of_negatives = len(dataset_x)
# Labels: zeros for the negatives loaded first, ones for the positives after.
dataset_y = np.hstack((np.zeros(num_of_negatives), np.ones(len(dataset_x) - num_of_negatives)))
dataset_x = np.array(dataset_x)
# Reseeding with the same value before each shuffle permutes X and Y
# identically, keeping labels aligned with their images.
np.random.seed(0)
np.random.shuffle(dataset_x)
np.random.seed(0)
np.random.shuffle(dataset_y)
split = int(0.7 * dataset_x.shape[0])  # 70/30 train/test split point
(train_set_x, test_set_x) = np.vsplit(dataset_x, [split])
(train_set_y, test_set_y) = np.hsplit(dataset_y, [split])
# Context managers guarantee the h5 files are closed even if a write fails.
with h5py.File("/home/yan/jupyter_notebook_file/Neural_Network/baidu_image_cat_dataset/train_catvnoncat.h5", 'w') as f:
    f.create_dataset("train_set_x", data=train_set_x)
    f.create_dataset("train_set_y", data=train_set_y)
with h5py.File("/home/yan/jupyter_notebook_file/Neural_Network/baidu_image_cat_dataset/test_catvnoncat.h5", 'w') as f:
    f.create_dataset("test_set_x", data=test_set_x)
    f.create_dataset("test_set_y", data=test_set_y)
三、训练模型
import numpy as np
import matplotlib.pyplot as plt
import h5py
import time
get_dataset(path)
def get_dataset(path):
    """Load the train/test h5 datasets.

    path is a format string with one {} slot, filled with "train" or "test".
    Returns (train_x, train_y, test_x, test_y, test_img) where the *_x arrays
    are flattened to (features, samples) and roughly centered/scaled, the *_y
    arrays are reshaped to (1, samples), and test_img keeps the raw images
    for later display.
    """
    with h5py.File(path.format("train"), 'r') as train_file:
        # Copy into ndarrays before the file closes.
        train_x = np.array(train_file["train_set_x"])
        train_y = np.array(train_file["train_set_y"]).reshape(1, -1)
    # Flatten each image to a column and shift/scale pixels into roughly [-0.5, 0.5).
    train_x = (train_x.reshape(train_x.shape[0], -1).T - 128.0) / 256.0
    with h5py.File(path.format("test"), 'r') as test_file:
        test_img = np.array(test_file["test_set_x"])
        test_y = np.array(test_file["test_set_y"]).reshape(1, -1)
    test_x = (test_img.reshape(test_img.shape[0], -1).T - 128.0) / 256.0
    return train_x, train_y, test_x, test_y, test_img
init_parameters(num_of_units, M)
def init_parameters(num_of_units, M):
    """Initialize the network's weights and biases.

    num_of_units -- layer sizes; index 0 is the input dimension.
    M -- number of training samples (passed through unchanged for convenience).

    Returns (W, B, L, M): W and B map the layer index (as a str) to its weight
    matrix / bias column, and L is the number of weighted layers.
    """
    # BUG FIX: seed ONCE, outside the loop. The original reseeded per layer,
    # so every layer drew the identical random sequence and same-shape layers
    # started with identical (just rescaled) weights.
    np.random.seed(0)
    W = {}
    B = {}
    for i in range(1, len(num_of_units)):
        # Scale by 1/sqrt(fan-in) to keep activation variance stable (Xavier-like).
        W[str(i)] = np.random.randn(num_of_units[i], num_of_units[i - 1]) / np.sqrt(num_of_units[i - 1])
        B[str(i)] = np.zeros((num_of_units[i], 1))
    L = len(num_of_units) - 1
    return W, B, L, M
forward_propagation(A_pre, W, B, activation_function)
def forward_propagation(A_pre, W, B, activation_function):
    """Compute one layer's forward pass: activation(W @ A_pre + B).

    Supported activations: "sigmoid", "tanh", "relu"; any other name falls
    through to leaky ReLU with slope 0.01.
    """
    pre_activation = W @ A_pre + B
    activations = {
        "sigmoid": lambda z: 1 / (1 + np.exp(-z)),
        "tanh": np.tanh,
        "relu": lambda z: np.maximum(0, z),
    }
    # Unknown names default to leaky ReLU, matching the original if/else chain.
    apply_activation = activations.get(activation_function,
                                       lambda z: np.maximum(0.01 * z, z))
    return apply_activation(pre_activation)
backward_propagation(dA, A, A_pre, W, M, activation_function)
def backward_propagation(dA, A, A_pre, W, M, activation_function):
    """One layer of backprop: turn dA (gradient w.r.t. this layer's output)
    into (dW, dB, dA_pre).

    The activation derivative is expressed in terms of the activation A
    itself (e.g. sigmoid' = A(1-A)); names other than sigmoid/tanh/relu are
    treated as leaky ReLU with slope 0.01.
    """
    slopes = {
        "sigmoid": lambda a: a * (1 - a),
        "tanh": lambda a: 1 - a * a,
        "relu": lambda a: np.where(a > 0, 1.0, 0.0),
    }
    slope = slopes.get(activation_function, lambda a: np.where(a > 0, 1.0, 0.01))
    dZ = dA * slope(A)
    # Average the parameter gradients over the M samples in the batch.
    dW = dZ @ A_pre.T / M
    dB = dZ.sum(axis=1, keepdims=True) / M
    dA_pre = W.T @ dZ
    return dW, dB, dA_pre
gradient_descent(x, y, W, B, L, M, activation, learning_rate)
def gradient_descent(x, y, W, B, L, M, activation, learning_rate):
    """Run one full iteration: forward pass, backprop, parameter update.

    W and B are updated in place; returns the output-layer activations so the
    caller can compute the cost.
    """
    activations = {"0": x}
    grads_W = {}
    grads_B = {}
    # Forward pass: hidden layers use `activation`, the output layer is sigmoid.
    for layer in range(1, L + 1):
        name = "sigmoid" if layer == L else activation
        activations[str(layer)] = forward_propagation(
            activations[str(layer - 1)], W[str(layer)], B[str(layer)], name)
    # Gradient of the binary cross-entropy w.r.t. the output activations.
    output = activations[str(L)]
    dA = (1 - y) / (1 - output) - y / output
    # Backward pass, output layer down to layer 1.
    for layer in range(L, 0, -1):
        name = "sigmoid" if layer == L else activation
        grads_W[str(layer)], grads_B[str(layer)], dA = backward_propagation(
            dA, activations[str(layer)], activations[str(layer - 1)], W[str(layer)], M, name)
    # Vanilla gradient-descent update.
    for layer in range(1, L + 1):
        W[str(layer)] -= learning_rate * grads_W[str(layer)]
        B[str(layer)] -= learning_rate * grads_B[str(layer)]
    return output
predict(x, y, W, B, L, M, imgs, activation)
def predict(x, y, W, B, L, M, imgs, activation):
    """Evaluate the network on (x, y), print accuracy, and plot mispredictions.

    x -- flattened test inputs, shape (features, M); y -- labels, shape (1, M);
    imgs -- the raw (unflattened) test images for display; activation -- name
    of the hidden-layer activation used in training.
    """
    p = x
    for i in range(1, L + 1):
        p = forward_propagation(p, W[str(i)], B[str(i)], "sigmoid" if i == L else activation)
    p = (p > 0.5) * 1  # threshold sigmoid outputs into hard 0/1 predictions
    print("Testing Set Accuracy :" + str(round((1 - np.abs(p - y).sum() / M) * 100, 2)) +
          "\n---------------------------------------------------------" +
          "\nMispredicted Samples:")
    # A sample is mispredicted exactly when p + y == 1 (one is 1, the other 0).
    error_indices = ((p + y).squeeze() == 1).nonzero()
    error_imgs = imgs[error_indices]
    error_p = p.T[error_indices]
    error_y = y.T[error_indices]
    fig = plt.figure(dpi = 800)
    for i in range(error_imgs.shape[0]):
        # BUG FIX: use floor division — add_subplot requires an integer row
        # count; `shape[0] / 5 + 1` produced a float.
        ax = fig.add_subplot(error_imgs.shape[0] // 5 + 1, 5, i + 1)
        ax.axis("off")
        ax.set_title("label:" + ("cat" if error_y[i] else "noncat") +
                     "\npredict:" + ("cat" if error_p[i] else "noncat"), fontsize = 4, y = 0.8)
        ax.imshow(error_imgs[i])
    plt.subplots_adjust(wspace = 0, hspace = 1.5)
    plt.show()
model(hyperpram, train_x, train_y, test_x, test_y, test_img)
def model(hyperpram, train_x, train_y, test_x, test_y, test_img):
    """Train a fully-connected binary classifier and evaluate it on the test set.

    hyperpram -- dict with keys "num_of_units", "iteration_times",
                 "activation_function", "learning_rate".
    Returns {"W": ..., "B": ..., "costs": ...} with the learned parameters and
    the cost recorded every 10 iterations.
    """
    # BUG FIX: corrected message typos ("Iteration Simes", "Trainning",
    # "Runing time", missing spaces).
    print("#########################################################"
          "\nNetwork Structure: " + str(hyperpram["num_of_units"]) +
          "\nIteration Times: " + str(hyperpram["iteration_times"]) +
          "\nActivation Function: " + hyperpram["activation_function"] +
          "\nLearning Rate: " + str(hyperpram["learning_rate"]) +
          "\n********************************************************" +
          "\nParameters initiated successfully.\nTraining start...")
    W, B, L, M = init_parameters(hyperpram["num_of_units"], train_x.shape[1])
    costs = []
    start_time = time.time()
    for iteration in range(hyperpram["iteration_times"]):
        p = gradient_descent(train_x, train_y, W, B, L, M, hyperpram["activation_function"],
                             hyperpram["learning_rate"])
        if iteration % 10 == 0:
            # Binary cross-entropy averaged over the M samples.
            # NOTE(review): this hits log(0) if p saturates at exactly 0 or 1.
            costs.append((train_y @ np.log(p.T) + (1 - train_y) @ np.log(1 - p.T)).squeeze() / -M)
    end_time = time.time()
    print("Training over. Running time: " + str(round(end_time - start_time, 2)) + " seconds")
    predict(test_x, test_y, W, B, L, M, test_img, hyperpram["activation_function"])
    result = {"W": W, "B": B, "costs": costs}
    return result
plot_learning_curve(structure, hyperprams, models)
def plot_learning_curve(structure, hyperprams, models):
    """Plot each model's cost-vs-iteration curve on a single axes.

    structure -- the shared layer-size list (used in the title);
    hyperprams/models -- parallel lists of hyper-parameter dicts and the
    result dicts returned by model().
    """
    fig = plt.figure(dpi = 240)
    ax = fig.add_subplot(111)
    ax.set_title("Network Structure:" + str(structure))
    ax.set_xlabel("iterations")
    ax.set_ylabel("cost")
    ax.axis([-100, hyperprams[0]["iteration_times"], 0, 1.3])
    for i in range(len(models)):
        # Costs were recorded every 10 iterations, hence the step-10 x axis.
        # BUG FIX: legend label typo "Actication" -> "Activation".
        ax.plot(np.arange(0, hyperprams[i]["iteration_times"], 10), models[i]["costs"],
                label = "Activation Function: " + hyperprams[i]["activation_function"] +
                        " | Learning Rate: " + str(hyperprams[i]["learning_rate"]))
    plt.legend(bbox_to_anchor = (1, 0), loc = 3)
    plt.show()
主函数
# Load the datasets, train one model per activation function, and compare
# their learning curves.
# NOTE(review): this path points at .../spider/... while the builder script
# wrote to .../Neural_Network/... — confirm which directory holds the h5 files.
path = "/home/yan/jupyter_notebook_file/spider/baidu_image_cat_dataset/{}_catvnoncat.h5"
train_x, train_y, test_x, test_y, test_img = get_dataset(path)
iteration_times = 3000
# Input layer matches the flattened image size; single sigmoid output unit.
num_of_units = [train_x.shape[0], 64, 32, 16, 1]
# One configuration per activation, each with its own learning rate.
hyperprams = [{"num_of_units": num_of_units,
               "iteration_times": iteration_times,
               "activation_function": act,
               "learning_rate": lr}
              for act, lr in [("sigmoid", 0.1),
                              ("tanh", 0.008),
                              ("relu", 0.03),
                              ("leaky_relu", 0.03)]]
models = [model(hyperpram, train_x, train_y, test_x, test_y, test_img)
          for hyperpram in hyperprams]
plot_learning_curve(num_of_units, hyperprams, models)
四、输出
#########################################################
Network Structure: [12288, 64, 32, 16, 1]
Iteration Simes: 3000
Activation Function: sigmoid
Learning Rate: 0.1
********************************************************
Parameters initiated successfully.
Trainning start...
Trainning over.Runing time: 240.5seconds
Testing Set Accuracy :87.85
---------------------------------------------------------
Mispredicted Samples:
#########################################################
Network Structure: [12288, 64, 32, 16, 1]
Iteration Simes: 3000
Activation Function: tanh
Learning Rate: 0.008
********************************************************
Parameters initiated successfully.
Trainning start...
Trainning over.Runing time: 195.33seconds
Testing Set Accuracy :86.58
---------------------------------------------------------
Mispredicted Samples:
#########################################################
Network Structure: [12288, 64, 32, 16, 1]
Iteration Simes: 3000
Activation Function: relu
Learning Rate: 0.03
********************************************************
Parameters initiated successfully.
Trainning start...
Trainning over.Runing time: 190.94seconds
Testing Set Accuracy :89.37
---------------------------------------------------------
Mispredicted Samples:
#########################################################
Network Structure: [12288, 64, 32, 16, 1]
Iteration Simes: 3000
Activation Function: leaky_relu
Learning Rate: 0.03
********************************************************
Parameters initiated successfully.
Trainning start...
Trainning over.Runing time: 192.64seconds
Testing Set Accuracy :89.37
---------------------------------------------------------
Mispredicted Samples:
五、超参调试
1、使用特定激活函数时,代价不收敛:
(1)检查激活函数是否正确实现
(2)检查参数初始化是否合理
2、训练集欠拟合:
(1)尝试复杂的网络框架
(2)尝试更多梯度下降迭代次数
3、验证集过拟合:
(1)优化、丰富训练集
(2)降低网络模型复杂度
(3)正则化
(4)Drop out
(5)观察拟合情况,选择适当的迭代轮次(Early Stopping)。
(6)选择更接近线性的激活函数,如:relu activation function(但同时需要注意避免梯度爆炸)。
W[str(i)] = np.random.randn(num_of_units[i], num_of_units[i - 1]) * np.sqrt(1 / num_of_units[i - 1])
# 或
W[str(i)] = np.random.randn(num_of_units[i], num_of_units[i - 1]) * np.sqrt(2 / num_of_units[i - 1])
# 或
W[str(i)] = np.random.randn(num_of_units[i], num_of_units[i - 1]) * np.sqrt(2 / (num_of_units[i - 1] + num_of_units[i]))
# 效果近似于将W[str(i)]的方差设置为2 / num_of_units[i - 1]或1 / num_of_units[i - 1]
# 对于激活函数选择relu的网络,2 / num_of_units[i - 1]效果较好
# 对于激活函数选择tanh的网络, 1 / num_of_units[i - 1]效果较好,即Xavier Initialization。