题目:
输入图片,判断是否是猫。是猫输出y=1,否则y=0。
数据集:
- train_catvnoncat.h5:
训练集,维度为:(209,64,64,3),即209张64*64的彩色图片
- test_catvnoncat.h5:
测试集,维度为:(50,64,64,3),即50张64*64的彩色图片
代码编写:
- 加载数据集:
h5和字典类似,通过key去取对应的value。
train_catvnoncat.h5有三个key,分别是list_classes(类别)、train_set_x(图片集合)、 train_set_y(标签)
需要对图片做的处理是将(64,64,3)的图片平铺成一维数组,作为模型的输入。
# Load and preprocess the dataset.
def load_data():
    """Load the cat/non-cat h5 files and flatten the images.

    Returns (train_x_org, test_x_org, train_x, test_x, train_y, test_y):
    *_org are the raw (m, 64, 64, 3) image arrays, train_x/test_x hold one
    flattened 12288-pixel image per column (shape (12288, m)), and the
    labels are row vectors of shape (1, m).
    """
    # Read everything into ndarrays while the files are open, then let the
    # context managers close them -- the original left both handles open
    # and returned live h5py datasets still tied to those files.
    with h5py.File('train_catvnoncat.h5', 'r') as train_file:
        train_x_org = np.array(train_file["train_set_x"])
        train_y_org = np.array(train_file["train_set_y"])
    with h5py.File('test_catvnoncat.h5', 'r') as test_file:
        test_x_org = np.array(test_file["test_set_x"])
        test_y_org = np.array(test_file["test_set_y"])
    # Sample counts.
    m_train = train_x_org.shape[0]
    m_test = test_x_org.shape[0]
    # Labels as (1, m) row vectors.
    train_y = train_y_org.reshape(1, m_train)
    test_y = test_y_org.reshape(1, m_test)
    # Flatten each (64, 64, 3) image into one column: reshape to (m, -1)
    # first and then transpose -- reshaping straight to (12288, m) would
    # mix pixels from different examples.
    train_x = train_x_org.reshape(m_train, -1).T
    test_x = test_x_org.reshape(m_test, -1).T
    return train_x_org, test_x_org, train_x, test_x, train_y, test_y
- 定义激活函数和初始化函数
LR采用的是sigmoid函数,将参数w和b初始化为0
# Logistic activation.
def sigmoid(z):
    """Element-wise logistic function: 1 / (1 + e^(-z))."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
# Parameter initialization.
def init_parameter(dim):
    """Return (w, b): a (dim, 1) zero weight vector and a zero bias."""
    weights = np.zeros((dim, 1))
    bias = 0
    return (weights, bias)
- 传播与优化
这一步要做的就是实现下面的式子,z是sigmoid的输入,A是sigmoid的输出。dw和db是下降的梯度值。
def propagate(x, y, w, b):
    """One forward/backward pass of logistic regression.

    x: features, shape (n, m); y: labels, shape (1, m);
    w: weights, shape (n, 1); b: scalar bias.
    Returns ({"dw": (n, 1) gradient, "db": scalar gradient}, cost).
    """
    m = np.shape(x)[1]
    # Forward pass: linear score then sigmoid activation, shape (1, m).
    z = np.dot(w.T, x) + b
    a = sigmoid(z)
    # Cross-entropy cost. Clip the activations away from 0 and 1 only
    # inside the logs so saturated predictions cannot yield nan/-inf;
    # the gradient below still uses the unclipped activations.
    a_safe = np.clip(a, 1e-15, 1 - 1e-15)
    cost = (-1 / m) * np.sum(y * np.log(a_safe) + (1 - y) * np.log(1 - a_safe))
    # Backward pass: dL/dz = prediction - truth, averaged over m examples.
    dz = a - y
    dw = (1 / m) * np.dot(x, dz.T)  # shape (n, 1)
    db = (1 / m) * np.sum(dz)
    gradient = {"dw": dw, "db": db}
    return gradient, cost
def optimize(x, y, w, b, iterations, alpha):
    """Train the parameters by batch gradient descent.

    iterations: number of gradient-descent steps.
    alpha: learning rate.
    Returns ({"w": w, "b": b}, costs), where costs holds the cost value
    sampled once every 100 iterations.
    """
    costs = []
    for step in range(iterations):
        # Gradients and cost for the current parameters.
        gradient, cost = propagate(x, y, w, b)
        # Move each parameter against its gradient.
        w = w - alpha * gradient["dw"]
        b = b - alpha * gradient["db"]
        # Record and report progress every 100 steps.
        if step % 100 == 0:
            costs.append(cost)
            print("迭代次数", step, ",erro:", cost)
    return ({"w": w, "b": b}, costs)
- 预测函数
LR输出的是0到1之间的概率值,大于0.5当成猫y=1,否则y=0
def predict(x, w, b):
    """Classify the columns of x with trained parameters.

    x: features, shape (n, m); w: weights, shape (n, 1); b: scalar bias.
    Returns a (1, m) float array of hard labels: 1.0 where the predicted
    probability exceeds 0.5, else 0.0.
    """
    # Prediction is just the forward pass.
    z = np.dot(w.T, x) + b  # shape (1, m)
    a = sigmoid(z)
    # Vectorized threshold replaces the original per-element Python loop
    # (a == 0.5 maps to 0, exactly as before).
    return (a > 0.5).astype(np.float64)
- 模型整合
初始化参数w和b→梯度下降进行优化→获取参数w和b→测试集预测
# Assemble the full model.
def LR_model(train_x, train_y, test_x, test_y, iterations, alpha):
    """Initialize, train by gradient descent, then score both splits."""
    n_features = np.shape(train_x)[0]
    w, b = init_parameter(n_features)
    # Fit the parameters.
    parameter, costs = optimize(train_x, train_y, w, b, iterations, alpha)
    w = parameter["w"]
    b = parameter["b"]
    # Hard predictions for the test and training sets.
    y_pred_test = predict(test_x, w, b)
    y_pred_train = predict(train_x, w, b)
    # Accuracy is 1 minus the mean absolute error on 0/1 labels.
    print('训练集准确率:', 100 * (1 - np.mean(np.abs(y_pred_train - train_y))), "%")
    print('测试集准确率:', 100 * (1 - np.mean(np.abs(y_pred_test - test_y))), "%")
    result = {
        "w": w,
        "b": b,
        "costs": costs,
        "y_pred_test": y_pred_test,
        "y_pred_train": y_pred_train,
    }
    return result
- 模型调用与实验结果
train_x_org, test_x_org, train_x, test_x, train_y, test_y = load_data()
# Scale the pixel values from [0, 255] down to [0, 1].
train_x, test_x = train_x / 255, test_x / 255
# plt.imshow(train_x_org[0])
result = LR_model(train_x, train_y, test_x, test_y, 2000, 0.005)
实验结果:
7. 新图测试:
# Fetch the trained parameters.
w, b = result['w'], result['b']
# Load a new image to classify.
new_cat = plt.imread('new_cat.jpeg')
from skimage import transform
# Resize to the 64x64x3 shape the model was trained on.
new_cat = transform.resize(new_cat, (64, 64))
# Show the image.
plt.imshow(new_cat)
# Flatten into a single (12288, 1) column and classify.
predict(new_cat.reshape(64 * 64 * 3, 1), w, b)
结果:
完整代码
import numpy as np
import h5py
import matplotlib.pyplot as plt
%matplotlib inline
# Load and preprocess the dataset.
def load_data():
    """Load the cat/non-cat h5 files and flatten the images.

    Returns (train_x_org, test_x_org, train_x, test_x, train_y, test_y):
    *_org are the raw (m, 64, 64, 3) image arrays, train_x/test_x hold one
    flattened 12288-pixel image per column (shape (12288, m)), and the
    labels are row vectors of shape (1, m).
    """
    # Read everything into ndarrays while the files are open, then let the
    # context managers close them -- the original left both handles open
    # and returned live h5py datasets still tied to those files.
    with h5py.File('train_catvnoncat.h5', 'r') as train_file:
        train_x_org = np.array(train_file["train_set_x"])
        train_y_org = np.array(train_file["train_set_y"])
    with h5py.File('test_catvnoncat.h5', 'r') as test_file:
        test_x_org = np.array(test_file["test_set_x"])
        test_y_org = np.array(test_file["test_set_y"])
    # Sample counts.
    m_train = train_x_org.shape[0]
    m_test = test_x_org.shape[0]
    # Labels as (1, m) row vectors.
    train_y = train_y_org.reshape(1, m_train)
    test_y = test_y_org.reshape(1, m_test)
    # Flatten each (64, 64, 3) image into one column: reshape to (m, -1)
    # first and then transpose -- reshaping straight to (12288, m) would
    # mix pixels from different examples.
    train_x = train_x_org.reshape(m_train, -1).T
    test_x = test_x_org.reshape(m_test, -1).T
    return train_x_org, test_x_org, train_x, test_x, train_y, test_y
# Logistic activation.
def sigmoid(z):
    """Element-wise logistic function: 1 / (1 + e^(-z))."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
# Parameter initialization.
def init_parameter(dim):
    """Return (w, b): a (dim, 1) zero weight vector and a zero bias."""
    weights = np.zeros((dim, 1))
    bias = 0
    return (weights, bias)
def propagate(x, y, w, b):
    """One forward/backward pass of logistic regression.

    x: features, shape (n, m); y: labels, shape (1, m);
    w: weights, shape (n, 1); b: scalar bias.
    Returns ({"dw": (n, 1) gradient, "db": scalar gradient}, cost).
    """
    m = np.shape(x)[1]
    # Forward pass: linear score then sigmoid activation, shape (1, m).
    z = np.dot(w.T, x) + b
    a = sigmoid(z)
    # Cross-entropy cost. Clip the activations away from 0 and 1 only
    # inside the logs so saturated predictions cannot yield nan/-inf;
    # the gradient below still uses the unclipped activations.
    a_safe = np.clip(a, 1e-15, 1 - 1e-15)
    cost = (-1 / m) * np.sum(y * np.log(a_safe) + (1 - y) * np.log(1 - a_safe))
    # Backward pass: dL/dz = prediction - truth, averaged over m examples.
    dz = a - y
    dw = (1 / m) * np.dot(x, dz.T)  # shape (n, 1)
    db = (1 / m) * np.sum(dz)
    gradient = {"dw": dw, "db": db}
    return gradient, cost
def optimize(x, y, w, b, iterations, alpha):
    """Train the parameters by batch gradient descent.

    iterations: number of gradient-descent steps.
    alpha: learning rate.
    Returns ({"w": w, "b": b}, costs), where costs holds the cost value
    sampled once every 100 iterations.
    """
    costs = []
    for step in range(iterations):
        # Gradients and cost for the current parameters.
        gradient, cost = propagate(x, y, w, b)
        # Move each parameter against its gradient.
        w = w - alpha * gradient["dw"]
        b = b - alpha * gradient["db"]
        # Record and report progress every 100 steps.
        if step % 100 == 0:
            costs.append(cost)
            print("迭代次数", step, ",erro:", cost)
    # Return once, after the loop completes.
    return ({"w": w, "b": b}, costs)
def predict(x, w, b):
    """Classify the columns of x with trained parameters.

    x: features, shape (n, m); w: weights, shape (n, 1); b: scalar bias.
    Returns a (1, m) float array of hard labels: 1.0 where the predicted
    probability exceeds 0.5, else 0.0.
    """
    # Prediction is just the forward pass.
    z = np.dot(w.T, x) + b  # shape (1, m)
    a = sigmoid(z)
    # Vectorized threshold replaces the original per-element Python loop
    # (a == 0.5 maps to 0, exactly as before).
    return (a > 0.5).astype(np.float64)
# Assemble the full model.
def LR_model(train_x, train_y, test_x, test_y, iterations, alpha):
    """Initialize, train by gradient descent, then score both splits."""
    n_features = np.shape(train_x)[0]
    w, b = init_parameter(n_features)
    # Fit the parameters.
    parameter, costs = optimize(train_x, train_y, w, b, iterations, alpha)
    w = parameter["w"]
    b = parameter["b"]
    # Hard predictions for the test and training sets.
    y_pred_test = predict(test_x, w, b)
    y_pred_train = predict(train_x, w, b)
    # Accuracy is 1 minus the mean absolute error on 0/1 labels.
    print('训练集准确率:', 100 * (1 - np.mean(np.abs(y_pred_train - train_y))), "%")
    print('测试集准确率:', 100 * (1 - np.mean(np.abs(y_pred_test - test_y))), "%")
    result = {
        "w": w,
        "b": b,
        "costs": costs,
        "y_pred_test": y_pred_test,
        "y_pred_train": y_pred_train,
    }
    return result
train_x_org, test_x_org, train_x, test_x, train_y, test_y = load_data()
# Scale the pixel values from [0, 255] down to [0, 1].
train_x, test_x = train_x / 255, test_x / 255
# plt.imshow(train_x_org[0])
result = LR_model(train_x, train_y, test_x, test_y, 2000, 0.005)