1. 数据处理
类似图片等并不是能直接使用的数据,故而训练模型之前需要对数据进行必要的处理。
# Read the raw HDF5 datasets (replace the placeholders with real file paths).
train_data = h5py.File('此处写文件所在', 'r')
test_data = h5py.File('此处写文件所在', 'r')
# Extract the raw arrays from the training file.
train_data_org = train_data['train_set_x'][:]
train_lable_org = train_data['train_set_y'][:]
# BUG FIX: the test file stores its arrays under 'test_set_x'/'test_set_y';
# the original read 'train_set_x'/'train_set_y' from the TEST file, which
# would raise a KeyError on the standard cat/non-cat test set.
# NOTE(review): confirm against the actual keys of your h5 file.
test_data_org = test_data['test_set_x'][:]
test_lable_org = test_data['test_set_y'][:]
# Number of samples in each split (first axis is the sample axis).
m_train = train_data_org.shape[0]
m_test = test_data_org.shape[0]
# Flatten each sample into a column: (m, ...) -> (features, m).
train_data_tran = train_data_org.reshape(m_train, -1).T
test_data_tran = test_data_org.reshape(m_test, -1).T
# Labels become row vectors of shape (1, m).
train_lable_tran = train_lable_org[np.newaxis, :]
test_lable_tran = test_lable_org[np.newaxis, :]
# Standardize: divide by (max - min) of the data, i.e. 255 for 8-bit pixels.
train_data_sta = train_data_tran / 255
test_data_sta = test_data_tran / 255
# Initialize the parameters: one weight per input feature, scalar bias.
n_dim = train_data_sta.shape[0]
w = np.zeros((n_dim, 1))
b = 0
上述代码看着较为繁杂,主要是因为将数据集分为了训练集与测试集,实质上都是重复的操作。
2. 激活函数
激活函数可以自由选择,大部分简单的神经网络都使用sigmoid函数,其最终目的是将整个数据集映射到一个固定范围内。
def sigmoid(z):
    """Logistic function: map z (scalar or ndarray) into (0, 1)."""
    return 1 / (1 + np.exp(-z))
3.前向传播与反向传播(梯度下降)
下式中a大于0.5则正结果,小于0.5则负结果,例如识别物体时正结果为是该物体,负结果则不是。
损失函数公式 :得到使用当前w与b的误差。
反向传播公式:更新w与b。
def propagate(w, b, x, y):
    """Forward and backward pass for logistic regression.

    w: (n, 1) weights, b: scalar bias, x: (n, m) inputs, y: (1, m) labels
    (shapes assumed from how the rest of the file builds them).
    Returns ({'dw': ..., 'db': ...}, cost) where cost is the mean
    binary cross-entropy over the m samples.
    """
    m = x.shape[1]
    # forward: linear score, then sigmoid activation
    a = sigmoid(np.dot(w.T, x) + b)
    j = -1 / m * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))
    # backward: gradients of the cost w.r.t. w and b
    residual = a - y
    dw = 1 / m * np.dot(x, residual.T)
    db = 1 / m * np.sum(residual)
    return {'dw': dw, 'db': db}, j
4.优化函数
当print_cost为True时每隔100次打印一次误差。
def optimize(w, b, x, y, alpha, n_iters, print_cost):
    """Run n_iters gradient-descent steps with learning rate alpha.

    Records the cost every 100 iterations; also prints it when
    print_cost is truthy. Returns the last gradients, the learned
    parameters {'w', 'b'} and the recorded cost history.
    """
    costs = []
    for step in range(n_iters):
        grands, cost = propagate(w, b, x, y)
        dw, db = grands['dw'], grands['db']
        # descend along the negative gradient (rebinds, does not mutate caller's arrays)
        w = w - alpha * dw
        b = b - alpha * db
        if step % 100 == 0:
            costs.append(cost)
            if print_cost:
                print('n_iters is', step, 'cost is', cost)
    grands = {'dw': dw, 'db': db}
    params = {'w': w, 'b': b}
    return grands, params, costs
5.预测函数
def predict(w, b, x_test):
    """Return a (1, m) float array of 0/1 class predictions.

    w: (n, 1) weights, b: scalar bias, x_test: (n, m) inputs.
    sigmoid(z) > 0.5 exactly when z > 0, so we threshold the linear
    score directly; this removes the Python-level loop and avoids
    overflow in np.exp for large |z|.
    """
    z = np.dot(w.T, x_test) + b
    return (z > 0).astype(float)
6.模型主体
def model(w, b, x_train, y_train, x_test, y_test, alpha, n_iters, print_cost):
    """Train logistic regression, print train/test accuracy, return results."""
    grands, params, costs = optimize(w, b, x_train, y_train, alpha, n_iters, print_cost)
    w, b = params['w'], params['b']
    y_pred_train = predict(w, b, x_train)
    y_pred_test = predict(w, b, x_test)
    print('the train acc is', np.mean(y_pred_train == y_train) * 100, '%')
    print('the test acc is', np.mean(y_pred_test == y_test) * 100, '%')
    return {
        'w': w,
        'b': b,
        'costs': costs,
        'y_pred_train': y_pred_train,
        'y_pred_test': y_pred_test,
        'alpha': alpha,
    }
7.全部代码
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
%matplotlib inline
# A. Data preparation
train_data = h5py.File('文件位置', 'r')
test_data = h5py.File('文件位置(记得用\分割)', 'r')
# Print the keys actually stored in the test file (useful sanity check).
for key in test_data.keys():
    print(key)
train_data_org = train_data['train_set_x'][:]
print(train_data_org.shape)
train_lable_org = train_data['train_set_y'][:]
print(train_lable_org.shape)
# BUG FIX: the test file stores its arrays under 'test_set_x'/'test_set_y';
# the original read 'train_set_x'/'train_set_y' from the TEST file, which
# would raise a KeyError on the standard cat/non-cat test set.
# NOTE(review): confirm against the key listing printed above.
test_data_org = test_data['test_set_x'][:]
test_lable_org = test_data['test_set_y'][:]
# Visual sanity check: show one training image and its label.
plt.imshow(train_data_org[148])
print(train_lable_org[148])
m_train = train_data_org.shape[0]
m_test = test_data_org.shape[0]
# Flatten each sample into a column: (m, ...) -> (features, m).
train_data_tran = train_data_org.reshape(m_train, -1).T
test_data_tran = test_data_org.reshape(m_test, -1).T
# Labels become row vectors of shape (1, m).
train_lable_tran = train_lable_org[np.newaxis, :]
test_lable_tran = test_lable_org[np.newaxis, :]
# Standardize: divide by (max - min) of the data, i.e. 255 for 8-bit pixels.
train_data_sta = train_data_tran / 255
test_data_sta = test_data_tran / 255
print(train_lable_tran.shape)
print(train_data_sta.shape)
# B. Activation function
def sigmoid(z):
    """Squash z into the open interval (0, 1) via the logistic curve."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
# Initialize the model parameters.
n_dim=train_data_sta.shape[0]  # number of input features per sample
w=np.zeros((n_dim,1))  # weight column vector, one entry per feature
b=0  # scalar bias
print(w.shape)
# C. Forward and backward propagation
def propagate(w, b, x, y):
    """Compute the cross-entropy cost and its gradients for one pass."""
    m = x.shape[1]
    z = np.dot(w.T, x) + b   # linear score, shape (1, m)
    a = sigmoid(z)           # predicted probabilities
    # mean binary cross-entropy over the m samples
    j = -1 / m * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))
    # gradients of j with respect to w and b
    dw = 1 / m * np.dot(x, (a - y).T)
    db = 1 / m * np.sum(a - y)
    return {'dw': dw, 'db': db}, j
# D. Optimization
def optimize(w, b, x, y, alpha, n_iters, print_cost):
    """Gradient-descent driver: n_iters updates with learning rate alpha.

    The cost is recorded every 100 iterations (and printed when
    print_cost is truthy). Returns last gradients, learned parameters
    and the cost history.
    """
    costs = []
    i = 0
    while i < n_iters:
        grands, j = propagate(w, b, x, y)
        dw = grands['dw']
        db = grands['db']
        w = w - alpha * dw
        b = b - alpha * db
        if i % 100 == 0:
            costs.append(j)
            if print_cost:
                print('n_iters is', i, 'cost is', j)
        i += 1
    return {'dw': dw, 'db': db}, {'w': w, 'b': b}, costs
# E. Prediction
def predict(w, b, x_test):
    """Return a (1, m) float array of hard 0/1 predictions.

    Since sigmoid(z) > 0.5 exactly when z > 0, thresholding the linear
    score directly gives the same labels as the original element-wise
    loop while staying fully vectorized and avoiding np.exp overflow
    for large |z|.
    """
    z = np.dot(w.T, x_test) + b
    return (z > 0).astype(float)
# F. Model wrapper
def model(w, b, x_train, y_train, x_test, y_test, alpha, n_iters, print_cost):
    """Fit logistic regression, print accuracies, return all learned state."""
    grands, params, costs = optimize(w, b, x_train, y_train,
                                     alpha, n_iters, print_cost)
    w = params['w']
    b = params['b']
    predictions = {
        'train': predict(w, b, x_train),
        'test': predict(w, b, x_test),
    }
    print('the train acc is', np.mean(predictions['train'] == y_train) * 100, '%')
    print('the test acc is', np.mean(predictions['test'] == y_test) * 100, '%')
    d = {'w': w, 'b': b, 'costs': costs,
         'y_pred_train': predictions['train'],
         'y_pred_test': predictions['test'],
         'alpha': alpha}
    return d
# Train once and plot the learning curve (one point per 100 iterations).
d = model(w, b, train_data_sta, train_lable_tran, test_data_sta, test_lable_tran,
          alpha=0.005, n_iters=2000, print_cost=False)
plt.plot(d['costs'])
plt.xlabel('per hundred iters')
plt.ylabel('cost')
# Spot-check a single test example against its prediction.
index = 18
print(test_data_tran.shape)
print('y is', test_lable_tran[0, index])
print('y_prediction is', int(d['y_pred_test'][0, index]))
plt.imshow(test_data_org[index])
# Compare several learning rates on the same plot.
alphas = [0.01, 0.001, 0.0001]
for i in alphas:
    print('alpha=', i)
    d = model(w, b, train_data_sta, train_lable_tran, test_data_sta, test_lable_tran,
              alpha=i, n_iters=2000, print_cost=False)
    print('--------------------------------------')
    plt.plot(d['costs'], label=str(i))
plt.xlabel('per hundred iters')
plt.ylabel('cost')
plt.legend()
重点就是贴出的几个公式,其余部分较为简单,因为用jupyter写的,代码注释就不贴了,有疑问或者需要数据集可以私信我。