第一步:安装包
numpy
matplotlib
h5py
scipy
pillow
常用公式:
#查看有什么包
conda list
# Load the data
import h5py

# Training and test data (HDF5 files opened read-only).
# The handles are intentionally left open: later code reads from them again.
train_data=h5py.File('D:\\Learning\\WED\\train_catvnoncat.h5','r')
test_data=h5py.File('D:\\Learning\\WED\\test_catvnoncat.h5','r')

# List the keys stored in the training file
for key in train_data.keys():
    print(key)
# Recorded output:
#   list_classes : cat / not a cat
#   train_set_x  : the input images
#   train_set_y  : the class of each input image (cat or not)

# Shape of train_set_x in train_data
# (printed explicitly so the value is shown even when this is not the last
# expression of a notebook cell)
print(train_data['train_set_x'].shape)
# Recorded output:
#   (209, 64, 64, 3): 209 images, each with three channels (64, 64, 3)

# Shape of train_set_y in train_data
print(train_data['train_set_y'].shape)
# Recorded output:
#   (209,): 209 cat / not-cat labels

# Extract the training and test sets.
# [:] takes all rows and all columns of the selected dataset.
train_data_org = train_data['train_set_x'][:]
train_labels_org = train_data['train_set_y'][:]
test_data_org = test_data['test_set_x'][:]
test_labels_org = test_data['test_set_y'][:]
# View an image
import matplotlib.pyplot as plt
# IPython magic: only valid when this is run inside IPython / Jupyter.
%matplotlib inline
# Display the training image at index 148
plt.imshow(train_data_org[148])
# Extract the training and test sets
# NOTE(review): these four reads repeat the extraction done earlier in this
# file with the same keys — looks like a duplicated notebook cell; confirm
# before removing.
train_data_org = train_data['train_set_x'][:]
train_labels_org = train_data['train_set_y'][:]
test_data_org = test_data['test_set_x'][:]
test_labels_org = test_data['test_set_y'][:]
# View an image
import matplotlib.pyplot as plt
# IPython magic: only valid when this is run inside IPython / Jupyter.
%matplotlib inline
# Display the training image at index 130
plt.imshow(train_data_org[130])
# Reshape the data
# numpy is used for the matrix handling below
import numpy as np

# Number of samples in each split (length of the leading axis).
m_train = len(train_data_org)
m_test = len(test_data_org)

# Flatten every sample into one row, then transpose so that each column
# holds one sample.
train_data_tran = np.reshape(train_data_org, (m_train, -1)).T
test_data_tran = np.reshape(test_data_org, (m_test, -1)).T
print(train_data_tran.shape,test_data_tran.shape)

# Give the label vectors a leading axis of length 1.
train_labels_tran = np.expand_dims(train_labels_org, axis=0)
test_labels_tran = np.expand_dims(test_labels_org, axis=0)

# Divide by 255; intended to bring the data into the 0-1 range.
train_data_sta = np.true_divide(train_data_tran, 255)
test_data_sta = np.true_divide(test_data_tran, 255)
# Sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*.

    Args:
        z: a scalar or NumPy array; the function is applied element-wise.

    Returns:
        A value (or array of the same shape as *z*) in the range 0 to 1.
    """
    a = 1 / (1 + np.exp(-z))
    return a
# Initialise the parameters
# n_dim is the length of the first axis of the scaled training data.
n_dim = len(train_data_sta)
# Weights start as an (n_dim, 1) column of zeros; the bias starts at 0.
w = np.zeros(shape=(n_dim, 1))
b = 0
# Forward propagation, cost function and gradients
def propagate(w,b,X,y):
    """Run one forward pass and return the gradients and the cost.

    Args:
        w: weight array; ``np.dot(w.T, X)`` must be defined.
        b: bias added to the linear term.
        X: input data; ``X.shape[1]`` is used as the number of samples.
        y: labels, combined element-wise with the activations.

    Returns:
        A tuple ``(grads, J)`` where ``grads`` is ``{'dw': dw, 'db': db}``
        and ``J`` is the cross-entropy cost averaged over the samples.
    """
    m = X.shape[1]

    # Forward propagation
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)

    # Cost function. The activations are clipped for the cost only, so a
    # saturated activation (exactly 0 or 1) cannot produce 0 * log(0) = nan.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    J = -1 / m * np.sum(y * np.log(A_safe) + (1 - y) * np.log(1 - A_safe))

    # Gradients (computed from the unclipped activations)
    dw = 1 / m * np.dot(X, (A - y).T)
    db = 1 / m * np.sum(A - y)

    # Collect dw and db in a dictionary
    grads = {'dw': dw, 'db': db}
    return grads, J
# Optimisation (gradient descent)
def optimize(w,b,X,y,alpha,n_iters):
    """Fit ``w`` and ``b`` by running ``n_iters`` gradient-descent steps.

    Args:
        w: initial weights.
        b: initial bias.
        X: training data passed through to ``propagate``.
        y: training labels passed through to ``propagate``.
        alpha: learning rate (step size).
        n_iters: number of update steps to run.

    Returns:
        A tuple ``(grads, params, costs)``:
        ``grads`` is ``{'dw': dw, 'db': db}`` from the last step (both are
        ``None`` when ``n_iters`` is 0), ``params`` is ``{'w': w, 'b': b}``
        after the last update, and ``costs`` holds the cost recorded every
        100 iterations.
    """
    costs = []
    # Defined up front so the return value is well-formed even when the loop
    # body never runs.
    dw = db = None
    for i in range(n_iters):
        grads, J = propagate(w, b, X, y)
        dw = grads['dw']
        db = grads['db']
        # Step against the gradient. The original `b = b=alpha * db` was a
        # chained assignment that replaced the bias with alpha * db.
        w = w - alpha * dw
        b = b - alpha * db
        if i % 100 == 0:
            costs.append(J)
            print('n_iters is ',i,'costs is ',J)
    grads = {'dw': dw, 'db': db}
    params = {'w': w, 'b': b}
    return grads, params, costs
# Prediction
def predict(w,b,X_test):
    """Predict a 0/1 label for every column of *X_test*.

    Args:
        w: weights; ``np.dot(w.T, X_test)`` must be defined.
        b: bias added to the linear term.
        X_test: data to classify, one sample per column.

    Returns:
        A float array with the same shape as the activations (``(1, m)`` for
        a single-column ``w``), holding 1.0 where the sigmoid activation is
        strictly greater than 0.5 and 0.0 elsewhere.
    """
    Z = np.dot(w.T, X_test) + b
    A = sigmoid(Z)
    # Vectorised threshold; replaces the per-column Python loop.
    y_pred = (A > 0.5).astype(float)
    return y_pred
# Full model: train, predict and report accuracy
def model(w,b,X_train,y_train,X_test,y_test,alpha,n_iters):
    """Train with ``optimize``, predict on both splits and print accuracy.

    Args:
        w: initial weights.
        b: initial bias.
        X_train: training data.
        y_train: training labels.
        X_test: test data.
        y_test: test labels.
        alpha: learning rate passed to ``optimize``.
        n_iters: number of iterations passed to ``optimize``.

    Returns:
        A dict with keys ``'w'``, ``'b'`` (the trained parameters),
        ``'y_pred_train'``, ``'y_pred_test'`` (the predictions) and
        ``'alpha'`` (the learning rate used).
    """
    # Only the fitted parameters are needed here.
    _, params, _ = optimize(w, b, X_train, y_train, alpha, n_iters)
    w = params['w']
    b = params['b']

    y_pred_train = predict(w, b, X_train)
    y_pred_test = predict(w, b, X_test)

    # Accuracy = share of predictions equal to the labels, as a percentage.
    print('the train acc is',np.mean( y_pred_train == y_train)*100,'%')
    print('the test acc is',np.mean( y_pred_test == y_test)*100,'%')

    # Kept under its own name so the bias `b` is not overwritten by the
    # result dictionary.
    result = {
        'w' : w,
        'b' : b,
        'y_pred_train' : y_pred_train,
        'y_pred_test' : y_pred_test,
        'alpha' : alpha,
    }
    return result
# Train and evaluate on the prepared data.
# NOTE: this rebinds `b` from the initial bias to the dictionary returned by
# model(), so running this statement a second time would pass that
# dictionary in as the bias.
b = model(
    w=w,
    b=b,
    X_train=train_data_sta,
    y_train=train_labels_tran,
    X_test=test_data_sta,
    y_test=test_labels_tran,
    alpha=0.005,
    n_iters=2000,
)