Andrew Ng's Coursera Deep Learning (1-2) Programming Exercises

A note on these write-ups: they differ from most blogs online, because I had already finished the exercises in a notebook and am summarizing them again here.

Class 1: Neural Networks and Deep Learning

Week 2: Programming exercises on neural network basics

Contents

1. Usage of sigmoid

  • sigmoid, the sigmoid derivative, reshape, row normalization, softmax
import numpy as np 

# 1. The sigmoid function
def sigmoid(x):
    s = 1.0/(1 + np.exp(-x))
    return s

# 2. Gradient (derivative) of sigmoid
def sigmoid_derivative(x):
    s = 1.0/(1 + np.exp(-x))
    ds = s*(1-s)
    return ds

# 3. reshape (length,height,3) to (length*height*3,1)
def image2vector(image):
    v = image.reshape(image.shape[0]*image.shape[1]*image.shape[2],1)
    return v

# 4. Normalize rows
#    Gradient descent converges faster on normalized data
def normalizeRows(x):
    x_norm = np.linalg.norm(x,axis=1,keepdims = True) # L2 norm of each row, kept as a column vector
    x = x/x_norm
    return x

# 5、softmax
def softmax(x):
    x_exp = np.exp(x)  #(n,m)
    x_sum = np.sum(x_exp,axis=1,keepdims=True)  #(n,1)
    s = x_exp/x_sum    #(n,m)
    return s 

if __name__=="__main__":
    x = np.array([1,2,3])
    print(sigmoid(x))
    print(sigmoid_derivative(x))

    image = np.array([[[0.6,0.3],[0.9,0.5],[0.4,0.3]],
                      [[0.9,0.2],[0.6,0.8],[0.7,0.1]],
                      [[0.3,0.4],[0.6,0.7],[0.9,0.9]]])
    print(image2vector(image))

    x = np.array([[0,3,4],[1,6,4]])
    print(normalizeRows(x))

    x = np.array([[9,2,5,0,0],[7,5,0,0,0]])
    print(softmax(x))
[ 0.73105858  0.88079708  0.95257413]
[ 0.19661193  0.10499359  0.04517666]
[[ 0.6]
 [ 0.3]
 [ 0.9]
 [ 0.5]
 [ 0.4]
 [ 0.3]
 [ 0.9]
 [ 0.2]
 [ 0.6]
 [ 0.8]
 [ 0.7]
 [ 0.1]
 [ 0.3]
 [ 0.4]
 [ 0.6]
 [ 0.7]
 [ 0.9]
 [ 0.9]]
[[ 0.          0.6         0.8       ]
 [ 0.13736056  0.82416338  0.54944226]]
[[  9.80897665e-01   8.94462891e-04   1.79657674e-02   1.21052389e-04
    1.21052389e-04]
 [  8.78679856e-01   1.18916387e-01   8.01252314e-04   8.01252314e-04
    8.01252314e-04]]
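One caveat worth adding (my note, not part of the original exercise): np.exp overflows for large inputs, so softmax is usually computed after subtracting each row's maximum, which leaves the result mathematically unchanged. A minimal sketch:

import numpy as np

# Numerically stable softmax: exp(x-c)/sum(exp(x-c)) equals softmax(x) for any
# constant c, so shifting by the row-wise max keeps np.exp from overflowing.
def softmax_stable(x):
    shifted = x - np.max(x, axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    return x_exp/np.sum(x_exp, axis=1, keepdims=True)

x = np.array([[900., 2., 5., 0., 0.]])  # plain np.exp(900.) would overflow
print(softmax_stable(x))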


2. Dot products and the L1/L2 loss functions

import time
import numpy as np 

x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

# 1. dot product, outer product, elementwise product, general dot product

dot = 0
for i in range(len(x1)):
    dot += x1[i]*x2[i]

outer = np.zeros((len(x1),len(x2))) 
for i in range(len(x1)):
    for j in range(len(x2)):
        outer[i,j] = x1[i]*x2[j]

mul = np.zeros(len(x1))
for i in range(len(x1)):
    mul[i] = x1[i]*x2[i]

W = np.random.rand(3,len(x1)) 
gdot = np.zeros(W.shape[0])
for i in range(W.shape[0]):
    for j in range(len(x1)):
        gdot[i] += W[i,j]*x1[j]

#print(dot)
#print(outer)
#print(mul)

# 2. Vectorized implementations
dot = np.dot(x1,x2)
outer = np.outer(x1,x2)
mul = np.multiply(x1,x2)

#print(dot)
#print(outer)
#print(mul)

def L1(yhat,y):
    loss = np.sum(np.abs(y-yhat))
    return loss
def L2(yhat,y):
    loss = np.sum(np.power((y-yhat),2))
    return loss

if __name__=="__main__":
    yhat = np.array([0.9,0.2,0.1,0.4,0.9])
    y = np.array([1,0,0,1,1])
    print(L1(yhat,y))
    print(L2(yhat,y))

1.1
0.43
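The unused import time above hints at the point of vectorization: the course exercise times loop code against NumPy. A small timing sketch (my addition; absolute timings vary by machine):

import time
import numpy as np

n = 1000000
a = np.random.rand(n)
b = np.random.rand(n)

# explicit Python loop
tic = time.process_time()
dot = 0
for i in range(n):
    dot += a[i]*b[i]
toc = time.process_time()
print("loop dot:       %f ms" % (1000*(toc-tic)))

# vectorized dot product
tic = time.process_time()
dot = np.dot(a, b)
toc = time.process_time()
print("vectorized dot: %f ms" % (1000*(toc-tic)))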

3. The dataset

import numpy as np
import h5py


def load_dataset():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

if __name__=="__main__":
    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

    print(train_set_x_orig.shape,test_set_x_orig.shape,train_set_y.shape,test_set_y.shape,classes.shape)

    x = train_set_x_orig[25]
    print(x.shape)
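Why does load_dataset reshape the labels to (1, m)? h5py returns them as rank-1 arrays, whose transpose is a no-op, while an explicit row vector broadcasts predictably in the w.T x + b style code of the next section. A small illustration (my addition):

import numpy as np

y = np.array([0, 1, 1])            # rank-1 array, shape (3,)
print(y.shape, y.T.shape)          # (3,) (3,)   -- transposing does nothing
y_row = y.reshape((1, y.shape[0]))
print(y_row.shape, y_row.T.shape)  # (1, 3) (3, 1) -- a proper row vector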



4. The neural network model


import numpy as np 
import matplotlib.pyplot as plt 
import h5py
import scipy
import pylab  
from PIL import Image
from lr_utils import load_dataset
from scipy import ndimage

# 1. Load the dataset
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# 2. Display an image
index = 90
#plt.imshow(train_set_x_orig[index])
#pylab.show()   # needed for the image window to actually appear
# squeeze() removes axes of size 1: (4,1,3) becomes (4,3), while (4,2,3) is unchanged
print ("y = " + str(train_set_y[:, index]) + ", it's a '" \
    + classes[np.squeeze(train_set_y[:, index])].decode("utf-8") +  "' picture.")
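# Quick check of the squeeze() behaviour described above (added illustration):
#   np.squeeze(np.ones((4,1,3))).shape  -> (4, 3)     the size-1 axis is removed
#   np.squeeze(np.ones((4,2,3))).shape  -> (4, 2, 3)  unchanged, no size-1 axis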

# 3. Print the dataset shapes
m_train = train_set_x_orig.shape[0]
m_test  = test_set_x_orig.shape[0]
num_px  = train_set_x_orig.shape[1]

print("TrainSet:   " + str(train_set_x_orig.shape))
print("TestSet:    " + str(test_set_x_orig.shape))
print("TrainLabel: " + str(train_set_y.shape))
print("TestLabel:  " + str(test_set_y.shape))

# 4. Flatten each image and stack the examples as columns
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0],-1).T
test_set_x_flatten  = test_set_x_orig.reshape(test_set_x_orig.shape[0],-1).T
print(train_set_x_flatten.shape,test_set_x_flatten.shape)

# 5. Standardize the dataset (pixel values lie in [0,255];
#    the trailing dot guards against integer division under Python 2)
train_set_x = train_set_x_flatten/255.
test_set_x  = test_set_x_flatten/255.

# 6. The sigmoid function
def sigmoid(z):
    s = 1.0/(1 + np.exp(-z))
    return s 

# 7. Initialize the parameters with zeros
def initialize_zeros(dim):
    w = np.zeros([dim,1])
    b = 0
    assert(w.shape == (dim,1))
    assert(isinstance(b,float) or isinstance(b,int))
    return w,b

# 8. Forward and backward propagation
# computes the cost function and its gradients
def propagate(w,b,x,y):
    '''
    function:
        implement the cost function and its gradient

    Arguments:
        w --- weights, shape (num_px*num_px*3, 1)
        b --- bias, a scalar
        x --- input data, shape (num_px*num_px*3, number of examples)
        y --- labels (0 or 1), shape (1, number of examples)

    return:
        cost --- the log (cross-entropy) cost for logistic regression
        dw   --- gradient of the cost with respect to w
        db   --- gradient of the cost with respect to b
    '''
    m = x.shape[1]
    # forward propagation
    y_   = sigmoid(np.dot(w.T,x) + b)
    cost = -(1.0/m)*np.sum(y*np.log(y_) + (1-y)*np.log(1-y_))
    # backward propagation
    dw = (1.0/m)*np.dot(x,(y_-y).T)
    db = (1.0/m)*np.sum(y_-y)

    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    grads = {"dw":dw, "db":db}

    return grads,cost
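# For reference (added note), the formulas implemented by propagate(), with
# A = sigmoid(w.T X + b) evaluated column-wise over the m examples:
#   cost:      J  = -(1/m) * sum( Y*log(A) + (1-Y)*log(1-A) )
#   gradients: dw = (1/m) * X (A-Y).T      db = (1/m) * sum(A-Y)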

w,b,x,y = np.array([[1.],[2.]]), 2, np.array([[1,2,-1],[3,4,-3]]), np.array([[1,0,1]])
grads, cost = propagate(w,b,x,y)
print(grads,cost) 

# 9. Optimization (gradient descent)
def optimize(w, b, x, y, num_iterations, learning_rate, print_cost = False):
    '''
    return: params, grads, costs
    '''
    costs = []
    for i in range(num_iterations):
        grads,cost = propagate(w,b,x,y)

        dw = grads["dw"]
        db = grads["db"]

        w = w - dw*learning_rate
        b = b - db*learning_rate

        # record (and optionally print) the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("cost after iteration %i : %f" % (i, cost))

    params = {"w":w, "b":b}
    grads  = {"dw":dw, "db":db}

    return params,grads,costs

params, grads, costs = optimize(w, b, x, y, num_iterations= 100, learning_rate = 0.009, print_cost = False)
print(params,grads)
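# (Added sanity-check sketch, not part of the original exercise.) A centered
# finite-difference approximation of dJ/dw should agree with the analytic dw
# returned by propagate() to roughly 1e-7:
def numerical_dw(w, b, x, y, eps=1e-7):
    approx = np.zeros_like(w, dtype=float)
    for i in range(w.shape[0]):
        w_plus  = w.astype(float); w_plus[i]  += eps
        w_minus = w.astype(float); w_minus[i] -= eps
        _, cost_plus  = propagate(w_plus,  b, x, y)
        _, cost_minus = propagate(w_minus, b, x, y)
        approx[i] = (cost_plus - cost_minus)/(2*eps)
    return approx
#print(np.allclose(propagate(w,b,x,y)[0]["dw"], numerical_dw(w,b,x,y)))  # expect True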

# 10. Prediction
def predict(w,b,x):
    m = x.shape[1]
    y_p = np.zeros([1,m])
    w = w.reshape(x.shape[0],1)
    y_ = sigmoid(np.dot(w.T,x)+b)

    for i in range(y_.shape[1]):
        if y_[0,i] > 0.5:
            y_p[0,i]=1
        else:
            y_p[0,i]=0

    assert(y_p.shape == (1,m))

    return y_p
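# (Added note: the thresholding loop above can be vectorized in one line,
#  y_p = (y_ > 0.5)*1.0, which produces the same (1,m) array of 0s and 1s.)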

w = np.array([[0.1124579],[0.23106775]])
b = -0.3
x = np.array([[1.,-1.1,-3.2],[1.2,2.,0.1]])
print ("predictions = " + str(predict(w, b, x)))

# 11. Merge all of the pieces into a single model
def model(x_train, y_train, x_test, y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    w,b = initialize_zeros(x_train.shape[0])
    parameters,grads,costs = optimize(w,b,x_train,y_train,num_iterations,learning_rate,print_cost)
    w = parameters["w"]
    b = parameters["b"]
    y_p_train = predict(w,b,x_train)
    y_p_test  = predict(w,b,x_test)

    print("train accuracy:{} %".format(100-np.mean(np.abs(y_p_train-y_train))*100))
    print("test accuracy:{} % ".format(100-np.mean(np.abs(y_p_test-y_test))*100))
    d = {"costs":costs,
         "y_p_train":y_p_train,
         "y_p_test":y_p_test,
         "w":w,
         "b":b,
         "learning_rate":learning_rate,
         "num_iterations":num_iterations}
    return d 

# ~99% train accuracy vs ~70% test accuracy: the model overfits
d = model(train_set_x,train_set_y,test_set_x,test_set_y,num_iterations=2000,learning_rate=0.005,print_cost=True)


# 12. Plot the cost curve
'''
costs = np.squeeze(d["costs"]) 
plt.plot(costs)
plt.ylabel("cost")
plt.xlabel("iterations(per hundreds)")
plt.title("learning rate = "+str(d["learning_rate"]))
plt.show()
'''
# 13. Choosing the learning rate

learning_rate = [0.01,0.001,0.0001]
models = {}
for i in learning_rate:
    print("learning rate is: " + str(i))
    models[str(i)] = model(train_set_x,train_set_y,test_set_x,test_set_y,
                           num_iterations=1500,learning_rate=i,print_cost=False)

for i in learning_rate:
    plt.plot(np.squeeze(models[str(i)]["costs"]), label=str(models[str(i)]["learning_rate"]))
plt.ylabel("cost")
plt.xlabel("iteration")

legend = plt.legend(loc='upper center', shadow=True)
frame = legend.get_frame()
frame.set_facecolor('0.90')
plt.show()

'''
# 14. Test with your own image
# (Note: ndimage.imread and scipy.misc.imresize have since been removed from
#  SciPy; a Pillow-based alternative is sketched after this block.)
fname = 'images/other_test.jpg'
image = np.array(ndimage.imread(fname,flatten=False))
my_image = scipy.misc.imresize(image,size=(num_px,num_px)).reshape((1,num_px*num_px*3)).T
my_predicted_image = predict(d["w"],d["b"],my_image)

plt.imshow(image)
pylab.show()
print("y = " + str(np.squeeze(my_predicted_image)) + ", your algorithm predicts a \"" \
    + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8") +  "\" picture.")
'''
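Since ndimage.imread and scipy.misc.imresize are gone from recent SciPy releases, here is a Pillow-based sketch of step 14 (my addition; PIL is already imported above, and 'images/other_test.jpg' is the same hypothetical path). Note the division by 255., matching how the training set was standardized:

'''
fname = 'images/other_test.jpg'
img = Image.open(fname).convert("RGB").resize((num_px, num_px))  # load and resize with Pillow
my_image = np.array(img).reshape((1, num_px*num_px*3)).T / 255.  # flatten and standardize like the training data
my_predicted_image = predict(d["w"], d["b"], my_image)
print("y = " + str(np.squeeze(my_predicted_image)))
'''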
y = [0], it's a 'non-cat' picture.
TrainSet:   (209, 64, 64, 3)
TestSet:    (50, 64, 64, 3)
TrainLabel: (1, 209)
TestLabel:  (1, 50)
(12288, 209) (12288, 50)
{'dw': array([[ 0.99772382],
       [ 2.3265089 ]]), 'db': 0.002187770724565441} 5.66894829829
{'b': 1.9241133755874054, 'w': array([[ 0.18954212],
       [ 0.16465723]])} {'dw': array([[ 0.69128662],
       [ 1.43797759]]), 'db': 0.2187345002480347}
predictions = [[ 1.  1.  0.]]
cost after iteration 0 : 0.693147
cost after iteration 100 : 0.584508
cost after iteration 200 : 0.466949
cost after iteration 300 : 0.376007
cost after iteration 400 : 0.331463
cost after iteration 500 : 0.303273
cost after iteration 600 : 0.279880
cost after iteration 700 : 0.260042
cost after iteration 800 : 0.242941
cost after iteration 900 : 0.228004
cost after iteration 1000 : 0.214820
cost after iteration 1100 : 0.203078
cost after iteration 1200 : 0.192544
cost after iteration 1300 : 0.183033
cost after iteration 1400 : 0.174399
cost after iteration 1500 : 0.166521
cost after iteration 1600 : 0.159305
cost after iteration 1700 : 0.152667
cost after iteration 1800 : 0.146542
cost after iteration 1900 : 0.140872
train accuracy:99.04306220095694 %
test accuracy:70.0 % 
learning rate is: 0.01
train accuracy:99.52153110047847 %
test accuracy:68.0 % 
learning rate is: 0.001
train accuracy:88.99521531100478 %
test accuracy:64.0 % 
learning rate is: 0.0001
train accuracy:68.42105263157895 %
test accuracy:36.0 % 


Summary:
1. Gradient descent converges faster on normalized data.

2. Building the learning algorithm means laying out its structure, initializing the parameters, computing the cost function and its gradients, and optimizing with gradient descent.

3. The general structure of a learning algorithm:

Initialize the model's parameters
Learn the parameters by minimizing the cost function
Use the learned parameters to make predictions on new data
Analyze the results

4. The model overfits: as the number of iterations grows, training accuracy keeps rising while test accuracy drops.

5. Choosing the learning rate:

The learning rate determines how fast the parameters are updated.
If it is too large, the updates may overshoot the optimum.
If it is too small, convergence is slow.
