First neural network algorithm: Logistic Regression
Reference: https://blog.csdn.net/koala_tree/article/details/78057033
Full code, kept for personal reference.
Logistic Regression.py
Keep these shapes straight (a sanity-check sketch follows this list):
x_orig (m, num_px, num_px, 3), where m is the number of examples
x (num_px * num_px * 3, m), after reshaping
w (num_px * num_px * 3, 1), since it is transposed before being multiplied with x
y (1, m), the predictions
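A minimal standalone sketch of this shape pipeline (the zero arrays are dummy stand-ins for the real dataset, with m = 4 examples of 64x64 RGB images):

import numpy as np

x_orig = np.zeros((4, 64, 64, 3))            # (m, num_px, num_px, 3), m = 4
x = x_orig.reshape(x_orig.shape[0], -1).T    # (num_px*num_px*3, m) = (12288, 4)
w = np.zeros((x.shape[0], 1))                # (12288, 1)
y = np.dot(w.T, x)                           # (1, m) = (1, 4)
print(x.shape, w.shape, y.shape)             # (12288, 4) (12288, 1) (1, 4)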
import numpy as np
import matplotlib.pyplot as plt
import h5py                # package for reading datasets stored in H5 files
from PIL import Image      # used to load and resize your own test image
from lr_utils import load_dataset
from numpy_basic import sigmoid
# Part 1: prepare the dataset
train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()
# # Example of a picture
# index = 50
# plt.imshow(train_x_orig[index])
# # plt.show()
# print("y= " +str(train_y[:,index])+",it's a '" + classes[np.squeeze(train_y[:,index])].decode("utf-8")+"' picture.")
#1、Figure out the dimensions and shapes of data
# note: train_x_orig is a numpy-array of shape (m_train, num_px, num_px, 3)
m_train = train_x_orig.shape[0]  # number of training examples
m_test = test_x_orig.shape[0]    # number of test examples
num_px = train_x_orig.shape[1]   # height/width of each (square) image
# print("Number of training examples: m_train = "+ str(m_train)) 209
# print("Number of testing examples: m_test = "+str(m_test)) 50
# print("Height/Width of each image: num_px = "+str(num_px)) 64
# print("Each image is of size: ("+str(num_px)+", "+str(num_px)+", 3)") (64,64,3)
# print("train_x shape: "+str(train_x_orig.shape) + "\n train_y shape: "+str(train_y.shape) ) (209,64,64,3) (1,209)
# print("test_x shape: "+str(test_x_orig.shape) + "\n test_y shape: "+str(test_y.shape) ) (50,64,64,3) (1,50)
# 2、reshape images of shape (m_train, num_px, num_px, 3) into a numpy array of shape (num_px * num_px * 3, m_train)
# X_flatten = X.reshape(X.shape[0], -1).T   # X.T is the transpose of X; -1 lets numpy infer the flattened dimension
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0],-1).T
# print ("train_set_x_flatten shape: " + str(train_x_flatten.shape)) (12288, 209)
# print ("test_set_x_flatten shape: " + str(test_x_flatten.shape)) (12288, 50)
# print ("sanity check after reshaping: " + str(train_x_flatten[0:5,0])) #检查完整性 0:5前5行 0第一列
# 3、standardize
# for image data, standardization simply means dividing every pixel value by 255 (the maximum channel value)
train_x = train_x_flatten/255 #12288, 209
test_x = test_x_flatten/255 #12288, 50
#Logistic Regression is actually a very simple Neural Network!
# Part 2: build the neural network algorithm
# The main steps for building a Neural Network are:
# 1. Define the model structure (such as number of input features)
# 2. Initialize the model’s parameters
# 3. Loop:
# - Calculate current loss (forward propagation)
# - Calculate current gradient (backward propagation)
# - Update parameters (gradient descent)
# 1、sigmoid() (imported from numpy_basic.py)
# 2、Initializing parameters
def initialize_parameters(dim):
    w = np.zeros((dim, 1))   # column vector
    b = 0                    # broadcasting handles the dimensions, so a scalar 0 is enough
    assert(w.shape == (dim, 1))   # note the inner parentheses: the shape is the tuple (dim, 1)
assert(isinstance(b, float) or isinstance(b, int))
return w, b
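A quick sanity check, in the same commented-out style as the tests used elsewhere in this file (dim = 2 is an arbitrary choice):
# w, b = initialize_parameters(2)
# print("w = " + str(w))   # [[0.] [0.]]
# print("b = " + str(b))   # 0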
# 3、Forward and Backward propagation
def propagation(w, b, X, Y):
"""
Implement the cost function and its gradient for the propagation explained above
Arguments:
w -- weights, a numpy array of size (num_px * num_px * 3, 1)
b -- bias, a scalar
X -- data of size (num_px * num_px * 3, number of examples)
Y -- true "label" vector (containing 0 if non-cat, 1 if cat) of size (1, number of examples)
Return:
cost -- negative log-likelihood cost for logistic regression
dw -- gradient of the loss with respect to w, thus same shape as w
db -- gradient of the loss with respect to b, thus same shape as b
Tips:
- Write your code step by step for the propagation. np.log(), np.dot()
"""
m = X.shape[1]
# FORWARD PROPAGATION (FROM X TO COST)
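    # forward pass: Z = w.T·X + b,  A = sigmoid(Z),  cost J = -(1/m) * sum(Y*log(A) + (1-Y)*log(1-A))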
Z = np.dot(w.T, X)+b
A = sigmoid(Z)
cost = -(1.0/m)*np.sum(Y*np.log(A) + (1-Y)*np.log(1-A))
# BACKWARD PROPAGATION (TO FIND GRAD)
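    # gradients of J: dw = (1/m) * X·(A-Y).T,  db = (1/m) * sum(A-Y)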
dw = (1.0/m)*np.dot(X, (A-Y).T)
db = (1.0/m)*np.sum(A-Y)
assert(dw.shape == w.shape )
assert(db.dtype == float)
    cost = np.squeeze(cost)    # drop axes of size 1
    assert(cost.shape == ())   # after squeeze, cost is a 0-d array, so its shape is the empty tuple
grads = {"dw":dw,"db":db}
return grads, cost
w, b, X, Y = np.array([[1.],[2.]]), 2., np.array([[1.,2.,-1.],[3.,4.,-3.2]]), np.array([[1,0,1]])
# grads, cost = propagation(w, b, X, Y)
# print("dw= "+ str(grads["dw"]))
# print("db= "+ str(grads["db"]))
# print("cost= " + str(cost))
# 4、Optimization (iterative gradient descent)
def optimization(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
"""
This function optimizes w and b by running a gradient descent algorithm
Arguments:
w -- weights, a numpy array of size (num_px * num_px * 3, 1)
b -- bias, a scalar
X -- data of shape (num_px * num_px * 3, number of examples)
Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    print_cost -- True to print the loss every 100 steps
Returns:
params -- dictionary containing the weights w and bias b
grads -- dictionary containing the gradients of the weights and bias with respect to the cost function
costs -- list of all the costs computed during the optimization, this will be used to plot the learning curve.
Tips:
You basically need to write down two steps and iterate through them:
    1) Calculate the cost and the gradient for the current parameters. Use propagation().
2) Update the parameters using gradient descent rule for w and b.
"""
costs = []
for i in range(num_iterations):
grads, cost = propagation(w, b, X, Y)
dw = grads["dw"]
db = grads["db"]
w = w - learning_rate*dw
b = b - learning_rate*db
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
params = {"w": w, "b": b}
grads = {"dw": dw, "db": db}
return params, grads, costs
params, grads, costs = optimization(w, b, X, Y, num_iterations=100, learning_rate=0.009, print_cost= True)
# print ("w = " + str(params["w"]))
# print ("b = " + str(params["b"]))
# print ("dw = " + str(grads["dw"]))
# print ("db = " + str(grads["db"]))
# 5、Predictions
def predict(w, b, X):
'''
Predict whether the label is 0 or 1 using learned logistic regression parameters (w, b)
Arguments:
    w -- trained weights, shape (num_px * num_px * 3, 1)
    b -- trained bias, a scalar
    X -- test data, shape (num_px * num_px * 3, number of examples)
Returns:
Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
'''
m = X.shape[1]
Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0], 1)      # defensive reshape: guarantee w is a column vector of the right length
    A = sigmoid(np.dot(w.T, X) + b)   # one vectorized computation yields the whole activation matrix A
    for i in range(A.shape[1]):       # threshold each activation; A has shape (1, number of examples)
if A[0,i] >0.5:
Y_prediction[0, i] = 1
else:
Y_prediction[0, i] = 0
assert(Y_prediction.shape == (1, m))
return Y_prediction
w = np.array([[0.1124579],[0.23106775]])   # shape (2, 1); here 2 plays the role of num_px*num_px*3
b = -0.3
X = np.array([[1.,-1.1,-3.2],[1.2,2.,0.1]])   # shape (2, 3); again 2 stands for num_px*num_px*3
# print ("predictions = " + str(predict(w, b, X)))
#6、 Merge all functions into a model
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
"""
Builds the logistic regression model by calling the function you've implemented previously
Arguments:
X_train -- (num_px * num_px * 3, m_train)
Y_train -- (1, m_train)
X_test -- (num_px * num_px * 3, m_test)
Y_test -- (1, m_test)
    num_iterations -- number of iterations used to train the parameters
    learning_rate -- learning rate used in the update rule of optimization()
    print_cost -- Set to true to print the cost every 100 iterations
Returns:
d -- dictionary containing information about the model.
"""
    # uses initialize_parameters, propagation, optimization, predict
w, b = initialize_parameters(X_train.shape[0])
params, grads, costs = optimization(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
Y_prediction_test = predict(params["w"], params["b"], X_test)
    Y_prediction_train = predict(params["w"], params["b"], X_train)   # used to compute the train accuracy
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
d = {"w":params["w"],
"b":params["b"],
"costs":costs,
"Y_predict":Y_prediction_test,
"learning_rate":learning_rate}
return d
d = model(train_x, train_y, test_x, test_y, num_iterations = 2000, learning_rate =0.005, print_cost = False)
# test accuracy: 70.0 %, which is already quite good for such a simple classifier
# the model overfits the training set; a regularization term should be added later (see the sketch below)
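A minimal sketch of how L2 regularization could be bolted onto propagation() (the lambd hyperparameter and the function name are assumptions, not part of the original assignment): the cost gains a (lambd/(2m))*||w||^2 penalty and dw gains a matching (lambd/m)*w term.

def propagation_l2(w, b, X, Y, lambd=0.1):   # lambd is a hypothetical hyperparameter
    m = X.shape[1]
    A = sigmoid(np.dot(w.T, X) + b)
    # penalized cost: cross-entropy plus (lambd / (2m)) * ||w||^2
    cost = -(1.0/m)*np.sum(Y*np.log(A) + (1-Y)*np.log(1-A)) + (lambd/(2.0*m))*np.sum(w**2)
    dw = (1.0/m)*np.dot(X, (A-Y).T) + (lambd/m)*w   # the penalty also enters the gradient of w
    db = (1.0/m)*np.sum(A-Y)                        # b is conventionally not regularized
    return {"dw": dw, "db": db}, np.squeeze(cost)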
# Example of a picture that was wrongly classified
index = 1
# plt.imshow( test_x[:,index].reshape(num_px, num_px, 3) )
# plt.show()
# print ("y = " + str(test_y[0,index]) + ", you predicted that it is a \"" + classes[int(np.squeeze(d["Y_predict"][0,index]))].decode("utf-8") + "\" picture.")
# Plot learning curve (with costs)
costs = np.squeeze(d['costs'])
# plt.plot(costs)
# plt.ylabel('costs')
# plt.xlabel('iterations (per hundreds)')
# plt.title("learning rate= "+str(d["learning_rate"]))
# plt.show()
# analysis of the learning rate α
learning_rate = [0.01, 0.001, 0.0001]
# models = {}
# for i in learning_rate:
# print("learning rate is :" + str(i))
# models[str(i)] = model(train_x, train_y, test_x, test_y, num_iterations=1500, learning_rate=i, print_cost=False)
# print('\n' + "-------------------------------------------------------" + '\n')
#
# for i in learning_rate:
# plt.plot(np.squeeze(models[str(i)]["costs"]), label=str(models[str(i)]["learning_rate"]))
#
# plt.xlabel('iterations')
# plt.ylabel('cost')
#
# legend = plt.legend(loc='upper center', shadow=True)   # loc places the legend box; shadow=True draws a drop shadow behind it
# frame = legend.get_frame()
# frame.set_facecolor('0.90')
# plt.show()
#Test with your own image
my_image = "aaaaa.jpg"
# preprocess the image to fit your algorithm.
fname = "images/" + my_image
# scipy.ndimage.imread and scipy.misc.imresize were removed from recent SciPy releases,
# so load and resize with PIL instead (equivalent behavior):
image = np.array(Image.open(fname))
my_image = np.array(Image.open(fname).resize((num_px, num_px))).reshape((1, num_px*num_px*3)).T
my_image = my_image / 255.   # standardize the same way as the training data
my_predicted_image = predict(d["w"],d["b"],my_image)
plt.imshow(image)
print("y = " + str(np.squeeze(my_predicted_image)) + ", your algorithm predicts a \"" + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8") + "\" picture.")
numpy_basic.py
# 1 - Building basic functions with numpy
import numpy as np
# sigmoid function
def sigmoid(x):
    s = 1.0/(1 + np.exp(-x))   # sigmoid(x) = 1 / (1 + e^(-x))
return s
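np.exp(-x) raises overflow warnings for large negative x. A numerically safe drop-in (a sketch; assumes SciPy is installed, and sigmoid_stable is a hypothetical name) is scipy.special.expit:

from scipy.special import expit
def sigmoid_stable(x):
    return expit(x)   # evaluates 1/(1 + exp(-x)) without overflow warnings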
# Sigmoid gradient
def sigmoid_derivative(x):
s= sigmoid(x)
ds = s*(1-s)
return ds
# Reshaping arrays
def image2vector(image):
# (length, height, 3) to (length*height*3, 1)
v = image.reshape(image.shape[0]*image.shape[1]*image.shape[2],1)
return v
# Normalizing rows
def normalizeRows(x):
    # np.linalg.norm computes the vector norm of each row (axis=1);
    # keepdims=True keeps the result 2-D so the division below broadcasts correctly
    x_norm = np.linalg.norm(x, axis=1, keepdims=True)
x = x/x_norm
return x
# Broadcasting and the softmax function
def softmax(x):
    x_exp = np.exp(x)                             # x is an (m, n) matrix
    x_sum = np.sum(x_exp, axis=1, keepdims=True)  # axis=1 sums across each row, so x_sum has shape (m, 1)
    s = x_exp/x_sum                               # broadcasting divides each row by its own sum
return s
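np.exp can overflow when x contains large entries. The standard remedy (a sketch; softmax_stable is a hypothetical name) subtracts each row's maximum before exponentiating, which does not change the result because softmax is invariant to shifting a row by a constant:

def softmax_stable(x):
    x_shift = x - np.max(x, axis=1, keepdims=True)   # each row's max becomes 0, so np.exp cannot overflow
    x_exp = np.exp(x_shift)
    return x_exp / np.sum(x_exp, axis=1, keepdims=True)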
if __name__=="__main__":
x = np.array([1,2,3])
# print("sigmoid(x)= "+ str(sigmoid(x)))
# print("sigmoid_derivative(x) = " + str(sigmoid_derivative(x)))
image = np.array([[[0.67826139, 0.29380381],
[0.90714982, 0.52835647],
[0.4215251, 0.45017551]],
[[0.92814219, 0.96677647],
[0.85304703, 0.52351845],
[0.19981397, 0.27417313]],
[[0.60659855, 0.00533165],
[0.10820313, 0.49978937],
                      [0.34144279, 0.94630077]]])   # a 3x3x2 array, just for illustration; real images are typically (height, width, 3)
# print("image2vector(image)=" + str(image2vector(image)))
    x = np.array([[0, 3, 4], [1, 6, 4]])   # shape (2, 3)
# print("normalizeRows(x)= " + str(normalizeRows(x)))
x=np.array([[9,2,5,0,0],[7,5,0,0,0]])
print("softmax(x) = " + str(softmax(x)))
vectorization.py
import time
import numpy as np
x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]
# DOT PRODUCT (inner product)
tic = time.process_time()
dot = 0
for i in range(len(x1)):
dot += x1[i] * x2[i]
toc = time.process_time()
print("dot=" + str(dot) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# OUTER PRODUCT
tic = time.process_time()
outer = np.zeros((len(x1), len(x2)))   # outer[i, j] will hold x1[i] * x2[j], a len(x1) x len(x2) matrix
for i in range(len(x1)):
for j in range((len(x2))):
outer[i,j] = x1[i]*x2[j]
toc = time.process_time()
print("outer=" + str(outer) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# ELEMENTWISE multiplication
tic=time.process_time()
mul = np.zeros(len(x1))
for i in range(len(x1)):
mul[i] = x1[i] * x2[i]
toc=time.process_time()
print("elementwise mul=" + str(mul) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# GENERAL DOT PRODUCT: the matrix-vector product W·x1
tic = time.process_time()
W = np.random.rand(3, len(x1))   # np.random.rand takes separate dimension arguments, not a tuple
gdot = np.zeros(W.shape[0])      # 3 output components
for i in range(W.shape[0]):
for j in range(len(x1)):
gdot[i] += W[i,j]*x1[j]
toc=time.process_time()
print("gdot =" + str(gdot ) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# in practice, just call the vectorized NumPy versions directly:
np.dot(x1,x2)
np.outer(x1,x2)
np.multiply(x1,x2)
np.dot(W,x1)
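For comparison, the vectorized calls can be timed with the same pattern as the loops above (a sketch reusing x1, x2, and W from this file):

tic = time.process_time()
dot = np.dot(x1, x2)   # replaces the explicit accumulation loop
toc = time.process_time()
print("vectorized dot = " + str(dot) + "\n ----------Computation Time = " + str(1000*(toc-tic)) + "ms")

tic = time.process_time()
gdot = np.dot(W, x1)   # replaces the double loop
toc = time.process_time()
print("vectorized gdot = " + str(gdot) + "\n ----------Computation Time = " + str(1000*(toc-tic)) + "ms")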
L1_L2_Loss Function.py
import numpy as np
def L1(yhat, y):
"""
Arguments:
yhat -- vector of size m (predicted labels)
y -- vector of size m (true labels)
Returns:
    loss -- the value of the L1 loss, sum(|y - yhat|)
"""
loss = np.sum(np.abs(y-yhat))
return loss
def L2(yhat, y):
"""
Arguments:
yhat -- vector of size m (predicted labels)
y -- vector of size m (true labels)
Returns:
    loss -- the value of the L2 loss, sum((y - yhat)^2)
"""
    loss = np.dot(y - yhat, y - yhat)   # for 1-D vectors, np.dot(v, v) == sum(v**2)
#loss = np.sum(np.power((y-yhat),2))
return loss
yhat = np.array([.9, 0.2, 0.1, .4, .9])
y = np.array([1, 0, 0, 1, 1])
print("L1 = " + str(L1(yhat,y)))
print("L2 = " + str(L2(yhat,y)))