2020 Andrew Ng Machine Learning programming assignment in Python: ex4

# -*- coding: utf-8 -*-
"""
Created on Wed Jul  1 22:37:49 2020

@author: cheetah023
"""
import numpy as np
import scipy.io as sci
import scipy.optimize as opt

#Function definitions
def sigmoid(X):
    return 1 / (1 + np.exp(-X))
def sigmoidGradient(X):
    g = sigmoid(X) * (1 - sigmoid(X))
    return g
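#For reference: sigmoidGradient implements g'(z) = g(z) * (1 - g(z)), the
#derivative of the sigmoid. It peaks at g'(0) = 0.25, which is exactly the
#middle value printed by the Part 5 check below.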
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda):
    #print('nn_params',nn_params.shape)
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size,input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    m = X.shape[0]
    ones = np.ones([m,1])
    X = np.column_stack([ones,X])
    a2 = sigmoid(np.dot(X,theta1.T))#5000*25
    a2 = np.column_stack([ones,a2])#5000*26
    a3 = sigmoid(np.dot(a2,theta2.T))#5000*10
    y_t = np.zeros([m,num_labels])
    for i in range(0,m):
        y_t[i,y[i]-1] = 1
    cost = np.sum(-y_t * np.log(a3) - (1-y_t) * np.log(1-a3)) / m
    
    theta1_t = theta1[:,1:]
    #print('theta1_t:',theta1_t.shape)
    theta2_t = theta2[:,1:]
    reg = (np.sum(theta1_t ** 2) + np.sum(theta2_t ** 2)) * lamda / (2 * m)
    #print('reg:',reg)
    cost = cost + reg
    return cost
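#For reference, the regularized cost computed above is
#  J = (1/m) * sum over i,k of [ -y_k(i)*log(h(x(i))_k) - (1-y_k(i))*log(1-h(x(i))_k) ]
#      + (lambda/(2m)) * ( sum(theta1[:,1:]**2) + sum(theta2[:,1:]**2) )
#where the bias columns of theta1 and theta2 are excluded from the
#regularization term (hence the theta1_t/theta2_t slices).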
def nnGradient(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda):
    m = X.shape[0]
    ones = np.ones([m,1])
    X = np.column_stack([ones,X])
    
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size,input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    theta1_grad = np.zeros(theta1.shape)#25*401
    theta2_grad = np.zeros(theta2.shape)#10*26
    theta1_t = theta1[:,1:]#25*400
    theta2_t = theta2[:,1:]#10*25
    y_t = np.zeros([m,num_labels])
    for i in range(0,m):
        y_t[i,y[i]-1] = 1
    for i in range(0,m):
        z2 = np.dot(X[i,:],theta1.T)#1*25
        a2 = sigmoid(z2)#1*25
        a2 = np.hstack((1,a2))#1*26
        a3 = sigmoid(np.dot(a2,theta2.T))#1*10
        delta3 = a3 - y_t[i,:]#1*10
        delta2 = np.dot(delta3,theta2_t) * sigmoidGradient(z2)#1*25
        
        delta2 = np.reshape(delta2,[1,hidden_layer_size])
        X_t = np.reshape(X[i,:],[1,input_layer_size+1])
        delta3 = np.reshape(delta3,[1,num_labels])
        a2 = np.reshape(a2,[1,hidden_layer_size+1])
        
        theta1_grad = theta1_grad + np.dot(delta2.T,X_t)#25*401
        theta2_grad = theta2_grad + np.dot(delta3.T,a2)#10*26
    theta1_grad[:,0] = theta1_grad[:,0] / m
    theta1_grad[:,1:] = theta1_grad[:,1:] / m + (lamda/m) * theta1_t
    theta2_grad[:,0] = theta2_grad[:,0] / m
    theta2_grad[:,1:] = theta2_grad[:,1:] / m + (lamda/m) * theta2_t
    #grad = np.vstack((theta1_grad.reshape(-1,1),theta2_grad.reshape(-1,1)))
    grad = np.concatenate((theta1_grad.flatten(), theta2_grad.flatten()))
    return grad
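#Optional extra (a sketch of my own, not part of the assignment's required
#per-example loop): the same backprop gradient computed with whole-matrix
#operations, which is usually much faster in NumPy. It should return the
#same values as nnGradient.
def nnGradientVectorized(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, X, y, lamda):
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size, input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    m = X.shape[0]
    a1 = np.column_stack([np.ones([m,1]), X])            #5000*401
    z2 = np.dot(a1, theta1.T)                            #5000*25
    a2 = np.column_stack([np.ones([m,1]), sigmoid(z2)])  #5000*26
    a3 = sigmoid(np.dot(a2, theta2.T))                   #5000*10
    y_t = np.zeros([m, num_labels])
    y_t[np.arange(m), y.flatten()-1] = 1                 #one-hot labels
    delta3 = a3 - y_t                                    #5000*10
    delta2 = np.dot(delta3, theta2[:,1:]) * sigmoidGradient(z2)#5000*25
    theta1_grad = np.dot(delta2.T, a1) / m               #25*401
    theta2_grad = np.dot(delta3.T, a2) / m               #10*26
    theta1_grad[:,1:] += (lamda/m) * theta1[:,1:]        #skip bias column
    theta2_grad[:,1:] += (lamda/m) * theta2[:,1:]
    return np.concatenate((theta1_grad.flatten(), theta2_grad.flatten()))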
def randInitializeWeights(L_in, L_out):
    epsilon_init = 0.12
    #break symmetry: uniform random values in [-epsilon_init, epsilon_init]
    W = np.random.rand(L_out,1+L_in) * 2 * epsilon_init - epsilon_init
    return W
def predict(Theta1, Theta2, X):
    m = X.shape[0]
    ones = np.ones([m,1])
    X = np.column_stack([ones,X])
    a2 = sigmoid(np.dot(X,Theta1.T))#5000*25
    a2 = np.column_stack([ones,a2])#5000*26
    a3 = sigmoid(np.dot(a2,Theta2.T))#5000*10
    p = np.argmax(a3,axis = 1) + 1
    p = np.reshape(p,[m,1])
    return p
# Setup the parameters you will use for this exercise
input_layer_size  = 400  # 20x20 Input Images of Digits
hidden_layer_size = 25   # 25 hidden units
num_labels = 10          # 10 labels, from 1 to 10

#Part 1: Loading and Visualizing Data
data1 = sci.loadmat('ex4data1.mat')
#print(data1.keys())
X = data1['X']
y = data1['y']
print('X:',X.shape)
print('y:',y.shape)

#Part 2: Loading Parameters
data2 = sci.loadmat('ex4weights.mat')
#print(data2.keys())
theta1 = data2['Theta1']
theta2 = data2['Theta2']
print('theta1:',theta1.shape)
print('theta2:',theta2.shape)

#Part 3: Compute Cost (Feedforward)
lamda = 0
nn_params = np.vstack((theta1.reshape([-1,1]),theta2.reshape([-1,1])))
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda)
print('Cost at parameters (loaded from ex4weights):',cost)
print('(this value should be about 0.287629)')

#Part 4: Implement Regularization
lamda = 1
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda)
print('Cost at parameters (loaded from ex4weights):',cost)
print('(this value should be about 0.383770)')

#Part 5: Sigmoid Gradient
g = sigmoidGradient(np.array([-1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:  ')
print(g)

#Part 6: Initializing Parameters
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_nn_params = np.vstack((initial_Theta1.reshape([-1,1]),
                               initial_Theta2.reshape([-1,1])))
#Part 7: Implement Backpropagation (I was short on time and skipped the
#gradient-checking function; a minimal sketch follows below)
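#A minimal numerical gradient check (my sketch; the course's checkNNGradients
#instead builds a tiny debug network, which is much faster). It compares
#nnGradient against central finite differences of nnCostFunction at a few
#randomly chosen parameters; the two columns should agree very closely.
def checkGradient(nn_params, args, num_checks=5, eps=1e-4):
    params = np.array(nn_params).flatten()
    grad = nnGradient(params, *args)
    for i in np.random.choice(params.size, num_checks, replace=False):
        p_plus, p_minus = params.copy(), params.copy()
        p_plus[i] += eps
        p_minus[i] -= eps
        numeric = (nnCostFunction(p_plus, *args)
                   - nnCostFunction(p_minus, *args)) / (2 * eps)
        print('param %d: numeric %.6e, analytic %.6e' % (i, numeric, grad[i]))
#Example call (each check costs two full feedforward passes, so it is slow
#on the full 5000-example set):
#checkGradient(initial_nn_params,
#              (input_layer_size, hidden_layer_size, num_labels, X, y, 1))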
#Part 8a: Implement Regularization
lamda = 3
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda)
print('Cost at (fixed) debugging parameters (lambda = 3):',cost)
print('(for lambda = 3, this value should be about 0.576051)')

#Part 8b: Training NN
lamda = 1

#TNC reaches about 0.84-0.88 training accuracy here; CG reaches around 0.96
result = opt.minimize(fun=nnCostFunction,
                      x0=initial_nn_params,
                      args=(input_layer_size,hidden_layer_size,num_labels,X,y,lamda),
                      #method='TNC',#truncated Newton algorithm
                      method='CG',#conjugate gradient algorithm
                      jac=nnGradient,
                      options={'maxiter': 50})
'''
#Equivalent to opt.minimize with method='CG'
nnParam = opt.fmin_cg(f=nnCostFunction, x0=initial_nn_params, fprime=nnGradient,
                     args=(input_layer_size, hidden_layer_size,num_labels, X, y, lamda),
                     maxiter=50, disp=True)
'''
nnParam = result.x
theta1 = np.reshape(nnParam[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size,input_layer_size+1])
theta2 = np.reshape(nnParam[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
#Part 9: Visualize Weights (no plot in my original run; a rough sketch below)
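#A rough sketch of the weight visualization (assumes matplotlib is available;
#the course's displayData tiles the 25 hidden units' weights as 20x20
#grayscale images). Each row of theta1[:,1:] is one hidden unit's 400 weights.
import matplotlib.pyplot as plt
fig, axes = plt.subplots(5, 5, figsize=(5, 5))
for ax, row in zip(axes.flat, theta1[:, 1:]):
    ax.imshow(row.reshape(20, 20).T, cmap='gray')#transpose: MATLAB column-major
    ax.axis('off')
plt.show()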
#Part 10: Implement Predict
p = predict(theta1, theta2, X)
temp = (p == y)
prob = np.mean(temp)
print('Training Set Accuracy:',prob)

Output:

X: (5000, 400)
y: (5000, 1)
theta1: (25, 401)
theta2: (10, 26)
Cost at parameters (loaded from ex4weights): 0.2876291651613189
(this value should be about 0.287629)
Cost at parameters (loaded from ex4weights): 0.38376985909092365
(this value should be about 0.383770)
Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:  
[0.19661193 0.23500371 0.25       0.23500371 0.19661193]
Cost at (fixed) debugging parameters (lambda = 3): 0.5760512469501331
(for lambda = 3, this value should be about 0.576051)
Training Set Accuracy: 0.9634

Summary:

1. My plotting skills are still weak; with limited time I skipped the visualizations.

2. In matrix code, most bugs turn out to be dimension mismatches; checking shapes first saves a lot of time.

3. The first time I used opt.minimize I forgot to set a maximum iteration count, so it churned for ages and never seemed to finish.