2020 Andrew Ng Machine Learning Programming Assignments in Python: ex4

# -*- coding: utf-8 -*-
"""
Created on Wed Jul  1 22:37:49 2020

@author: cheetah023
"""
import numpy as np
import scipy.io as sci
import scipy.optimize as opt

#Function definitions
def sigmoid(X):
    return 1 / (1 + np.exp(-X))
def sigmoidGradient(X):
    #derivative of the sigmoid: g'(z) = g(z) * (1 - g(z))
    g = sigmoid(X) * (1 - sigmoid(X))
    return g
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda):
    #print('nn_params',nn_params.shape)
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size,input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    m = X.shape[0]
    ones = np.ones([m,1])
    X = np.column_stack([ones,X])
    a2 = sigmoid(np.dot(X,theta1.T))#5000*25
    a2 = np.column_stack([ones,a2])#5000*26
    a3 = sigmoid(np.dot(a2,theta2.T))#5000*10
    #one-hot encode the labels (classes are 1..10, so the column index is y[i]-1)
    y_t = np.zeros([m,num_labels])
    for i in range(0,m):
        y_t[i,y[i]-1] = 1
    cost = np.sum(-y_t * np.log(a3) - (1-y_t) * np.log(1-a3)) / m
    
    #regularization excludes the bias column of each theta
    theta1_t = theta1[:,1:]
    #print('theta1_t:',theta1_t.shape)
    theta2_t = theta2[:,1:]
    reg = (np.sum(theta1_t ** 2) + np.sum(theta2_t ** 2)) * lamda / (2 * m)
    #print('reg:',reg)
    cost = cost + reg
    return cost
def nnGradient(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda):
    m = X.shape[0]
    ones = np.ones([m,1])
    X = np.column_stack([ones,X])
    
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size,input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    theta1_grad = np.zeros(theta1.shape)#25*401
    theta2_grad = np.zeros(theta2.shape)#10*26
    theta1_t = theta1[:,1:]#25*400
    theta2_t = theta2[:,1:]#10*25
    #one-hot encode the labels, as in nnCostFunction
    y_t = np.zeros([m,num_labels])
    for i in range(0,m):
        y_t[i,y[i]-1] = 1
    for i in range(0,m):
        #forward pass for example i
        z2 = np.dot(X[i,:],theta1.T)#1*25
        a2 = sigmoid(z2)#1*25
        a2 = np.hstack((1,a2))#1*26
        a3 = sigmoid(np.dot(a2,theta2.T))#1*10
        #backpropagate the output error through theta2
        delta3 = a3 - y_t[i,:]#1*10
        delta2 = np.dot(delta3,theta2_t) * sigmoidGradient(z2)#1*25
        
        delta2 = np.reshape(delta2,[1,hidden_layer_size])
        X_t = np.reshape(X[i,:],[1,input_layer_size+1])
        delta3 = np.reshape(delta3,[1,num_labels])
        a2 = np.reshape(a2,[1,hidden_layer_size+1])
        
        theta1_grad = theta1_grad + np.dot(delta2.T,X_t)#25*401
        theta2_grad = theta2_grad + np.dot(delta3.T,a2)#10*26
    #average the accumulated gradients; bias columns are not regularized
    theta1_grad[:,0] = theta1_grad[:,0] / m
    theta1_grad[:,1:] = theta1_grad[:,1:] / m + (lamda/m) * theta1_t
    theta2_grad[:,0] = theta2_grad[:,0] / m
    theta2_grad[:,1:] = theta2_grad[:,1:] / m + (lamda/m) * theta2_t
    #grad = np.vstack((theta1_grad.reshape(-1,1),theta2_grad.reshape(-1,1)))
    grad = np.concatenate((theta1_grad.flatten(), theta2_grad.flatten()))
    return grad
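#A fully vectorized alternative to the per-example loop above (my own sketch,
#not part of the original assignment): computing all deltas at once is much
#faster on the 5000-example set. It assumes the same parameter layout and the
#same sigmoid/sigmoidGradient helpers as nnGradient.
def nnGradientVectorized(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, X, y, lamda):
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size,input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    m = X.shape[0]
    a1 = np.column_stack([np.ones([m,1]),X])#5000*401
    z2 = np.dot(a1,theta1.T)#5000*25
    a2 = np.column_stack([np.ones([m,1]),sigmoid(z2)])#5000*26
    a3 = sigmoid(np.dot(a2,theta2.T))#5000*10
    y_t = np.zeros([m,num_labels])
    y_t[np.arange(m),y.flatten()-1] = 1#one-hot labels without a loop
    delta3 = a3 - y_t#5000*10
    delta2 = np.dot(delta3,theta2[:,1:]) * sigmoidGradient(z2)#5000*25
    theta1_grad = np.dot(delta2.T,a1) / m#25*401
    theta2_grad = np.dot(delta3.T,a2) / m#10*26
    theta1_grad[:,1:] = theta1_grad[:,1:] + (lamda/m) * theta1[:,1:]
    theta2_grad[:,1:] = theta2_grad[:,1:] + (lamda/m) * theta2[:,1:]
    return np.concatenate((theta1_grad.flatten(), theta2_grad.flatten()))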
def randInitializeWeights(L_in, L_out):
    #break symmetry: uniform random values in [-epsilon_init, epsilon_init]
    epsilon_init = 0.12
    W = np.random.rand(L_out,1+L_in) * 2 * epsilon_init - epsilon_init
    return W
def predict(Theta1, Theta2, X):
    m = X.shape[0]
    ones = np.ones([m,1])
    X = np.column_stack([ones,X])
    a2 = sigmoid(np.dot(X,Theta1.T))#5000*25
    a2 = np.column_stack([ones,a2])#5000*26
    a3 = sigmoid(np.dot(a2,Theta2.T))#5000*10
    #argmax over the 10 outputs; +1 maps index 0..9 back to labels 1..10
    p = np.argmax(a3,axis = 1) + 1
    p = np.reshape(p,[m,1])
    return p
# Setup the parameters you will use for this exercise
input_layer_size  = 400  # 20x20 Input Images of Digits
hidden_layer_size = 25   # 25 hidden units
num_labels = 10          # 10 labels, from 1 to 10

#Part 1: Loading and Visualizing Data
data1 = sci.loadmat('ex4data1.mat')
#print(data1.keys())
X = data1['X']
y = data1['y']
print('X:',X.shape)
print('y:',y.shape)

#Part 2: Loading Parameters
data2 = sci.loadmat('ex4weights.mat')
#print(data2.keys())
theta1 = data2['Theta1']
theta2 = data2['Theta2']
print('theta1:',theta1.shape)
print('theta2:',theta2.shape)

#Part 3: Compute Cost (Feedforward)
lamda = 0
nn_params = np.vstack((theta1.reshape([-1,1]),theta2.reshape([-1,1])))
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda)
print('Cost at parameters (loaded from ex4weights):',cost)
print('(this value should be about 0.287629)')

#Part 4: Implement Regularization
lamda = 1
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda)
print('Cost at parameters (loaded from ex4weights):',cost)
print('(this value should be about 0.383770)')

#Part 5: Sigmoid Gradient
g = sigmoidGradient(np.array([-1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:')
print(g)

#Part 6: Initializing Parameters
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_nn_params = np.vstack((initial_Theta1.reshape([-1,1]),
                               initial_Theta2.reshape([-1,1])))
#Part 7: Implement Backpropagation (gradient checking was skipped for lack of
#time in the original; a sketch of it is added below)
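#A minimal sketch of numerical gradient checking (my own helper, not the
#course's checkNNGradients). It compares a centered finite difference against
#the analytic gradient; on a small debug network the two should agree to
#roughly 1e-9 in relative difference.
def computeNumericalGradient(costFunc, params, eps=1e-4):
    numgrad = np.zeros(params.size)
    perturb = np.zeros(params.size)
    for i in range(params.size):
        perturb[i] = eps
        loss1 = costFunc(params - perturb)
        loss2 = costFunc(params + perturb)
        numgrad[i] = (loss2 - loss1) / (2 * eps)#centered difference
        perturb[i] = 0
    return numgrad
#example usage (slow on the full network, since it loops over every parameter):
#costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
#                                    num_labels, X, y, lamda)
#numgrad = computeNumericalGradient(costFunc, initial_nn_params.flatten())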
#Part 8a: Implement Regularization
lamda = 3
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda)
print('Cost at (fixed) debugging parameters (lambda = 3):',cost)
print('(for lambda = 3, this value should be about 0.576051)')

#Part 8b: Training NN
lamda = 1

#method='TNC' gave about 0.84-0.88 training accuracy here; method='CG' about 0.96
result = opt.minimize(fun=nnCostFunction,
                      x0=initial_nn_params,
                      args=(input_layer_size,hidden_layer_size,num_labels,X,y,lamda),
                      #method='TNC',#truncated Newton algorithm
                      method='CG',#conjugate gradient algorithm
                      jac=nnGradient,
                      options={'maxiter': 50})
'''
#equivalent to opt.minimize with method='CG'
nnParam = opt.fmin_cg(f=nnCostFunction, x0=initial_nn_params, fprime=nnGradient,
                     args=(input_layer_size, hidden_layer_size,num_labels, X, y, lamda),
                     maxiter=50, disp=True)
'''
nnParam = result.x
theta1 = np.reshape(nnParam[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size,input_layer_size+1])
theta2 = np.reshape(nnParam[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
#Part 9: Visualize Weights (not plotted in the original; a sketch follows)
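#A quick sketch for Part 9 (my own plotting code, not the course's displayData):
#each row of theta1[:,1:] holds 400 weights, which can be shown as a 20x20
#image of the input pattern that hidden unit responds to.
import matplotlib.pyplot as plt
fig, axes = plt.subplots(5, 5, figsize=(6, 6))
for ax, row in zip(axes.flat, theta1[:,1:]):
    ax.imshow(row.reshape(20, 20).T, cmap='gray')#transpose matches the .mat column-major layout
    ax.axis('off')
plt.show()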
#Part 10: Implement Predict
p = predict(theta1, theta2, X)
accuracy = np.mean(p == y)#fraction of training examples classified correctly
print('Training Set Accuracy:',accuracy)

Output:

X: (5000, 400)
y: (5000, 1)
theta1: (25, 401)
theta2: (10, 26)
Cost at parameters (loaded from ex4weights): 0.2876291651613189
(this value should be about 0.287629)
Cost at parameters (loaded from ex4weights): 0.38376985909092365
(this value should be about 0.383770)
Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:  
[0.19661193 0.23500371 0.25       0.23500371 0.19661193]
Cost at (fixed) debugging parameters (lambda = 3): 0.5760512469501331
(for lambda = 3, this value should be about 0.576051)
Training Set Accuracy: 0.9634

Summary:

1. My plotting skills are still weak, and with limited time I skipped the figures.

2. In matrix computations, most problems come down to dimension mismatches; checking the shapes first saves time.

3. The first time I called opt.minimize I forgot to set a maximum number of iterations, so it ran for a very long time without ever finishing.
