2020吴恩达 machine learning 编程作业 python实现 ex2

最新推荐文章于 2022-08-20 10:15:12 发布

cheetah023

最新推荐文章于 2022-08-20 10:15:12 发布

阅读量1.3k

点赞数

分类专栏：吴恩达机器学习 | 文章标签：机器学习 python numpy 吴恩达

本文链接：https://blog.csdn.net/cheetah023/article/details/107067426

版权

吴恩达机器学习专栏收录该内容

14 篇文章 2 订阅

订阅专栏

本文详细介绍了如何使用逻辑回归进行二分类预测，并通过正则化防止过拟合。利用numpy和scipy库实现成本函数和梯度计算，通过matplotlib进行数据可视化。通过调整超参数，优化模型并评估训练准确性。

摘要由CSDN通过智能技术生成

# -*- coding: utf-8 -*-
"""
Created on Tue Jun 30 16:41:10 2020

@author: cheetah023
"""


import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt

#函数定义
def sigmoid(X):
    """Return the logistic function 1 / (1 + exp(-X)), applied element-wise.

    X may be a scalar or a NumPy array; the result has the same shape.
    """
    exp_neg = np.exp(-X)
    return 1 / (1 + exp_neg)
def costFunction(theta, X, y, lamda):
    """Return the regularized logistic-regression cost.

    Parameters
    ----------
    theta : array_like
        Model parameters with ``X.shape[1]`` elements in any shape; they are
        reshaped to a column vector internally.
    X : ndarray, shape (m, n)
        Design matrix, one sample per row.
    y : ndarray
        Labels. With the (m, 1) column used by this script the result is a
        (1, 1) array.
    lamda : float
        Regularization strength. The first parameter (``theta[0]``) is not
        penalized.

    Returns
    -------
    ndarray
        Mean cross-entropy plus ``lamda / (2 * m)`` times the sum of squares
        of ``theta[1:]``.
    """
    m, n = X.shape
    theta = np.reshape(theta, (n, 1))
    z = np.dot(X, theta)
    # Work with the log-sigmoid directly instead of log(sigmoid(z)):
    #   log(sigmoid(z))     = -log(1 + exp(-z)) = -logaddexp(0, -z)
    #   log(1 - sigmoid(z)) = -log(1 + exp(z))  = -logaddexp(0,  z)
    # This stays finite when the sigmoid saturates to exactly 0 or 1, where
    # the naive form evaluates log(0) and yields inf/nan.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    cost = (np.dot(-y.T, log_h) - np.dot(1 - y.T, log_one_minus_h)) / m
    # Skip the first parameter by slicing rather than assigning theta[0] = 0:
    # np.reshape may return a view, so an assignment could change the
    # caller's array.
    penalized = theta[1:, 0]
    cost = cost + np.dot(penalized, penalized) * lamda / (2 * m)
    return cost
def gradient(theta, X, y, lamda):
    """Return the gradient of the regularized logistic-regression cost.

    Parameters
    ----------
    theta : array_like
        Model parameters with ``X.shape[1]`` elements in any shape. The
        caller's array is never modified.
    X : ndarray, shape (m, n)
        Design matrix, one sample per row.
    y : array_like
        Labels with ``m`` elements; reshaped to an (m, 1) column internally.
    lamda : float
        Regularization strength. The first parameter (``theta[0]``) is not
        penalized.

    Returns
    -------
    ndarray, shape (n, 1)
        ``X.T @ (sigmoid(X @ theta) - y) / m`` plus ``lamda / m`` times
        ``theta`` with its first entry replaced by zero.
    """
    m, n = X.shape
    # np.array(...) makes a private copy. np.reshape alone may hand back a
    # view, in which case zeroing theta[0] below would overwrite the
    # caller's parameter vector.
    theta = np.array(theta, dtype=float).reshape(n, 1)
    # Force y to a column: with a 1-D y, ``sig - y`` would broadcast to an
    # (m, m) matrix and the result would no longer have n elements, which is
    # what makes opt.minimize fail with
    # "ValueError: tnc: invalid gradient vector from minimized function."
    y = np.reshape(y, (m, 1))
    sig = sigmoid(np.dot(X, theta))
    grad = np.dot(X.T, sig - y) / m
    # The first parameter is excluded from the penalty term.
    theta[0] = 0
    return grad + theta * lamda / m
def plotDecisionBoundary(theta, X, y):
    """Draw the line theta[0] + theta[1]*x1 + theta[2]*x2 = 0 on the current plot.

    x1 is sampled in steps of 0.5 between the minimum and maximum of column 1
    of X. The legend is then set for the boundary and the two classes.
    ``y`` is accepted but not used.
    """
    first_feature = X[:, 1]
    xs = np.arange(np.min(first_feature), np.max(first_feature), 0.5)
    # Solve the boundary equation for x2 at every sampled x1.
    ys = -(theta[0] + theta[1] * xs) / theta[2]
    plt.plot(xs, ys, '-')
    plt.legend(['decision boundary', 'Admitted', 'not Admitted'],
               loc='upper right')
def plotdata(X, y):
    """Scatter the first two columns of X, split by label.

    Rows whose label is above 0.5 are drawn as green circles, rows whose
    label is below 0.5 as red crosses.
    """
    # np.where returns a tuple of index arrays, one per dimension of y;
    # only the row indices (first entry) are needed.
    positive_rows = np.where(y > 0.5)[0]
    negative_rows = np.where(y < 0.5)[0]
    groups = (
        (positive_rows, 'o', 'g'),
        (negative_rows, 'x', 'r'),
    )
    for rows, marker, colour in groups:
        plt.scatter(X[rows, 0], X[rows, 1], marker=marker, c=colour)
    
# Part 1: Plotting
# ex2data1.txt is read as comma-separated numbers; the first two columns are
# used as features and the third column as the label.
data = np.loadtxt('ex2data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2:3]  # slicing 2:3 (instead of indexing 2) keeps y as a column
print('X:', X.shape)
print('y:', y.shape)

plotdata(X, y)
plt.xlabel('exam 1 score')
plt.ylabel('exam 2 score')
plt.legend(['Admitted', 'not Admitted'], loc='upper right')

# Part 2: Compute Cost and Gradient
m, n = X.shape
ones = np.ones((m, 1))
X = np.column_stack((ones, X))  # prepend the intercept column of ones
initial_theta = np.zeros((n + 1, 1))
lamda = 0  # regularization switched off for this data set
cost = costFunction(initial_theta, X, y, lamda)
grad = gradient(initial_theta, X, y, lamda)
print("Cost at initial theta (zeros):", cost)
print('Expected cost (approx): 0.693')
print('Gradient at initial theta (zeros):\n', grad)
print('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628')


test_theta = [[-24], [0.2], [0.2]]
cost = costFunction(test_theta, X, y, lamda)
grad = gradient(test_theta, X, y, lamda)
print('Cost at test theta:', cost)
print('Expected cost (approx): 0.218')
print('Gradient at test theta:\n', grad)
print('Expected gradients (approx):\n 0.043\n 2.566\n 2.647')

# Part 3: Optimizing (scipy's TNC method stands in for fminunc)
# scipy.optimize.minimize documents x0 as a 1-D array of shape (n,) and the
# jac callable as returning shape (n,). Flatten both here so the call does
# not depend on column vectors being tolerated.
result = opt.minimize(fun=costFunction,
                      x0=initial_theta.ravel(),
                      args=(X, y, lamda),
                      method='TNC',
                      jac=lambda t, *extra: np.ravel(gradient(t, *extra)))
print('Cost at theta found by fminunc:', result.fun)
print('Expected cost (approx): 0.203')
print('theta:', result.x)
print('Expected theta (approx):')
print('-25.161\n 0.206\n 0.201')

# Plot Boundary
theta = result.x
plotDecisionBoundary(theta, X, y)
plt.legend(['decision boundary', 'Admitted', 'not Admitted'], loc='upper right')

# Part 4: Predict and Accuracies
# Leading 1 is the intercept term, followed by the two exam scores.
prob = sigmoid(np.dot([1, 45, 85], theta))
print('For a student with scores 45 and 85, we predict an admission')
print('probability of', prob)

# Label a sample 1 when its predicted probability is at least 0.5, then
# compare against y to get the percentage of matching rows.
h = sigmoid(np.dot(X, theta))
index = np.where(h >= 0.5)
p = np.zeros([m, 1])
p[index] = 1
prob = np.mean(np.double(p == y)) * 100
print('Train Accuracy:', prob)
print('Expected accuracy (approx): 89.0')

运行结果：

X: (100, 2)
y: (100, 1)
Cost at initial theta (zeros): [[0.69314718]]
Expected cost (approx): 0.693
Gradient at initial theta (zeros):
[[ -0.1 ]
[-12.00921659]
[-11.26284221]]
Expected gradients (approx):
-0.1000
-12.0092
-11.2628
Cost at test theta: [[0.21833019]]
Expected cost (approx): 0.218
Gradient at test theta:
[[0.04290299]
[2.56623412]
[2.64679737]]
Expected gradients (approx):
0.043
2.566
2.647
Cost at theta found by fminunc: [[0.2034977]]
Expected cost (approx): 0.203
theta: [-25.16131857 0.20623159 0.20147149]
Expected theta (approx):
-25.161
0.206
0.201
For a student with scores 45 and 85, we predict an admission
probability of 0.7762906213164001
Train Accuracy: 89.0
Expected accuracy (approx): 89.0