# 斯坦福ML课程——python转写(Week3—课程作业ex2_1)

22 篇文章 1 订阅

In this part of the exercise, you will build a logistic regression model to predict whether a student gets admitted into a university. Suppose that you are the administrator of a university department and you want to determine each applicant's chance of admission based on their results on two exams. You have historical data from previous applicants that you can use as a training set for logistic regression. For each training example, you have the applicant's scores on two exams and the admissions
decision. Your task is to build a classification model that estimates an applicant's probability of admission based on the scores from those two exams. This outline and the framework code in ex2.m will guide you through the exercise.

1. Part 1: Plotting #显示课程给出的数据
2. Part 2: Compute Cost and Gradient #计算cost以及gradient
3. Part 3: Optimizing using minimize #进行优化，课程中给出的是利用fminunc，python中我用的是minimize
4. Part 4: Predict and Accuracies #预测和准确度

"""
Created on Mon Nov 11 07:50:44 2019

@author: Lonely_hanhan
"""

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as op
from mpl_toolkits.mplot3d import Axes3D

"""

"""
# Load the training set: columns 0-1 are the two exam scores, column 2 is
# the 0/1 admission decision.
# Raw string: the original 'D:\exercise\...' contains invalid escape
# sequences (\e, \m), which modern Python flags as a SyntaxWarning; the
# raw form keeps the exact same path bytes.
Data = np.loadtxt(r'D:\exercise\machine-learning-ex2\ex2\ex2data1.txt', delimiter=',')
X = Data[:, 0:2]
[m, n] = X.shape
Y = Data[:, 2]         # loadtxt yields a 1-D array for a single column...
Y = Y.reshape((m, 1))  # ...reshape it into an (m, 1) column vector

''' ==================== Part 1: Plotting ==================== '''

# Row indices of admitted (y == 1) and rejected (y == 0) examples.
# np.where on the (m, 1) array Y returns a tuple of coordinate arrays;
# [0] extracts the row indices directly (the original rebuilt them via
# shape inspection and reshape).
pos = np.where(Y == 1)[0]
neg = np.where(Y == 0)[0]

# Numeric linewidth/markersize: the original passed strings ('2', '7'),
# which newer matplotlib releases reject.
plt.plot(X[pos, 0], X[pos, 1], color='black', linewidth=2, marker='+',
         markersize=7, markerfacecolor='black', linestyle='None')
plt.plot(X[neg, 0], X[neg, 1], color='black', marker='o', markersize=7,
         markerfacecolor='yellow', linestyle='None')

# Title, axis labels and legend.
plt.title('Scatter plot of training data')
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')
plt.legend(('Admitted', 'Not admitted'), loc='upper right')
plt.show()
"""

"""
# Prepend the intercept column of ones to X and start theta at zero.
X = np.hstack((np.ones((m, 1)), X))
initial_theta = np.zeros((1, n + 1))

''' ==================== Part 2: Compute Cost and Gradient ==================== '''
#创建h(x)函数
def h_func(z):
return 1/(1+np.exp(-z))
# Logistic-regression cost function.
def costFunction(theta, X, Y):
    """Unregularized logistic-regression cost J(theta).

    theta : (n+1,) or (1, n+1) parameter vector
    X     : (m, n+1) design matrix (intercept column included)
    Y     : (m, 1) labels in {0, 1}

    Returns a scalar float (the original returned a (1, 1) array, which
    scipy.optimize then has to coerce).  Uses X.shape[0] instead of the
    module-level global m, so the function works on any dataset.  log(h)
    and log(1-h) are computed via logaddexp so the cost stays finite even
    where a naive sigmoid would saturate to exactly 0 or 1.
    """
    m = X.shape[0]
    z = np.dot(theta, X.T).ravel()   # (m,) regardless of theta's rank
    y = np.asarray(Y).ravel()
    log_h = -np.logaddexp(0, -z)     # log(sigmoid(z))
    log_1mh = -np.logaddexp(0, z)    # log(1 - sigmoid(z))
    return float(-(np.dot(y, log_h) + np.dot(1 - y, log_1mh)) / m)
# Gradient of the logistic-regression cost.
def gradient(theta, X, Y):
    """Gradient of J(theta) with respect to theta.

    theta : (n+1,) or (1, n+1); X : (m, n+1); Y : (m, 1).
    Returns a (1, n+1) row vector, matching the original implementation.
    Uses X.shape[0] rather than the module-level global m so the function
    is valid for any dataset; the sigmoid is computed in the
    overflow-safe exp(-logaddexp(0, -z)) form.
    """
    m = X.shape[0]
    z = np.dot(theta, X.T)
    h = np.exp(-np.logaddexp(0, -z))   # stable sigmoid
    return np.dot(h - Y.T, X) / m

# Sanity-check cost and gradient at theta = 0 and at a reference theta.
for probe_theta in (initial_theta, np.array([[-24.0, 0.2, 0.2]])):
    print(costFunction(probe_theta, X, Y))
    print(gradient(probe_theta, X, Y))

''' ==================== Part 3: Optimizing using minimize ==================== '''

# scipy.optimize.minimize plays the role of MATLAB's fminunc here; the
# TNC method accepts the analytic gradient through `jac`.
solver_options = {'maxiter': 400}
result = op.minimize(fun=costFunction, x0=initial_theta, args=(X, Y),
                     method='TNC', jac=gradient, options=solver_options)
print(result)

# Map a pair of feature vectors to polynomial features (see MATLAB's
# mapFeature helper; adapted from
# https://github.com/hujinsen/python-machine-learning).
def mapFeature(X1, X2, degree=3):
    """Map two feature columns to all polynomial terms up to `degree`.

    X1, X2 : 1-D arrays of equal length m
    degree : highest total power to generate (default 3, matching the
             original hard-coded value — now a parameter so other
             exercises can reuse the function)

    Returns an (m, k) array whose columns, for degree=2, are
    1, x1, x2, x1^2, x1*x2, x2^2.
    """
    out = np.ones((X1.shape[0], 1))  # bias column replaces the raw X
    for i in range(1, degree + 1):
        for j in range(i + 1):
            # Elementwise product — the NumPy analogue of MATLAB's .*
            term = (X1 ** (i - j)) * (X2 ** j)
            out = np.hstack((out, term.reshape(-1, 1)))
    return out

# --- Plot the data with the learned decision boundary ---
# Numeric linewidth/markersize (strings are rejected by newer matplotlib).
plt.plot(X[pos, 1], X[pos, 2], color='black', linewidth=2, marker='+',
         markersize=7, markerfacecolor='black', linestyle='None')
plt.plot(X[neg, 1], X[neg, 2], color='black', marker='o', markersize=7,
         markerfacecolor='yellow', linestyle='None')

if X.shape[1] <= 3:
    # Linear boundary: two points are enough to define the line.
    plot_x = np.array([np.max(X[:, 1]) - 2, np.min(X[:, 1]) + 2])
    # theta0 + theta1*x1 + theta2*x2 = 0  =>  x2 = -(theta0 + theta1*x1)/theta2
    plot_y = (-1 / result.x[2]) * (result.x[1] * plot_x + result.x[0])
    plt.plot(plot_x, plot_y)
    plt.legend(('Admitted', 'Not admitted', 'Decision Boundary'),
               loc='upper right')
    plt.axis([30, 100, 30, 100])
else:
    # Non-linear boundary: evaluate theta over a grid of polynomial features.
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    z = np.zeros((len(u), len(v)))
    # BUG FIX: the original called u(i)/v(j) (arrays are not callable),
    # used elementwise * where a dot product is needed, and skipped
    # index 0 by starting both ranges at 1.
    for i in range(len(u)):
        for j in range(len(v)):
            z[i, j] = np.dot(mapFeature(np.array([u[i]]), np.array([v[j]])),
                             result.x)
    z = z.T  # important to transpose z before plotting/contouring
    fig = plt.figure()
    # fig.add_subplot(projection='3d') is the supported way to get a 3-D
    # axes (direct Axes3D(fig) construction is deprecated); plot_surface
    # needs 2-D coordinate grids, hence the meshgrid.
    ax = fig.add_subplot(projection='3d')
    U, V = np.meshgrid(u, v)
    ax.plot_surface(U, V, z, rstride=1, cstride=1, cmap='rainbow')

plt.title('Scatter plot of training data')
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')
plt.show()  # the original never displayed this second figure

''' ==================== Part 4: Predict and Accuracies ==================== '''
# After learning the parameters, use the model to predict outcomes on
# unseen data: first the admission probability for a student scoring 45
# on exam 1 and 85 on exam 2, then predictions over the training set.
S = np.array([[1, 45, 85]])
p = h_func(np.dot(result.x, S.T))  # P(y = 1) for the (45, 85) student
print(p)

# y_p column 0: hard prediction (1 if P(y=1) > 0.5, else 0);
# y_p column 1: the predicted probability itself.
# Vectorized: one sigmoid pass over all rows (the original looped and
# evaluated the sigmoid twice per example).
y_p = np.zeros(shape=(m, 2))
probs = h_func(np.dot(X, result.x)).ravel()
y_p[:, 1] = probs
y_p[:, 0] = (probs > 0.5).astype(float)
print(y_p)


• 0
点赞
• 0
收藏
• 打赏
• 0
评论
09-27 168
06-05 1839
10-18 5976
09-01 1051
12-07 1826
09-27 1434
11-18 3358

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。