一、逻辑回归
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
1.读取数据:
# Load the exam-score data set.
# The path is a raw string so the Windows backslashes are taken literally
# instead of being parsed as (invalid) escape sequences such as "\C" or "\e".
path = r'H:\Coursera-ML-AndrewNg-Notes\code\ex2-logistic regression\ex2data1.txt'
# header=None: the first line of the file is read as data, not as column
# names, so the three column names are supplied explicitly.
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])
data.head()
部分数据截图:
2.绘制散点图
# Split the samples by label. The comparison uses integers, not the strings
# '1' / '0': a numeric column never equals a string, which would leave both
# selections empty and the plot blank.
# NOTE(review): assumes read_csv parsed 'Admitted' as a numeric 0/1 column.
positive = data[data['Admitted'] == 1]
negative = data[data['Admitted'] == 0]

# One marker style per class: green circles and red crosses.
plt.scatter(positive['Exam 1'], positive['Exam 2'],
            color='g', marker='o', label='Admitted')
plt.scatter(negative['Exam 1'], negative['Exam 2'],
            color='r', marker='x', label='No Admitted')
plt.legend()
plt.xlabel('Exam 1')
plt.ylabel('Exam 2')
plt.show()
3.定义X和y,初始化 $\theta$
# Prepend a constant column of ones so that theta[0] acts as the intercept.
# The guard keeps this step re-runnable: DataFrame.insert raises ValueError
# when a column with the same name already exists.
if 'ones' not in data.columns:
    data.insert(0, 'ones', 1)
data.head()

# Every column except the last is a feature (including the 'ones' column);
# the last column is the label. Both are converted to plain NumPy arrays,
# with y as a 1-D vector.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# One parameter per feature column, initialised to zero. A float array is
# used so that later floating-point updates of theta are not rejected or
# truncated, as they would be with an integer array.
theta = np.zeros(X.shape[1])
4.定义sigmoid函数
$$g(z)=\frac{1}{1+e^{-z}}$$
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)).

    Works element-wise on NumPy arrays as well as on scalars, because the
    expression is built from ``np.exp`` and ordinary arithmetic.
    """
    return 1 / (1 + np.exp(-z))
# Visual sanity check of sigmoid: evaluate it on 100 evenly spaced points
# in [-10, 10] and draw the resulting curve.
nums = np.linspace(start=-10, stop=10, num=100)
plt.plot(nums, sigmoid(nums))
plt.show()
5.定义代价函数
$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_{\theta}(x^{(i)})\right)-\left(1-y^{(i)}\right)\log\left(1-h_{\theta}(x^{(i)})\right)\right]$$
def Cost(theta, X, y):
    """Return the mean logistic-regression (cross-entropy) cost.

    Computes ``sum(-y*log(h) - (1-y)*log(1-h)) / len(X)`` where
    ``h = sigmoid(X @ theta)``.

    Args:
        theta: parameter vector multiplied with ``X`` via ``X @ theta``.
        X: design matrix; ``len(X)`` is used as the number of samples.
        y: labels, combined element-wise with the hypothesis ``h``.
            NOTE(review): assumes y is a 1-D 0/1 vector with one entry per
            row of X, as prepared elsewhere in this file -- confirm.

    Returns:
        The summed per-sample cost divided by ``len(X)``.
    """
    # Evaluate the hypothesis once and reuse it in both log terms.
    h = sigmoid(X @ theta)
    inner = -(y * np.log(h)) - (1 - y) * np.log(1 - h)
    return np.sum(inner) / len(X)
# Sanity check: evaluate the cost at the initial theta defined earlier.
Cost(theta,X,y)
# Result: 0.6931471805599453
# With an all-zero theta every prediction is sigmoid(0) = 0.5, so the cost
# equals -log(0.5) = ln 2.
6.定义梯度
∂ J ( θ ) ∂ θ j = 1 m