代码:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Path of the data file
path = 'ex2data1.txt'
# Read the file, labelling its three columns
data = pd.read_csv(path, names=['Exam 1', 'Exam 2', 'Accepted'])
# Draw one scatter series per class so each gets its own colour and marker
fig, ax = plt.subplots()
for class_value, colour, symbol, legend_text in (
    (0, 'r', 'x', 'y=0'),
    (1, 'b', 'o', 'y=1'),
):
    group = data[data['Accepted'] == class_value]
    ax.scatter(group['Exam 1'], group['Exam 2'], c=colour, marker=symbol, label=legend_text)
# The legend call is what makes the per-series labels visible
ax.legend()
# Axis titles
ax.set(xlabel='exam1', ylabel='exam2')
# Display the figure
plt.show()
# Show the first few rows of the table
print(data.head())
# Build the feature matrix and the label matrix
def get_Xy(data):
    """Split a table into a design matrix and a label column.

    Every column except the last is treated as a feature and the last
    column as the label. A leading column of ones (named 'ones') is added
    to the features so the first parameter acts as the intercept.

    The input DataFrame is left untouched, so the function can be called
    repeatedly on the same table.

    Args:
        data: pandas DataFrame whose last column holds the labels.

    Returns:
        A tuple (X, y): X is an m x (n+1) NumPy array with the ones column
        first, y is an m x 1 NumPy array of labels.
    """
    # Copy the feature slice so inserting the bias column never touches
    # the caller's frame (inserting into `data` itself would fail on a
    # second call because the 'ones' column would already exist).
    X_ = data.iloc[:, :-1].copy()
    X_.insert(0, 'ones', 1)
    # pandas objects are converted to plain arrays for the matrix math
    X = X_.values
    y_ = data.iloc[:, -1]
    # A single column comes back 1-D; reshape it into an m x 1 matrix
    y = y_.values.reshape(len(y_), 1)
    return X, y
# Extract the feature matrix and the observed labels from the table
X, y = get_Xy(data)
# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Computed as exp(-log(1 + exp(-z))) through np.logaddexp, which avoids
    the overflow that a direct exp(-z) hits for large negative z.

    Args:
        z: a scalar or NumPy array.

    Returns:
        Values in [0, 1] with the same shape as z.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0, -z))
# Loss function
def costfunction(X,y,theta):
    """Return the mean cross-entropy loss of a logistic model.

    With z = X @ theta and A = sigmoid(z) the loss is
        -mean(y * log(A) + (1 - y) * log(1 - A)).
    Both logarithms are evaluated directly from z with np.logaddexp:
        -log(A)     == logaddexp(0, -z)
        -log(1 - A) == logaddexp(0,  z)
    so the result stays finite even when the sigmoid saturates to 0 or 1.

    Args:
        X: m x n feature matrix.
        y: m x 1 matrix of 0/1 labels.
        theta: n x 1 parameter matrix.

    Returns:
        The summed per-sample loss divided by len(X).
    """
    z = X @ theta
    # Penalty for the positive class plus penalty for the negative class
    positive_term = y * np.logaddexp(0, -z)
    negative_term = (1 - y) * np.logaddexp(0, z)
    return np.sum(positive_term + negative_term) / len(X)
# Start the parameters as a 3 x 1 column of zeros
theta = np.zeros(shape=(3, 1))
# Gradient descent
def grandientDescent(X,y,theta,iters,alpha):
    """Run a fixed number of gradient-descent updates on theta.

    Each iteration moves theta against the gradient X.T @ (sigmoid(X @ theta) - y),
    scaled by alpha / len(X), and then records the loss at the new theta.

    Args:
        X: feature matrix.
        y: label matrix.
        theta: starting parameters.
        iters: number of updates to perform.
        alpha: learning rate.

    Returns:
        A tuple (costs, theta): the list of losses recorded after every
        update, and the parameters after the last update.
    """
    sample_count = len(X)
    # Loss measured after each update
    cost_history = []
    for _ in range(iters):
        residual = sigmoid(X @ theta) - y
        theta = theta - (alpha / sample_count) * X.T @ residual
        cost_history.append(costfunction(X, y, theta))
    return cost_history, theta
# Train: collect the loss after every iteration and the final theta
costs, theta_finall = grandientDescent(X, y, theta, iters=200000, alpha=0.004)
# Prediction
def predict(X,theta):
    """Classify each row of X with the given parameters.

    A row is labelled 1 when sigmoid(X @ theta) is at least 0.5 and 0
    otherwise. The parameters actually passed in are used; nothing is
    read from module-level state.

    Args:
        X: feature matrix.
        theta: parameter matrix to evaluate.

    Returns:
        A flat list of 0/1 integers, one per row of X.
    """
    prob = sigmoid(X @ theta)
    return (np.asarray(prob) >= 0.5).astype(int).ravel().tolist()


# Predict with the trained parameters (not the initial zero theta) and
# turn the flat list into an m x 1 matrix so it lines up with y
y_ = np.array(predict(X, theta_finall))
y_pre = y_.reshape(len(y_), 1)
# Fraction of rows whose predicted label matches the observed one
acc = np.mean(y_pre == y)
print(acc)
结果展示: