import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*.

    The computation goes through ``np.exp``, so *z* may be a scalar or a
    NumPy array/matrix; for arrays the function is applied element-wise.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def cost(theta, X, y):
    """Return the mean logistic-regression (cross-entropy) cost.

    Args:
        theta: Parameter vector with one entry per column of *X*. A 1-D
            array, a list, or a 1xn ``np.matrix`` are all accepted.
        X: Design matrix with one row per sample (array-like or
            ``np.matrix``).
        y: 0/1 labels, one per row of *X*. May be 1-D, a column vector or
            a row vector; it is flattened before use.

    Returns:
        The average of ``-y*log(h) - (1-y)*log(1-h)`` over all samples,
        where ``h = sigmoid(X . theta)``.

    Raises:
        ValueError: If the shapes of *theta*, *X* and *y* are incompatible.
    """
    X = np.atleast_2d(np.asarray(X, dtype=float))
    theta = np.asarray(theta, dtype=float).ravel()
    # Flatten the labels so a 1-D ``y`` cannot broadcast against the
    # predictions into an m x m matrix and silently inflate the cost.
    y = np.asarray(y, dtype=float).ravel()

    z = X.dot(theta)
    # -y*log(s(z)) - (1-y)*log(1-s(z)) simplifies to log(1 + e^z) - y*z.
    # This form never evaluates log(0), so the result stays finite even
    # when the sigmoid saturates to exactly 0 or 1.
    losses = np.logaddexp(0, z) - y * z
    return np.sum(losses) / len(X)
def gradient(theta, X, y):
    """Return the gradient of the logistic-regression cost w.r.t. *theta*.

    Args:
        theta: Parameter vector with one entry per column of *X*. A 1-D
            array, a list, or a 1xn ``np.matrix`` are all accepted.
        X: Design matrix with one row per sample (array-like or
            ``np.matrix``).
        y: 0/1 labels, one per row of *X*. May be 1-D, a column vector or
            a row vector; it is flattened before use.

    Returns:
        A 1-D float ``ndarray`` with one partial derivative per parameter:
        ``X.T . (sigmoid(X . theta) - y) / m``.

    Raises:
        ValueError: If the shapes of *theta*, *X* and *y* are incompatible.
    """
    # Local import: overflow-safe logistic function from SciPy.
    from scipy.special import expit

    X = np.atleast_2d(np.asarray(X, dtype=float))
    theta = np.asarray(theta, dtype=float).ravel()
    # Flatten the labels so a 1-D ``y`` cannot broadcast against the
    # predictions into an m x m matrix and corrupt the gradient.
    y = np.asarray(y, dtype=float).ravel()

    residual = expit(X.dot(theta)) - y
    # One matrix product replaces the per-parameter Python loop.
    return X.T.dot(residual) / len(X)
def _scatter_exam_scores(ax, admitted, rejected):
    """Scatter both applicant groups on *ax* and add legend and axis labels."""
    ax.scatter(admitted['Exam 1'], admitted['Exam 2'], s=50, c='b', marker='o', label='Admitted')
    ax.scatter(rejected['Exam 1'], rejected['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')
    ax.legend()
    ax.set_xlabel('Exam 1 Score')
    ax.set_ylabel('Exam 2 Score')


# NOTE(review): absolute, machine-specific path -- adjust for your environment.
path = 'F:/work/data_sets/ex2data1.txt'
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])
print(data.head())

# pandas' isin() filters rows: it takes a list and tests, element by element,
# whether each value of the column is contained in that list. It is mostly
# useful when a column must match one of several values.
# data[data['Admitted'].isin([1])] selects every row whose 'Admitted' value
# is 1, which is equivalent to data[data['Admitted'] == 1].
# Rows with Admitted == 1.
positive = data[data['Admitted'].isin([1])]
# Rows with Admitted == 0.
negative = data[data['Admitted'].isin([0])]

# First figure: the raw data, coloured by class.
fig, ax = plt.subplots(figsize=(12, 8))
_scatter_exam_scores(ax, positive, negative)
plt.show()

# Prepend a column of ones so theta[0] acts as the intercept term.
data.insert(0, 'ones', 1)
col = data.shape[1]
# Every column but the last is a feature; the last column is the label.
X = data.iloc[:, :-1]
y = data.iloc[:, col - 1:col]
X = np.matrix(X.values)
y = np.matrix(y.values)
# One parameter per design-matrix column (instead of a hard-coded 3).
theta = np.zeros(X.shape[1])

# Cost at the all-zero starting point.
p = cost(theta, X, y)
print(p)

# Solve for theta with SciPy's truncated Newton optimiser; result[0] holds
# the optimised parameters.
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, y))
k = cost(result[0], X, y)
print(k)
print(result)

# Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0, solved for x2.
plotting_x1 = np.linspace(30, 100, 100)
plotting_h1 = (-result[0][0] - result[0][1] * plotting_x1) / result[0][2]

# Second figure: the same scatter plot with the fitted boundary on top.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(plotting_x1, plotting_h1, 'y', label='Prediction')
_scatter_exam_scores(ax, positive, negative)
plt.show()
# Evaluate the logistic regression model
# Prediction function: compute the probability of being admitted
# def model(theta, X):
# return sigmoid(np.dot(theta.T, X))
# print(model(result[0],[1,45,85]))
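'''
Another way to evaluate theta is to check how accurate the model is on the training set.
Write a predict function that, given the data and the parameters, returns "1" or "0".
Then apply this predict function to the training set and look at the resulting accuracy.
'''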
def predict(theta, X):
    """Return a 0/1 class prediction for every row of *X*.

    Args:
        theta: Parameter vector with one entry per column of *X*. A 1-D
            array, a list, or a 1xn ``np.matrix`` are all accepted.
        X: Design matrix with one row per sample (array-like or
            ``np.matrix``).

    Returns:
        A list of Python ints: 1 where the predicted probability
        ``sigmoid(X . theta)`` is at least 0.5, otherwise 0.

    Raises:
        ValueError: If *theta* does not match the number of columns of *X*.
    """
    # Local import: overflow-safe logistic function from SciPy.
    from scipy.special import expit

    # Convert to plain ndarrays so the product below is a true matrix-vector
    # product for ndarray and np.matrix inputs alike (``*`` on ndarrays
    # would multiply element-wise).
    X = np.atleast_2d(np.asarray(X, dtype=float))
    theta = np.asarray(theta, dtype=float).ravel()

    probability = expit(X.dot(theta))
    return [1 if p >= 0.5 else 0 for p in probability]
# Training-set accuracy of the fitted parameters.
theta_min = np.matrix(result[0])
predicts = predict(theta_min, X)
# Flatten y so each label is compared as a scalar rather than a 1x1 matrix.
labels = np.asarray(y).ravel()
correct = [1 if a == b else 0 for (a, b) in zip(predicts, labels)]
# Percentage of correct predictions. The previous ``sum % len`` only looked
# right for exactly 100 samples and reported 0 for a perfect classifier.
accuracy = 100.0 * sum(correct) / len(correct)
print('accuracy = {0}%'.format(accuracy))