Andrew Ng Machine Learning, Assignment 2 (Python implementation): Logistic Regression

Logistic Regression

The dataset is here.

import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import classification_report

# Read the raw data
def raw_data(path):
    data = pd.read_csv(path, names=['exam1', 'exam2', 'admit'])
    return data

# Plot the raw data
def draw_data(data):
    accept = data[data['admit'].isin([1])]
    refuse = data[data['admit'].isin([0])]
    plt.scatter(accept['exam1'], accept['exam2'], c='g', label='admit')
    plt.scatter(refuse['exam1'], refuse['exam2'], c='r', label='not admit')
    plt.legend()
    plt.title('admission')
    plt.xlabel('score1')
    plt.ylabel('score2')
    return plt

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Cost function (cross-entropy)
def cost_function(theta, x, y):
    m = x.shape[0]
    j = (y.dot(np.log(sigmoid(x.dot(theta)))) + (1 - y).dot(np.log(1 - sigmoid(x.dot(theta))))) / (-m)
    return j

# Gradient: the vector of partial derivatives of the cost (must match cost_function, hence the /m)
def gradient_descent(theta, x, y):
    m = x.shape[0]
    return (sigmoid(x.dot(theta)) - y).T.dot(x) / m

# Hypothesis: predict 1 when h(x) >= 0.5
def predict(theta, x):
    h = sigmoid(x.dot(theta))
    return [1 if p >= 0.5 else 0 for p in h]

# Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0
def boundary(theta, data):
    x1 = np.arange(20, 100, 0.01)
    # Solve theta0 + theta1*x1 + theta2*x2 = 0 for x2
    x2 = (theta[0] + theta[1] * x1) / -theta[2]
    plt = draw_data(data)
    plt.title('boundary')
    plt.plot(x1, x2)
    plt.show()

def main():
    data = raw_data('venv/lib/dataset/ex2data1.txt')
    # print(data.head())
    # plt = draw_data(data)
    # plt.show()
    x1 = data['exam1']
    x2 = data['exam2']
    x = np.c_[np.ones(x1.shape[0]), x1, x2]
    y = data['admit']
    # Do not initialize theta to a vector of ones: with raw scores around 100,
    # x.dot(theta) is large, sigmoid(...) is numerically exactly 1,
    # and log(1 - h(x)) becomes -inf, so the cost is undefined.
    theta = np.zeros(x.shape[1])
    # print(cost_function(theta, x, y))  # 0.6931471805599453
    # theta = opt.fmin_tnc(func=cost_function, x0=theta, fprime=gradient_descent, args=(x, y))
    theta = opt.minimize(fun=cost_function, x0=theta, args=(x, y), method='TNC', jac=gradient_descent)
    theta = theta.x
    # print(cost_function(theta, x, y))
    # classification_report expects the true labels first, then the predictions
    print(classification_report(y, predict(theta, x)))
    '''
    model = linear_model.LogisticRegression()
    model.fit(x, y)
    print(model.score(x, y))
    '''
    boundary(theta, data)

main()
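For reference, the cost and gradient implemented above are the standard (unregularized) logistic regression expressions, with the sigmoid hypothesis:

h_\theta(x) = \frac{1}{1 + e^{-\theta^{T}x}}

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[ y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big) \Big]

\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)}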

About the parameters of opt.fmin_tnc():

func is the function to be minimized.

x0 is the initial value of the variable being optimized (theta here).

fprime is the function that returns the gradient of func (it is not the minimization method).

args holds the extra arguments passed to func besides theta (here x and y).

The parameters of opt.minimize() are essentially the same (fun, x0, jac, args, plus method to choose the optimizer); the optimized parameters are read from the .x attribute of the returned result, as in the sketch below.
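A minimal sketch (assuming the cost_function, gradient_descent, x, y and initial theta defined above) of the two equivalent calls and how their results are read:

# fmin_tnc returns a tuple; the optimal parameters are its first element
result_tnc = opt.fmin_tnc(func=cost_function, x0=theta, fprime=gradient_descent, args=(x, y))
theta_tnc = result_tnc[0]

# minimize returns an OptimizeResult object; the optimal parameters are in its .x attribute
result_min = opt.minimize(fun=cost_function, x0=theta, args=(x, y),
                          method='TNC', jac=gradient_descent)
theta_min = result_min.x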

Results:

Training data: (figure)

Decision boundary: (figure)

classification_report() output: (figure)

For how to read the output of classification_report(), see the write-up on interpreting classification_report results.
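As a quick illustration of what the report contains, here is a toy call with made-up labels (not the assignment data):

from sklearn.metrics import classification_report

y_true = [0, 1, 1, 0, 1]   # hypothetical ground-truth labels
y_pred = [0, 1, 0, 0, 1]   # hypothetical predictions
# Prints one row per class with precision, recall, f1-score and support,
# followed by overall accuracy and the macro / weighted averages.
print(classification_report(y_true, y_pred))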

Regularized Logistic Regression

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
import scipy.optimize as opt
from sklearn.metrics import classification_report

# Read the data
def raw_data(path):
    data = pd.read_csv(path, names=['test1', 'test2', 'accept'])
    return data

# Plot the raw data (the 'accept' column is numeric, so filter with the integers 1 and 0)
def draw_data(data):
    accept = data[data['accept'].isin([1])]
    reject = data[data['accept'].isin([0])]
    plt.scatter(accept['test1'], accept['test2'], c='g')
    plt.scatter(reject['test1'], reject['test2'], c='r')
    plt.title('raw_data')
    plt.xlabel('test1')
    plt.ylabel('test2')
    return plt

# Feature mapping: expand (x1, x2) into all polynomial terms up to the given power
def feature_mapping(x1, x2, power):
    datamap = {}
    for i in range(power + 1):
        for j in range(i + 1):
            datamap["f{}{}".format(j, i - j)] = np.power(x1, j) * np.power(x2, i - j)
    return pd.DataFrame(datamap)
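# Illustration (hypothetical example, not part of the original script): with power=6 the two
# raw features are expanded into 28 columns named "f{j}{i-j}" holding x1**j * x2**(i-j)
# for 0 <= j <= i <= 6, e.g. f00 is the all-ones bias column, f10 = x1, f01 = x2,
# f11 = x1*x2, and f60 = x1**6.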

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Regularized cost function
def regularized_cost_function(theta, x, y, lam):
    m = x.shape[0]
    j = ((y.dot(np.log(sigmoid(x.dot(theta))))) + ((1 - y).dot(np.log(1 - sigmoid(x.dot(theta)))))) / -m
    # Do not penalize theta[0] (the bias term), consistent with the gradient below
    penalty = lam * (theta[1:].dot(theta[1:])) / (2 * m)
    return j + penalty

# Gradient of the regularized cost
def regularized_gradient_descent(theta, x, y, lam):
    m = x.shape[0]
    partial_j = (sigmoid(x.dot(theta)) - y).T.dot(x) / m
    partial_penalty = lam * theta / m
    # Do not penalize the first parameter
    partial_penalty[0] = 0
    return partial_j + partial_penalty

# Hypothesis: apply the sigmoid before thresholding at 0.5
def predict(theta, x):
    h = sigmoid(x.dot(theta))
    return [1 if p >= 0.5 else 0 for p in h]

def boundary(theta, data):
    """
    X, Y = np.meshgrid(x, y) turns the ranges defined by vectors x and y into
    coordinate matrices X and Y: the rows of X are copies of x and the columns
    of Y are copies of y. If x has length m and y has length n, both X and Y
    have shape n x m (note: not m x n).
    """
    # Plot the curve theta * x = 0
    x = np.linspace(-1, 1.5, 200)
    x1, x2 = np.meshgrid(x, x)
    # Map every grid point through the same polynomial features, then evaluate theta' * f
    z = feature_mapping(x1.ravel(), x2.ravel(), 6).values
    z = z.dot(theta)
    # print(x1.shape)  # (200, 200)
    # print(z.shape)   # (40000,)
    z = z.reshape(x1.shape)
    plt = draw_data(data)
    # Draw the z = 0 level set, i.e. the decision boundary
    plt.contour(x1, x2, z, [0])
    plt.title('boundary')
    plt.show()

def main():
    rawdata = raw_data('venv/lib/dataset/ex2data2.txt')
    # print(rawdata.head())
    # plt = draw_data(rawdata)
    # plt.show()
    data = feature_mapping(rawdata['test1'], rawdata['test2'], power=6)
    # print(data.head())
    x = data.values
    y = rawdata['accept']
    theta = np.zeros(x.shape[1])
    '''
    print(type(x))
    print(type(y))
    print(type(data))
    print(x.shape)
    print(y.shape)
    print(theta.shape)
    '''
    # print(regularized_cost_function(theta, x, y, 1))  # 0.6931471805599454
    theta = opt.minimize(fun=regularized_cost_function, x0=theta, args=(x, y, 1),
                         method='TNC', jac=regularized_gradient_descent).x
    # print(regularized_cost_function(theta, x, y, 1))  # 0.5357749513533407
    # print(classification_report(y, predict(theta, x)))
    boundary(theta, rawdata)

main()
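For reference, the regularized cost implemented above adds an L2 penalty that leaves the bias term \theta_0 unpenalized:

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[ y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big) \Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^{2}

and each gradient component with j \ge 1 gains an extra \frac{\lambda}{m}\theta_j term, which is why partial_penalty[0] is set to 0 in regularized_gradient_descent().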

Raw data: (figure)

Decision boundary: (figure)

classification_report() output: (figure)
