习题3.3 编程实现对率回归,并给出西瓜数据集 3.0α 上的结果.
数据集3.0α
sn | density | suger_ratio | good_melon |
1 | 0.697 | 0.46 | 1 |
2 | 0.774 | 0.376 | 1 |
3 | 0.634 | 0.264 | 1 |
4 | 0.608 | 0.318 | 1 |
5 | 0.556 | 0.215 | 1 |
6 | 0.403 | 0.237 | 1 |
7 | 0.481 | 0.149 | 1 |
8 | 0.437 | 0.211 | 1 |
9 | 0.666 | 0.091 | 0 |
10 | 0.243 | 0.267 | 0 |
11 | 0.245 | 0.057 | 0 |
12 | 0.343 | 0.099 | 0 |
13 | 0.639 | 0.161 | 0 |
14 | 0.657 | 0.198 | 0 |
15 | 0.36 | 0.37 | 0 |
16 | 0.593 | 0.042 | 0 |
17 | 0.719 | 0.103 | 0 |
对应的python实现代码
#encoding:UTF-8
import csv
import numpy as np
from math import *
from numpy.linalg import *
from matplotlib import pyplot as plt
#牛顿迭代法实现对数几率回归
old_ml=0 #初始似然函数值
n=0
beta=np.mat([[0],[0],[1]]) #初始参数[w1;w2;b]
#读取数据集
wm_data_30a = csv.reader(open('../data_set/watermelon_data_set_30a.csv','r'))
xi_d = np.mat(np.zeros((17,3)))
yi_d = np.mat(np.zeros((17,1)))
#遍历数据集
sn=0
for stu in wm_data_30a:
#[sn x1 x2 y]
if(stu[0].isdigit()==True):
xi_d[sn,:] = np.mat([float(stu[1]),float(stu[2]),1])
yi_d[sn,0] = float(stu[3])
sn = sn+1
#print('xi_d=\n',xi_d)
#print('yi_d=\n',yi_d)
#print(wm_data_30a)
iter=0
xi = np.mat(np.zeros((3,1)))
while(iter<10):
sn=0
#遍历数据
print('iter=',iter)
new_ml=0
for idx in range(17):#0~16
#print('idx=',idx)
xi = xi_d[idx,:].T #3*1的矩阵
yi=yi_d[idx,0]
#print(yi)
#计算当前ml函数
# ln(1+beta*xi) - yi*beta*xi
new_ml = new_ml + log(1+e**det(beta.T*xi)) - yi*det(beta.T*xi)
print('new ml=',new_ml)
if abs(new_ml-old_ml)<0.001:
print(new_ml-old_ml)
print(beta)
break
#牛顿迭代法更新参数
old_ml = new_ml
dml_beta_1 = np.mat([[0],[0],[0]])#3*1矩阵
dml_beta_2 = np.mat(np.zeros((3,3))) #3*3矩阵
#遍历数据
for idx in range(17):#0~16
xi = xi_d[idx,:].T
yi=yi_d[idx,0]
#print(det(beta.T*xi))
p1_xi = 1-1/(1+e**(det(beta.T*xi)))
#计算一阶导
dml_beta_1 = dml_beta_1 - xi*(yi-p1_xi) #矩阵
#计算二阶导
dml_beta_2 = dml_beta_2 + xi*(xi.T)*p1_xi*(1-p1_xi)
#print('Neton iter',dml_beta_2)
beta = beta - (dml_beta_2.I)*dml_beta_1
print('beta=',beta)
iter=iter+1
#%画出散点图以及计算出的直线
#%逐点画 分别表示是否好瓜
for idx in range(17):
if yi_d[idx,0]==1:
plt.plot(xi_d[idx,0],xi_d[idx,1],'+r')
else:
plt.plot(xi_d[idx,0],xi_d[idx,1],'ob')
#画出回归线
ply=-(0.1*beta[0,0]+beta[2,0])/beta[1,0];
pry=-(0.9*beta[0,0]+beta[2,0])/beta[1,0];
px=[0.1,0.9]
py=[ply,pry]
plt.plot(px,py)
plt.xlabel('density')
plt.ylabel('suger ratio')
plt.title('logistic function regression')
plt.show()
运算结果
beta=
[[ 3.15832738]
[12.52119012]
[-4.42886222]]