本章内容来源于西瓜书第三章的课后题,题号是3.3。关于西瓜书的答案,网上已经有了不少,我自己也是先按照网上的答案进行学习的,但是后来发现网上的答案没有很好地利用书中的知识,所以自己按照书中内容,重新做了一遍。
数据介绍
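我们先介绍数据。数据来自西瓜数据集3.0α,每行四列依次为:编号、密度、含糖率、是否好瓜(1 表示好瓜,0 表示坏瓜)。数据如下,可以直接复制拿去用(注意:原书中第10号样本的含糖率为0.267,下面的0.0267疑为录入笔误,使用前请核对):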
[[ 1. , 0.697 , 0.46 , 1. ],
[ 2. , 0.774 , 0.376 , 1. ],
[ 3. , 0.634 , 0.264 , 1. ],
[ 4. , 0.608 , 0.318 , 1. ],
[ 5. , 0.556 , 0.215 , 1. ],
[ 6. , 0.403 , 0.237 , 1. ],
[ 7. , 0.481 , 0.149 , 1. ],
[ 8. , 0.437 , 0.211 , 1. ],
[ 9. , 0.666 , 0.091 , 0. ],
[10. , 0.243 , 0.0267, 0. ],
[11. , 0.245 , 0.057 , 0. ],
[12. , 0.343 , 0.099 , 0. ],
[13. , 0.639 , 0.161 , 0. ],
[14. , 0.657 , 0.198 , 0. ],
[15. , 0.36 , 0.37 , 0. ],
[16. , 0.593 , 0.042 , 0. ],
[17. , 0.719 , 0.103 , 0. ]]
代码实现
import numpy as np
from sklearn import model_selection
from sklearn import metrics
def gradDscent_2(x, y, max_time=500, h=0.1):
    """Fit logistic-regression weights with fixed-step batch gradient descent.

    Starting from an all-zero weight vector, repeatedly moves against the
    gradient returned by ``derivative(x, y, beta)``.

    Args:
        x: 2-D array with one sample per row (m rows, n columns).
        y: Labels for the rows of ``x``; passed through to ``derivative``.
        max_time: Number of gradient updates to apply (default 500).
        h: Step size (learning rate) for every update (default 0.1).

    Returns:
        A 1-D NumPy array of length n holding the fitted weights.

    Note:
        The earlier form of this loop added a zero step on its first pass and
        threw away the step computed on its last pass, so ``max_time`` passes
        applied only ``max_time - 1`` updates. Each pass now applies exactly
        one update and no gradient evaluation is wasted.
    """
    _, n = np.shape(x)
    beta = np.zeros(n)
    for _ in range(max_time):
        # Move against the gradient of the objective being minimised.
        beta -= h * derivative(x, y, beta)
    return beta
def derivative(x, y, beta):
    """Return the gradient of the logistic-regression negative log-likelihood.

    Computes ``sum_i x_i * (p1_i - y_i)`` where
    ``p1_i = 1 - 1 / (1 + exp(x_i . beta))`` is the predicted probability of
    the positive class for row ``i``.

    Args:
        x: 2-D array with one sample per row (m rows, n columns).
        y: 1-D array of m labels.
        beta: 1-D weight vector of length n.

    Returns:
        A 1-D NumPy array of length n containing the gradient.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    scores = x @ beta
    # np.exp replaces the removed np.math.exp alias and, unlike math.exp,
    # saturates to inf instead of raising OverflowError on large scores; the
    # resulting 1 / (1 + inf) == 0 is the correct limiting probability.
    with np.errstate(over="ignore"):
        p0 = 1.0 / (1.0 + np.exp(scores))
    residual = (1.0 - p0) - y  # p1 - y for every sample
    return x.T @ residual
def sigmoid(x, beta):
    """Return the logistic function of the linear score ``beta . x``.

    Args:
        x: A single sample (1-D array of length n) or a 2-D array with one
            sample per row.
        beta: 1-D weight vector of length n.

    Returns:
        ``1 / (1 + exp(-beta . x))``: a scalar for a single sample, or a 1-D
        array with one value per row when ``x`` is 2-D.
    """
    score = np.dot(beta, np.asarray(x).T)
    # np.exp replaces the removed np.math.exp alias; on overflow it yields
    # inf (so the result becomes 0.0) rather than raising OverflowError.
    with np.errstate(over="ignore"):
        return 1.0 / (1.0 + np.exp(-score))
def predict(x, beta):
    """Classify every row of ``x`` by thresholding ``sigmoid`` at 0.5.

    Args:
        x: 2-D array with one sample per row.
        beta: Weight vector forwarded to ``sigmoid``.

    Returns:
        A 1-D float array with one entry per row: 1.0 where
        ``sigmoid(row, beta)`` is strictly greater than 0.5, otherwise 0.0.
    """
    n_samples, _ = np.shape(x)
    labels = [
        1.0 if sigmoid(x[row], beta) > 0.5 else 0.0
        for row in range(n_samples)
    ]
    return np.array(labels, dtype=float)
# Sample table used for the experiment. Column 0 is a running sample number
# that is not used by the model, columns 1-2 are the two input features and
# column 3 is the 0/1 class label.
# NOTE(review): the features are presumably density and sugar content from the
# book's watermelon dataset 3.0-alpha - confirm.
# NOTE(review): the 0.0267 in row 10 looks like a transcription slip for 0.267
# - confirm against the book before relying on the results.
dataset = np.array([
    [1.0, 0.697, 0.46, 1.0],
    [2.0, 0.774, 0.376, 1.0],
    [3.0, 0.634, 0.264, 1.0],
    [4.0, 0.608, 0.318, 1.0],
    [5.0, 0.556, 0.215, 1.0],
    [6.0, 0.403, 0.237, 1.0],
    [7.0, 0.481, 0.149, 1.0],
    [8.0, 0.437, 0.211, 1.0],
    [9.0, 0.666, 0.091, 0.0],
    [10.0, 0.243, 0.0267, 0.0],
    [11.0, 0.245, 0.057, 0.0],
    [12.0, 0.343, 0.099, 0.0],
    [13.0, 0.639, 0.161, 0.0],
    [14.0, 0.657, 0.198, 0.0],
    [15.0, 0.36, 0.37, 0.0],
    [16.0, 0.593, 0.042, 0.0],
    [17.0, 0.719, 0.103, 0.0],
])
print(type(dataset))

# Split the table into the feature matrix and the label vector.
# NOTE(review): no constant-1 column is appended to x, so the fitted model has
# no intercept term - confirm this is intended.
x = dataset[:, 1:3]
y = dataset[:, -1]

# Hold out 60% of the rows for testing; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.6, random_state=0
)

# Fit on the training split, then evaluate on the held-out rows.
beta = gradDscent_2(x_train, y_train)
pred = predict(x_test, beta)
print(metrics.confusion_matrix(y_test, pred))
print(metrics.classification_report(y_test, pred))
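结果与网上答案一致。梯度计算使用的是西瓜书第60页的式(3.30):$\frac{\partial \ell(\boldsymbol{\beta})}{\partial \boldsymbol{\beta}} = -\sum_{i=1}^{m} \hat{\boldsymbol{x}}_i \left( y_i - p_1(\hat{\boldsymbol{x}}_i; \boldsymbol{\beta}) \right)$。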