#### 逻辑回归

1、使用sklearn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# Cast the targets to integers and store column 0 of `ys` on the feature
# frame as a "label" column, so every sample carries its class.
# NOTE(review): `ys` and `df_X` are created outside this section -- presumably
# pandas DataFrames with integer column names; confirm against the loader.
ys = ys.astype(int)
df_X['label'] = ys[0].values

# Scatter both classes on shared axes to eyeball how the points are
# distributed: label 0 in blue, label 1 in red.
ax = plt.axes()
df_X[df_X['label'] == 0].plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X[df_X['label'] == 1].plot.scatter(x=0, y=1, ax=ax, color='red')

# Training arrays: a leading column of ones is placed in front of feature
# columns 0 and 1, and the labels are taken back out as a plain array.
Xs = np.column_stack((np.ones(len(df_X)), df_X[[0, 1]].values))
ys = df_X['label'].values

from __future__ import print_function
import numpy as np
from sklearn.linear_model import LogisticRegression

# The design matrix already carries a constant column, so the estimator is
# told not to fit a separate intercept term.
lr = LogisticRegression(fit_intercept=False).fit(Xs, ys)

# Evaluate on the same data that was used for fitting, then report.
score = lr.score(Xs, ys)
print("Coefficient: %s" % (lr.coef_,))
print("Score: %s" % (score,))

# Redraw the two classes, then overlay the boundary learned by `lr`.
ax = plt.axes()
df_X[df_X['label'] == 0].plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X[df_X['label'] == 1].plot.scatter(x=0, y=1, ax=ax, color='red')

# The boundary is the line w[0] + w[1] * x + w[2] * y = 0; solve it for y at
# the smallest and largest value found in column 1 of Xs and join the two
# points, which splits the plane into the two predicted regions.
_w = lr.coef_[0]
_xs = np.array([Xs[:, 1].min(), Xs[:, 1].max()])
_ys = -(_w[0] + _w[1] * _xs) / _w[2]
plt.plot(_xs, _ys, lw=1)


2、用梯度下降法对相同的数据进行分类，观察与sklearn结果的不同

class LGR_GD():
    """Binary logistic regression fitted with batch gradient descent.

    Attributes (both ``None`` until ``fit`` is called):
        w: ndarray of shape (1, d) holding one weight per column of X.
        n_iters: number of weight updates performed by the last ``fit``.
    """

    def __init__(self):
        self.w = None
        self.n_iters = None

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function of ``z`` element-wise."""
        # Clipping keeps np.exp from overflowing for very large |z|.
        return 1.0 / (1.0 + np.exp(-np.clip(z, -500, 500)))

    def fit(self, X, y, alpha=0.03, loss=1e-10, max_iter=100000):
        """Learn the weights by gradient ascent on the mean log-likelihood.

        Args:
            X: array-like of shape (m, d). No intercept is added here, so a
                constant column must already be part of X if one is wanted.
            y: array-like of m binary labels (0 or 1).
            alpha: step size of each update.
            loss: convergence threshold; iteration stops once the summed
                absolute change of the weights in one step is not above it.
            max_iter: upper bound on the number of updates, so that data on
                which the weights never settle cannot loop forever.

        Raises:
            ValueError: if X is not a non-empty 2-D array or y does not
                supply exactly one label per row of X.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array")
        y = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector
        m, d = X.shape
        if y.shape[0] != m:
            raise ValueError("X and y must contain the same number of samples")

        self.w = np.zeros((1, d))  # start from all-zero weights
        self.n_iters = 0
        tol = 1e5  # larger than any sensible threshold, forces a first pass

        while tol > loss and self.n_iters < max_iter:
            h = self._sigmoid(X.dot(self.w.T))            # (m, 1)
            step = alpha * np.mean(X * (y - h), axis=0)   # (d,)
            self.w = self.w + step
            tol = np.sum(np.abs(step))  # size of this update
            self.n_iters += 1

    def predict_proba(self, X):
        """Return the estimated probability of class 1 for each row of X."""
        if self.w is None:
            raise ValueError("fit must be called before predicting")
        return self._sigmoid(np.asarray(X, dtype=float).dot(self.w.T)).ravel()

    def predict(self, X):
        """Return the predicted label (0 or 1) for each row of X."""
        y_pred = (self.predict_proba(X) >= 0.5).astype(int)
        return y_pred

if __name__ == "__main__":
    # Fit the gradient-descent model on the same arrays given to sklearn.
    lr_gd = LGR_GD()
    lr_gd.fit(Xs, ys)

    # Plot the two classes again so the boundary can be compared visually.
    ax = plt.axes()

    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

    # Boundary line w0 + w1 * x + w2 * y = 0, solved for y at the two
    # extremes of column 1 of Xs.
    _xs = np.array([np.min(Xs[:,1]), np.max(Xs[:,1])])
    _ys = (lr_gd.w[0][0] + lr_gd.w[0][1] * _xs) / (- lr_gd.w[0][2])
    plt.plot(_xs, _ys, lw=1)



3、用牛顿法实现

import matplotlib.pyplot as plt
import numpy as np

from compute_loss import compute_loss
from file2matrix import file2matrix
from sigmoid import sigmoid

# Quick look at np.diag: a 1-D input is placed on the main diagonal of an
# otherwise zero square matrix.
a = np.diag([1, 2])
print(a)

def nt(x, y, theta, iterations=100):
    """Fit logistic-regression parameters with Newton's method.

    Each pass records the current loss, then moves ``theta`` by the Newton
    step (Hessian solved against the gradient). Iteration stops early once
    the loss improves by less than 0.001 compared with the previous pass.

    Args:
        x: 2-D design matrix with one row per sample.
        y: targets; assumed to be a column vector matching the output of
            ``sigmoid(np.dot(x, theta))`` -- TODO confirm against callers.
        theta: initial parameters; assumed to be a column vector with one
            entry per column of ``x``.
        iterations: maximum number of Newton steps.

    Returns:
        A tuple ``(theta, J_loss, real_iter)``: the updated parameters, the
        list of losses recorded before each step, and the number of passes
        actually executed (always equal to ``len(J_loss)``).

    Raises:
        ZeroDivisionError: if ``x`` has no rows.
        numpy.linalg.LinAlgError: if the Hessian is singular.
    """
    n = x.shape[0]
    # Float literal so the scaling is true division on Python 2 as well.
    scale = 1.0 / n
    J_loss = []
    orig_loss = np.inf
    real_iter = 0
    for i in range(iterations):
        l = compute_loss(x, y, theta)
        J_loss.append(l)
        # Count every executed pass, so the value is right even when the
        # loop runs out of iterations without meeting the stop criterion.
        real_iter = i + 1

        h = sigmoid(np.dot(x, theta))
        j_first_order = scale * np.dot(x.T, h - y)
        # Row-wise weights h * (1 - h) give the same product as
        # x.T @ diag(h) @ diag(1 - h) @ x without building n-by-n matrices.
        row_weights = (h * (1 - h)).reshape(-1, 1)
        j_second_order = scale * np.dot(x.T, x * row_weights)  # (m, m)
        # Solve H * step = g directly instead of forming the inverse of H.
        theta = theta - np.linalg.solve(j_second_order, j_first_order)

        if orig_loss - l < 0.001:
            break
        orig_loss = l
    return theta, J_loss, real_iter

if __name__ == "__main__":
    # Load features and targets from the two data files.
    X = file2matrix('./ex4x.dat')
    y = file2matrix('./ex4y.dat', 1)

    # Prepend a column of ones so theta[0] plays the role of the intercept.
    n, m = X.shape
    X = np.column_stack((np.ones(n), X))
    m = m + 1
    theta = np.zeros((m, 1))

    theta, J_his, real_iter = nt(X, y, theta)
    print(real_iter)

    print("theta", theta)
    print("J", J_his)

    # Loss curve. The x-axis is sized from the recorded losses themselves so
    # it always matches J_his, whatever iteration count nt reports.
    plt.xlabel("iteration")
    plt.ylabel("J")
    plt.plot(np.arange(len(J_his)), J_his)
    plt.show()

    # Scatter the samples by target value, using columns 1 and 2 of X
    # (the two original features after the ones column).
    pos = list(np.where(y == 1.0)[0])
    X_pos = X[pos, 1:3]
    neg = list(np.where(y == 0.0)[0])
    X_neg = X[neg, 1:3]
    plt.plot(X_pos[:, 0], X_pos[:, 1], '+', label='admitted')
    plt.plot(X_neg[:, 0], X_neg[:, 1], 'o', label='Not admitted')
    plt.xlabel("exam1 score")
    plt.ylabel("exam2 score")
    plt.legend()

    # Decision boundary theta0 + theta1 * x1 + theta2 * x2 = 0, solved for
    # x2 over a small grid of x1 values in one vectorised expression.
    xx = np.linspace(20, 70, 6)
    yy = (xx * -(theta[1][0]) - (theta[0][0])) / (theta[2][0])
    plt.plot(xx, yy)
    plt.show()

