【机器学习】用牛顿法求解logistic回归

 数据集有80组(学生两门考试成绩及其是否被录取),x输入是二维(exam1的分数,exam2的分数),y是该学生是否被录取的0/1标签;逻辑回归模型输出的才是被录取的概率

import random
import numpy as np
import matplotlib.pyplot as plt
def sigmoid(X):
    """Numerically stable logistic function 1 / (1 + exp(-X)).

    Works element-wise on scalars or numpy arrays. The original form
    np.exp(-X) overflows (RuntimeWarning) for large negative X; using
    exp(-|X|), which is always in (0, 1], avoids that entirely.
    """
    z = np.exp(-np.abs(X))  # never overflows
    # X >= 0: 1/(1+exp(-X));  X < 0: exp(X)/(1+exp(X)) — the same value.
    return np.where(X >= 0, 1 / (1 + z), z / (1 + z))
def cost(A, Y, m):
    """Mean binary cross-entropy between predicted probabilities A and labels Y.

    A and Y are (m, 1) arrays; m is the number of samples.
    """
    per_sample = Y * np.log(A) + (1 - Y) * np.log(1 - A)
    return (-1 / m) * np.sum(per_sample)
def gradient_descent(X, Y, W, m, alpha, epoch):
    """Fit logistic-regression weights with (damped) Newton's method.

    NOTE: despite the name (kept for caller compatibility), each iteration
    takes a Newton step, not a plain gradient step.

    Args:
        X: (m, n) design matrix; first column is expected to be the bias 1s.
        Y: (m, 1) labels in {0, 1}.
        W: (n, 1) initial weight vector.
        m: number of training samples.
        alpha: damping factor applied to the Newton step.
        epoch: number of iterations.

    Returns:
        The fitted (n, 1) weight vector.
    """
    for i in range(epoch):
        A = sigmoid(np.dot(X, W))            # predicted probabilities, (m, 1)
        grad = np.dot(X.T, (A - Y)) / m      # first derivative of the cost
        # Hessian = (1/m) * X^T diag(A*(1-A)) X.  Scale the columns of X.T
        # element-wise instead of building two m-by-m diagonal matrices as
        # before — that was O(m^2) memory for the same result.
        w_diag = (A * (1 - A)).reshape(1, m)
        hessian = np.dot(X.T * w_diag, X) / m
        # Solve H @ step = grad instead of forming inv(H): cheaper and
        # numerically better conditioned than an explicit inverse.
        W = W - np.linalg.solve(hessian, grad) * alpha
        if i % 10 == 0:
            print(f"cost :{cost(A,Y,m)}")
    return W
def model(x):
    """Predict probabilities for raw (unscaled) 2-feature inputs x.

    Applies the same preprocessing used at training time — divide by the
    module-level `scale`, prepend a bias column of ones — then evaluates
    sigmoid(x @ W) with the trained global weights W.
    """
    scaled = x * 1.0 / scale
    with_bias = np.insert(scaled, 0, np.ones(scaled.shape[0]), 1)
    return sigmoid(np.dot(with_bias, W))
def draw():
    """Plot the dataset twice: colored by true label, then by prediction.

    Figure 1 colors each point by its ground-truth label; figure 2 colors
    each point by the model's thresholded prediction and overlays the 0.5
    decision-boundary contour. Relies on module globals: data_set_x_orig,
    data_set_y_orig, data_set_x, m_all, W, and model().
    """
    # Figure 1: points colored by ground-truth label (red = positive).
    color = ['r' if data_set_y_orig[i] > 0.5 else 'orange' for i in range(m_all)]
    x_min, x_max = np.min(data_set_x_orig[:, 0]) - 0.5, np.max(data_set_x_orig[:, 0]) + 0.5
    y_min, y_max = np.min(data_set_x_orig[:, 1]) - 0.5, np.max(data_set_x_orig[:, 1]) + 0.5
    xx = np.arange(x_min, x_max, 0.1)
    yy = np.arange(y_min, y_max, 0.1)
    X, Y = np.meshgrid(xx, yy)
    plt.scatter(data_set_x_orig[:, 0], data_set_x_orig[:, 1], c=color)
    plt.show()

    # Figure 2: points colored by model prediction, plus decision boundary.
    color = []
    for i in range(m_all):
        a = sigmoid(np.dot(data_set_x[i], W))
        color.append('r' if a > 0.5 else 'orange')
    # Evaluate the model once over the grid. (The original computed this
    # twice; the first result was unused because its contour call was
    # commented out.)
    z = model(np.c_[X.ravel(), Y.ravel()]).reshape(X.shape)
    plt.contour(X, Y, z, levels=[0.5], colors=['blue'])  # decision boundary
    plt.scatter(data_set_x_orig[:, 0], data_set_x_orig[:, 1], c=color)
    plt.show()


#  -------------------- load data (x: two exam scores, y: 0/1 admission label)
data_set_x_orig = np.loadtxt("ex3x.dat")
data_set_y_orig = np.loadtxt("ex3y.dat")
#  -------------------- load data

m_all = data_set_x_orig.shape[0]   # total number of samples
m_train = int(m_all * 0.8)         # training-set size (80% split)
m_test = m_all - m_train           # test-set size
n = data_set_x_orig.shape[1] + 1   # parameter dimension (features + bias)

# --------------------------------- preprocessing
# Scale all features into [0, 1] by the global max, then prepend a bias
# column of ones; reshape labels to a (m, 1) column vector.
scale = np.max(data_set_x_orig)
data_set_x = data_set_x_orig * 1.0 / scale
data_set_x = np.insert(data_set_x, 0, np.ones(m_all), 1)
data_set_y = data_set_y_orig.reshape((data_set_y_orig.shape[0], 1))
# --------------------------------- preprocessing

# ---------------------------------- train/test split
# random.sample draws m_train distinct indices; the remainder, in ascending
# order (matching the original list.remove() loop, but without its O(n^2)
# cost), becomes the test set.
all_index = list(range(data_set_x.shape[0]))
train_index = random.sample(all_index, m_train)
test_index = sorted(set(all_index) - set(train_index))

# NumPy fancy indexing replaces the original per-index append loops and
# yields the same arrays.
x_train = data_set_x[train_index]
x_test = data_set_x[test_index]
y_train = data_set_y[train_index]
y_test = data_set_y[test_index]

print(f"训练集数据组数{m_train}")
print(f"测试集数据组数{m_test}")

# ---------------------------------- train/test split

#W=np.random.random((n,1))
W = np.zeros((n, 1))  # initialize W as the zero vector


W = gradient_descent(x_train, y_train, W, m_train, 0.2, 20)  # solve for W
print(W)


# ----------------------- evaluation metrics
def _accuracy(x_set, y_set, count):
    """Fraction of samples whose thresholded prediction matches the label.

    Samples with a prediction of exactly 0.5 are counted as wrong, matching
    the original behaviour.
    """
    correct = 0
    for i in range(count):
        a = sigmoid(np.dot(x_set[i], W))
        if (a > 0.5 and y_set[i] == 1) or (a < 0.5 and y_set[i] == 0):
            correct += 1
    return correct / count

accuracy_test = _accuracy(x_test, y_test, m_test)
print(f"测试集准确率{accuracy_test}")

accuracy_train = _accuracy(x_train, y_train, m_train)
print(f"训练集准确率{accuracy_train}")

# Confusion matrix on the test set.
tp = fp = fn = tn = 0
for i in range(m_test):
    a = sigmoid(np.dot(x_test[i], W))
    y = y_test[i]
    if a > 0.5:
        if y == 1:
            tp += 1
        else:
            fp += 1
    elif a < 0.5:
        if y == 1:
            fn += 1
        else:
            tn += 1

# Guard each ratio against an empty denominator (with only m_test samples a
# class can plausibly be absent); report 0.0 instead of crashing.
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0  # recall == TPR
# BUG FIX: a point on the ROC curve is (FPR, TPR); the original reported
# (precision, FPR), which is not a ROC coordinate.
roc = (fpr, recall)
print(f"precision 和 recall :{precision,recall}")
print(f"roc: {roc}")
# ----------------------- evaluation metrics
draw()  # plot the data and decision boundary
x_temp = np.array([[20, 80]])  # predict one new sample
print(model(x_temp))

原始数据:

划分后的:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值