逻辑回归：不依赖现成库的 Python 实现

春田花花幼稚园优秀毕业生

于 2020-10-05 22:00:09 发布

阅读量462

点赞数

分类专栏：大数据学习

本文链接：https://blog.csdn.net/weixin_44544354/article/details/108933529

版权

大数据学习专栏收录该内容

8 篇文章 0 订阅

订阅专栏

函数名瞎写的，轻喷

import numpy as np
import pandas as pd
import numpy.random
import time
import matplotlib.pyplot as plt
%matplotlib inline

# Read the whole data file; the context manager closes the handle even if
# reading fails (the original left the file open).
with open('data_banknote_authentication.txt','r',encoding='utf-8') as f:
    data = f.read()

# Split into records and drop blank lines, so a trailing newline at the end of
# the file does not produce an empty row that would break numeric conversion.
fdata=[line for line in data.splitlines() if line.strip()]
Mdata=pd.Series(fdata)
# Each record is a comma-separated string; split it into a list of fields.
adf=Mdata.str.split(',')
adf

0         [3.6216, 8.6661, -2.8073, -0.44699, 0]
1          [4.5459, 8.1674, -2.4586, -1.4621, 0]
2           [3.866, -2.6383, 1.9242, 0.10645, 0]
3          [3.4566, 9.5228, -4.0112, -3.5944, 0]
4         [0.32924, -4.4552, 4.5718, -0.9888, 0]
                          ...                   
1367     [0.40614, 1.3492, -1.4501, -0.55949, 1]
1368      [-1.3887, -4.8773, 6.4774, 0.34179, 1]
1369    [-3.7503, -13.4586, 17.5932, -2.7771, 1]
1370      [-3.5637, -8.3827, 12.393, -1.2823, 1]
1371      [-2.5419, -0.65804, 2.6842, 1.1952, 1]
Length: 1372, dtype: object

# Expand the per-row lists of string fields into a table with one column per
# field, converting the values to float32.
adf=pd.DataFrame(adf.values.tolist(),dtype=np.float32)
adf

	0	1	2	3	4
0	3.62160	8.66610	-2.807300	-0.44699	0.0
1	4.54590	8.16740	-2.458600	-1.46210	0.0
2	3.86600	-2.63830	1.924200	0.10645	0.0
3	3.45660	9.52280	-4.011200	-3.59440	0.0
4	0.32924	-4.45520	4.571800	-0.98880	0.0
...	...	...	...	...	...
1367	0.40614	1.34920	-1.450100	-0.55949	1.0
1368	-1.38870	-4.87730	6.477400	0.34179	1.0
1369	-3.75030	-13.45860	17.593201	-2.77710	1.0
1370	-3.56370	-8.38270	12.393000	-1.28230	1.0
1371	-2.54190	-0.65804	2.684200	1.19520	1.0

1372 rows × 5 columns

# Prepend a constant column of ones (named 'f') so that the first entry of
# theta acts as the intercept term.
adf.insert(0,'f',1)
df=adf.values
# Shuffle the rows in place so samples are visited in random order.
numpy.random.shuffle(df)
# First five columns: the ones column plus the four features.
x=df[:,0:5]
# Last column: the label; the 5:6 slice keeps it as a 2-D column.
y=df[:,5:6]

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The naive formula overflows in ``exp`` for large negative ``x``. This
    version only ever exponentiates a non-positive number, so it never
    overflows.

    Args:
        x: A scalar or array-like of real values.

    Returns:
        Values in [0, 1] with the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) is always in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays as arrays, matching what the plain formula returned.
    return result[()]

def hx(x,theta):
    """Return the model's predicted probabilities for the rows of ``x``.

    Each row of ``x`` is dotted with ``theta`` (multiply and sum), and the
    resulting score is passed through ``sigmoid`` to map it to a probability.
    """
    scores = np.dot(x, theta.T)
    return sigmoid(scores)

# Model parameters: a single row of five weights, all starting at zero.
theta=np.zeros([1,5])
theta # parameters

array([[0., 0., 0., 0., 0.]])

# Peek at the rows of x from index 1370 onward; a slice end beyond the last
# row is allowed, and only the rows that exist are returned.
x[1370:1400,:]

array([[ 1.        , -1.87820005, -6.58650017,  4.84859991, -0.021566  ],
       [ 1.        ,  4.8906002 , -3.35840011,  3.42020011,  1.0905    ]])

array([[1.],
       [0.],
       [1.],
       ...,
       [0.],
       [1.],
       [0.]])

def loss(x,y,theta):
    """Return the mean binary cross-entropy of the model on ``(x, y)``.

    Args:
        x: Sample matrix, one row per sample.
        y: 0/1 labels, row-aligned with ``x``.
        theta: Model parameters passed to ``hx``.

    Returns:
        The sum of the per-sample log-loss divided by ``len(y)``.
    """
    # Evaluate the predictions once instead of once per term.
    p = np.asarray(hx(x, theta))
    # Keep probabilities strictly inside (0, 1): a saturated prediction would
    # otherwise give log(0) = -inf, and 0 * -inf turns the whole loss into nan.
    eps = np.finfo(p.dtype).eps
    p = np.clip(p, eps, 1 - eps)
    # multiply works element-wise, so each term keeps the shape of y.
    positive_term = np.multiply(-y, np.log(p))
    negative_term = np.multiply(1 - y, np.log(1 - p))
    return np.sum(positive_term - negative_term) / len(y)

# Sanity check: with all-zero theta every prediction is 0.5, so the loss
# should be ln(2), about 0.693.
loss(x,y,theta)

0.6931471805599454

def pd(x,y,theta):
    """Return the gradient of the mean log-loss with respect to ``theta``.

    NOTE(review): this function's name rebinds the module-level ``pd`` that
    the file also uses as the pandas alias, so pandas is no longer reachable
    as ``pd`` once this definition runs. The name is kept here because other
    code calls it; consider renaming both together.

    Args:
        x: Sample matrix, one row per sample.
        y: Labels as a column, row-aligned with ``x``.
        theta: Current parameters; the result has the same shape.

    Returns:
        An array shaped like ``theta`` holding, for each parameter, the mean
        over the samples of ``(prediction - label) * feature``.
    """
    # ravel flattens the residual column into a 1-D vector.
    error = (hx(x, theta) - y).ravel()
    # A single vector-matrix product computes every partial derivative at
    # once, replacing the per-feature Python loop.
    grad = np.asarray(np.dot(error, x) / len(x), dtype=float)
    return grad.reshape(theta.shape)

def DM1(x,y,theta,YuZhi,learnning_Rate):
    """Train ``theta`` by gradient descent using one sample per update.

    Samples are visited in order. After each full pass the data is
    reshuffled. The reshuffle now permutes the ``x``/``y`` that were passed
    in, rather than reaching for the module-level ``df``. The function
    therefore works on any dataset and no longer mutates global state or the
    caller's arrays.

    Args:
        x: Sample matrix (NumPy array), one row per sample.
        y: Labels as a column array, row-aligned with ``x``.
        theta: Initial parameters.
        YuZhi: Iteration threshold; the loop stops once the update counter
            exceeds this value, so ``YuZhi + 1`` updates are performed.
        learnning_Rate: Step size applied to each gradient.

    Returns:
        A tuple ``(theta, count, Loss, elapsed)``: the final parameters, the
        update counter minus one (equal to ``YuZhi``), the list of
        full-dataset losses (the initial loss plus one per update), and the
        wall-clock time spent in seconds.
    """
    start_time = time.time()
    i = 0  # number of updates applied so far
    k = 0  # index of the next sample within the current pass
    Loss = [loss(x, y, theta)]
    while True:
        # Gradient from a single sample; slicing keeps the arrays 2-D.
        grad = pd(x[k:k+1], y[k:k+1], theta)
        k += 1
        if k >= len(x):
            # End of a pass: start over with a fresh random order, applying
            # the same permutation to x and y so rows stay aligned.
            k = 0
            order = np.random.permutation(len(x))
            x = x[order]
            y = y[order]
        theta = theta - (learnning_Rate * grad)
        Loss.append(loss(x, y, theta))
        i += 1
        if i > YuZhi:
            break
    return theta, i - 1, Loss, time.time() - start_time

# Train with an iteration threshold of 50000 and a learning rate of 0.003.
ENDtheta,times,ENDcosts,spend=DM1(x,y,theta,50000,0.003)
# Report the elapsed training time returned by DM1.
print('耗时：%f'%(spend))
# Plot the recorded loss values against their index, in red.
fig,ax=plt.subplots(figsize=(12,4))
ax.plot(np.arange(len(ENDcosts)),ENDcosts,'r')
ax.set_xlabel("Iter")
ax.set_ylabel("Loss")