逻辑回归,主要用于二分类问题。其基本形式是每个特征乘以一个回归系数,然后所有值相加。
再代入sigmoid函数中,得到一个0到1之间的连续概率值。然后再以0.5(或其他值)作为阈值,将其转为离散的0-1分类结果。
极大似然估计:利用已知样本数据,去估计相关的参数。将上述的内容再利用极大似然估计法进行估计简化,再利用梯度下降法求解。
梯度下降法:
参考:https://www.cnblogs.com/muchen/p/6296957.html#_labelTop
https://www.cnblogs.com/lc1217/p/6802637.html
具体实现代码如下:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
__author__ = 'Great'
'''
读取整理数据
(逻辑回归函数)
计算sigmord函数
(最大似然估计)
梯度下降法求解(迭代式)
预测
显示分类效果
'''
from math import exp
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the dataset: build feature rows (with a leading 1.0 bias term)
# and the label vector from ./test/testSet.txt.
def data_set():
    """Read whitespace-separated rows "x1 x2 label" from ./test/testSet.txt.

    Returns:
        (x_train, x_label): x_train is a list of [1.0, x1, x2] rows
        (the leading 1.0 is the intercept/bias term); x_label is a list
        of float class labels (0.0 or 1.0).
    """
    x_train = []
    x_label = []
    # 'with' guarantees the file handle is closed; the original opened it
    # (misleadingly named 'df') and never closed it.
    with open('./test/testSet.txt') as fh:
        for line in fh:
            fields = line.strip().split()
            x_train.append([1.0, float(fields[0]), float(fields[1])])
            x_label.append(float(fields[2]))
    return x_train, x_label
# Sigmoid (logistic) activation. np.exp is used because it works
# element-wise on arrays/matrices, unlike math.exp.
def sigmord(num):
    """Map num (scalar or ndarray/matrix) into (0, 1) via 1/(1+e^-x)."""
    exp_neg = np.exp(-num)
    return 1.0 / (1.0 + exp_neg)
# Batch gradient ascent on the log-likelihood of logistic regression
# (fixed learning rate, fixed iteration count).
def gradt(data, label):
    """Fit logistic-regression weights with 500 full-batch gradient steps.

    Args:
        data: list of feature rows, each like [1.0, x1, x2].
        label: list of 0/1 class labels, one per row of data.

    Returns:
        np.matrix of shape (n_features, 1) holding the learned weights.
    """
    feature_mat = np.mat(data)           # m x n design matrix
    target = np.mat(label).transpose()   # m x 1 column of labels
    _, n_features = np.shape(feature_mat)
    step = 0.001     # learning rate
    n_iter = 500     # iteration budget
    weights = np.ones((n_features, 1))
    for _ in range(n_iter):
        # Logistic prediction for every sample (sigmoid inlined).
        predicted = 1.0 / (1 + np.exp(-(feature_mat * weights)))
        # Step along the gradient of the log-likelihood.
        weights = weights + step * feature_mat.transpose() * (target - predicted)
    return weights
# Commented-out smoke test kept from the original author (was a no-op
# triple-quoted string; converted to comments — runtime-identical):
# def test():
#     data, label = data_set()
#     result = gradt(data, label)
#     print(result)
# Plot both classes and the fitted linear decision boundary.
def plot_fit(weight):
    """Scatter-plot the dataset colored by class and draw the line
    w0 + w1*x1 + w2*x2 = 0 implied by the learned weights.

    Args:
        weight: indexable of three weights [w0, w1, w2]
            (e.g. the ndarray produced by matrix.getA()).
    """
    data, label = data_set()
    data_array = np.array(data)
    x_class0, y_class0 = [], []
    x_class1, y_class1 = [], []
    for i in range(np.shape(data_array)[0]):
        if int(label[i]) == 1:
            x_class1.append(data_array[i, 1])
            y_class1.append(data_array[i, 2])
        else:
            x_class0.append(data_array[i, 1])
            y_class0.append(data_array[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # BUG FIX: the original called ax.scatter(xfeat0, xfeat0, ...), which
    # mirrored every class-0 point onto the diagonal (x plotted against
    # itself) instead of using its second feature as the y coordinate.
    ax.scatter(x_class0, y_class0, s=30, c='green', marker='o')
    ax.scatter(x_class1, y_class1, s=30, c='red')
    x = np.arange(-3.0, 3.0, 0.1)
    # Decision boundary: w0 + w1*x + w2*y = 0  =>  y = (-w0 - w1*x) / w2
    y = (-weight[0] - weight[1] * x) / weight[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
def test1():
    """Train on the sample dataset and visualize the decision boundary."""
    features, labels = data_set()
    fitted = gradt(features, labels)
    # .getA() converts the np.matrix of weights to a plain ndarray.
    plot_fit(fitted.getA())


if __name__ == '__main__':
    test1()
# TODO: explore more practical applications and deepen understanding of logistic regression.