#coding=utf-8
from numpy import *
import re
def load_data(filename='test.txt'):
    """Load the training samples from a text file.

    Every non-blank line must contain at least three unsigned integers in
    the order ``x1``, ``x2``, ``label``. All other characters (brackets,
    commas, quotes, ...) are ignored, so a line such as ``[2, 4, '1']`` is
    accepted.

    Args:
        filename: Path of the file to read. Defaults to ``'test.txt'``.

    Returns:
        A ``(data, label)`` tuple. ``data`` is a list of ``[1.0, x1, x2]``
        rows of floats (the leading 1.0 is the constant/bias feature) and
        ``label`` is the list of integer class labels, one per row.

    Raises:
        ValueError: If a non-blank line holds fewer than three integers.
    """
    data = []
    label = []
    # ``with`` guarantees the file is closed even when parsing fails.
    with open(filename) as open_file:
        for line_no, line in enumerate(open_file, 1):
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            # Pull the digit runs out directly. The former
            # ``re.split(r'(\d*)', ...)`` only worked while empty matches
            # were ignored; since Python 3.7 they split as well, which
            # shifted the fields and made ``int('')`` fail on every line.
            numbers = re.findall(r'\d+', line)
            if len(numbers) < 3:
                raise ValueError(
                    'line %d of %s: expected 3 integers, found %d'
                    % (line_no, filename, len(numbers))
                )
            x1, x2, cls = numbers[:3]
            data.append([1.0, float(int(x1)), float(int(x2))])
            label.append(int(cls))
    return data, label
def sigmoid(inx):
    """Return the logistic function 1 / (1 + e^(-inx)).

    ``inx`` may be a scalar or a NumPy array/matrix; the function is
    applied element-wise and the result has the same shape as the input.
    """
    denominator = 1 + exp(-inx)
    return 1.0 / denominator
def grad_ascent(data, label, alpha=0.001, max_cycles=5000):
    """Fit logistic-regression weights by batch gradient ascent.

    Args:
        data: Table of m samples by n features (list of rows or 2-D
            array). Each row is used as-is, so a constant column must
            already be present if an intercept is wanted.
        label: Sequence of m class labels, one per row of ``data``.
        alpha: Step size of every update. Defaults to 0.001.
        max_cycles: Number of full-batch updates to run. Defaults to 5000.

    Returns:
        The fitted weights as an ndarray of shape (n, 1).

    Raises:
        ValueError: If ``data`` is not two-dimensional or the number of
            labels does not match the number of rows.
    """
    # Plain ndarrays + ``dot`` replace the former ``mat(...)`` matrices:
    # the ``mat`` alias is no longer exported by NumPy 2.0, so the star
    # import at the top of the file does not provide it any more.
    samples = array(data, dtype=float)
    if samples.ndim != 2:
        raise ValueError('data must be a 2-D table of feature rows')
    m, n = shape(samples)  # m rows (samples), n columns (features)
    targets = array(label, dtype=float).reshape(m, 1)  # column vector

    weights = ones((n, 1))
    for _ in range(max_cycles):
        # Predicted probability for every sample: sigmoid of the
        # row-by-weights dot product.
        h = sigmoid(dot(samples, weights))
        # Residual between the true class and the prediction, e.g. a
        # class-1 sample predicted at 0.72 contributes 1 - 0.72.
        error = targets - h
        # Gradient-ascent update: w = w + alpha * X^T * (y - h).
        weights = weights + alpha * dot(samples.T, error)
    return weights
def best_fit(weights):
    """Plot the loaded samples together with the fitted decision line.

    The samples are re-read through ``load_data()``. Rows labelled 1 are
    drawn as red squares, all other rows as green dots, and the line
    ``x2 = (-w0 - w1 * x1) / w2`` is drawn for x1 in [8.0, 10.0).

    Args:
        weights: Indexable holding the three coefficients w0, w1, w2
            (for example the array returned by ``grad_ascent``).
    """
    import matplotlib.pyplot as plt

    data, label = load_data()
    points = array(data)

    # Split the (x1, x2) coordinates by class; column 0 is the constant
    # feature and is not plotted.
    class1_x, class1_y = [], []
    class0_x, class0_y = [], []
    for row, tag in zip(points, label):
        if int(tag) == 1:
            xs, ys = class1_x, class1_y
        else:
            xs, ys = class0_x, class0_y
        xs.append(row[1])
        ys.append(row[2])

    # x values for the fitted line: (start, stop, step). The range should
    # cover the region where the two classes overlap on the x axis; the
    # sample data only overlaps around 9, hence the interval (8, 10).
    x = arange(8.0, 10.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]

    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(class1_x, class1_y, s=30, c='red', marker='s')
    axes.scatter(class0_x, class0_y, s=30, c='green')
    axes.plot(x, y)  # best-fit line separating the two classes
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# Sample data (presumably the contents of test.txt, one record per line):
[2, 4, '1']
[5, 8, '1']
[8, 5, '1']
[9, 9, '1']
[4, 1, '1']
[0, 0, '1']
[5, 8, '1']
[9, 3, '1']
[1, 8, '1']
[7, 3, '1']
[9, 3, '1']
[3, 8, '1']
[4, 6, '1']
[9, 7, '1']
[7, 1, '1']
[5, 2, '1']
[9, 6, '1']
[6, 9, '1']
[9, 8, '1']
[7, 0, '1']
[4, 5, '1']
[9, 8, '1']
[0, 4, '1']
[4, 3, '1']
[6, 0, '1']
[9, 9, '1']
[0, 3, '1']
[9, 8, '1']
[1, 7, '1']
[5, 8, '1']
[7, 8, '1']
[1, 5, '1']
[0, 7, '1']
[1, 9, '1']
[7, 8, '1']
[2, 5, '1']
[7, 4, '1']
[2, 1, '1']
[6, 1, '1']
[0, 1, '1']
[2, 4, '1']
[6, 0, '1']
[8, 0, '1']
[4, 9, '1']
[8, 3, '1']
[9, 8, '1']
[8, 9, '1']
[5, 9, '1']
[9, 6, '1']
[4, 2, '1']
[8, 7, '1']
[1, 9, '1']
[3, 8, '1']
[0, 1, '1']
[1, 1, '1']
[0, 9, '1']
[0, 6, '1']
[1, 5, '1']
[2, 6, '1']
[9, 5, '1']
[5, 0, '1']
[2, 4, '1']
[5, 9, '1']
[9, 5, '1']
[6, 3, '1']
[9, 3, '1']
[3, 6, '1']
[8, 6, '1']
[7, 7, '1']
[0, 0, '1']
[5, 4, '1']
[2, 9, '1']
[5, 7, '1']
[3, 9, '1']
[6, 9, '1']
[8, 2, '1']
[8, 3, '1']
[8, 0, '1']
[2, 4, '1']
[9, 2, '1']
[0, 3, '1']
[6, 8, '1']
[5, 4, '1']
[5, 0, '1']
[5, 3, '1']
[7, 6, '1']
[0, 4, '1']
[3, 9, '1']
[7, 5, '1']
[8, 3, '1']
[9, 7, '1']
[8, 3, '1']
[3, 5, '1']
[2, 6, '1']
[1, 9, '1']
[6, 2, '1']
[3, 5, '1']
[9, 7, '1']
[5, 6, '1']
[7, 2, '1']
[11, 0, '0']
[13, 7, '0']
[16, 5, '0']
[11, 0, '0']
[17, 6, '0']
[9, 5, '0']
[15, 1, '0']
[13, 7, '0']
[12, 6, '0']
[9, 5, '0']
[17, 4, '0']
[10, 8, '0']
[10, 8, '0']
[9, 5, '0']
[13, 2, '0']
[13, 6, '0']
[9, 0, '0']
[11, 9, '0']
[17, 2, '0']
[9, 7, '0']
[16, 4, '0']
[12, 1, '0']
[10, 8, '0']
[10, 1, '0']
[17, 7, '0']
[12, 0, '0']
[16, 5, '0']
[18, 2, '0']
[15, 6, '0']
[9, 5, '0']
[13, 4, '0']
[13, 2, '0']
[10, 2, '0']
[17, 7, '0']
[16, 1, '0']
[15, 0, '0']
[9, 4, '0']
[16, 7, '0']
[13, 1, '0']
[17, 0, '0']
[18, 4, '0']
[12, 3, '0']
[10, 7, '0']
[14, 6, '0']
[9, 5, '0']
[11, 9, '0']
[12, 4, '0']
[17, 8, '0']
[10, 2, '0']
[12, 5, '0']
[13, 0, '0']
[12, 2, '0']
[11, 1, '0']
[14, 1, '0']
[17, 0, '0']
[18, 3, '0']
[10, 5, '0']
[18, 2, '0']
[12, 4, '0']
[15, 8, '0']
[17, 9, '0']
[18, 5, '0']
[14, 9, '0']
[16, 9, '0']
[18, 5, '0']
[9, 1, '0']
[14, 4, '0']
[13, 2, '0']
[12, 9, '0']
[16, 8, '0']
[15, 4, '0']
[12, 0, '0']
[16, 9, '0']
[14, 3, '0']
[12, 9, '0']
[17, 5, '0']
[11, 4, '0']
[13, 6, '0']
[16, 3, '0']
[16, 2, '0']
[11, 3, '0']
[11, 1, '0']
[17, 9, '0']
[18, 2, '0']
[11, 8, '0']
[14, 3, '0']
[11, 0, '0']
[18, 6, '0']
[12, 6, '0']
[10, 0, '0']
[14, 0, '0']
[16, 5, '0']
[12, 7, '0']
[15, 0, '0']
[15, 1, '0']
[18, 9, '0']
[9, 0, '0']
[18, 0, '0']
[18, 6, '0']
[9, 3, '0']