感知机python实现
补习机器学习基础知识,把以前的代码复习一遍。
《统计学习方法》一书中感知机学习算法对偶形式的 Python 实现。代码分为数据生成和感知机学习两部分:数据生成部分可独立运行;感知机学习部分通过 `from data_building import data_set` 引用前者,因此需先将数据生成部分保存为 data_building.py。
数据生成
先拟定一超平面,在超平面两侧生成正负样本
# -*- coding: utf-8 -*-
import numpy as np
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
class data_set:
    """Synthetic two-class data set generated around a known hyperplane.

    Candidate points are drawn with ``np.random.randint(low, high)`` and
    labelled by the sign of ``w . x + b``.  Candidates whose score lies
    within ``_MARGIN`` of zero are discarded so the two classes do not touch
    the hyperplane.

    Args:
        num: Maximum number of samples to keep.
        feature_size: Number of features per sample.
        w: Hyperplane weights, one entry per feature.
        b: Hyperplane bias.
        low: Smallest feature value (inclusive).
        high: Upper bound of the feature values (exclusive).

    After ``build_set()``:
        data: Integer array of shape ``(n, feature_size)`` with ``n <= num``.
        label: Integer array of shape ``(n,)`` holding ``+1`` / ``-1``.
    """

    # Candidates with |w . x + b| <= _MARGIN are treated as "on" the plane.
    _MARGIN = 0.1

    def __init__(self, num, feature_size, w, b, low, high):
        self.num = num
        self.feature_size = feature_size
        self.w = w
        self.b = b
        self.low = low
        self.high = high

    def build_set(self):
        """Draw ``2 * num`` candidates and keep the first ``num`` usable ones.

        Fewer than ``num`` samples are stored when too many candidates fall
        inside the margin; callers should rely on ``label.shape[0]`` rather
        than ``num`` for the actual sample count.
        """
        candidates = np.random.randint(
            self.low, self.high, size=(self.num * 2, self.feature_size)
        )
        # One matrix-vector product replaces the per-row dot products;
        # ravel() lets a column-vector ``w`` behave like a flat one.
        scores = candidates @ np.ravel(self.w) + self.b
        # flatnonzero keeps the original candidate order, so the selected
        # rows are the same ones a sequential scan would pick.
        keep = np.flatnonzero(np.abs(scores) > self._MARGIN)[: self.num]
        self.data = candidates[keep]
        self.label = np.where(scores[keep] > 0, 1, -1)
        return

    def _point_colors(self):
        """Return one colour name per sample: red for -1, green otherwise."""
        return ['red' if y == -1 else 'green' for y in self.label]

    def _axis_grid(self, column):
        """Return evenly spaced values spanning the data range of a feature."""
        lo = np.min(self.data[:, column])
        hi = np.max(self.data[:, column])
        # Roughly one grid point per unit, but never fewer than two so the
        # surface is still drawable when the range is degenerate.
        return np.linspace(lo, hi, max(int(hi - lo), 2))

    def show_set(self):
        """Scatter the first two features and draw the generating line."""
        # Plot every stored sample; build_set may have kept fewer than num.
        plt.scatter(self.data[:, 0], self.data[:, 1], marker='x',
                    c=self._point_colors(), s=5)
        x1 = np.linspace(self.low, self.high, self.high - self.low)
        # The 0.001 offset avoids a division by zero when w[1] == 0.
        x2 = (-self.b - self.w[0] * x1) / (self.w[1] + 0.001)
        plt.plot(x1, x2, color='blue')
        plt.show()

    def show_set_3D(self):
        """Scatter the first three features and draw the generating plane."""
        fig = plt.figure()
        # fig.gca(projection=...) no longer accepts keyword arguments in
        # current Matplotlib; add_subplot is the supported spelling.
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(self.data[:, 0], self.data[:, 1], self.data[:, 2],
                   marker='x', c=self._point_colors(), s=5)
        # The second grid axis must follow feature 1, not feature 0.
        x1, x2 = np.meshgrid(self._axis_grid(0), self._axis_grid(1))
        # The 0.001 offset avoids a division by zero when w[2] == 0.
        x3 = (-self.b - self.w[0] * x1 - self.w[1] * x2) / (self.w[2] + 0.001)
        ax.plot_surface(x1, x2, x3, rstride=1, cstride=1,
                        cmap=plt.get_cmap('rainbow'), linewidth=0,
                        antialiased=True)
        plt.show()
if __name__ == '__main__':
    # Demo: up to 100 three-feature samples around the plane
    # x1 + x2 + x3 + 3 = 0, with features drawn from [-10, 10).
    set_ = data_set(
        num=100,
        feature_size=3,
        w=np.array([1, 1, 1]),
        b=3,
        low=-10,
        high=10,
    )
    set_.build_set()
    set_.show_set_3D()
感知机学习
初始化 α 和 b 均为 0(对偶形式中 w 由 α 间接表示),根据《统计学习方法》书中的公式实现感知机的学习;训练结束后由 $w = \sum_i \alpha_i y_i x_i$ 恢复 w。
# -*- coding: utf-8 -*-
import numpy as np
from data_building import data_set
from mpl_toolkits.mplot3d import axes3d
from matplotlib import cm
import matplotlib.pyplot as plt
class perceptron:
    """Perceptron trained with the dual-form learning algorithm.

    The model keeps one coefficient ``Alpha[i]`` per training sample and a
    bias ``b``; the primal weights are recovered at the end of training as
    ``w = sum_i Alpha[i] * label[i] * data[i]``.

    Args:
        data: Array of shape ``(n_samples, n_features)``.
        label: Array of shape ``(n_samples,)`` with values ``+1`` / ``-1``.
        lr: Learning rate added to ``Alpha[i]`` on every mistake.

    Attributes:
        Alpha: Per-sample dual coefficients.
        b: Bias term.
        w: Primal weights, filled in by ``train``.
        Gram: Matrix of pairwise inner products of the samples.
        converged: True when the last ``train`` call ended with every
            sample classified correctly.
    """

    def __init__(self, data, label, lr):
        self.data = data
        self.label = label
        self.lr = lr
        self.Alpha = np.zeros(label.shape[0], dtype=float)
        self.b = 0
        self.w = np.zeros(data.shape[1], dtype=float)
        # Precompute all inner products once; the dual form only ever needs
        # these, never the raw feature vectors.
        self.Gram = np.dot(self.data, self.data.transpose())
        self.converged = False

    def isError(self, index):
        """Return True when sample ``index`` is misclassified.

        A sample lying exactly on the decision boundary (score 0) counts as
        misclassified.
        """
        score = (self.Alpha * self.label * self.Gram[index, :]).sum() + self.b
        return bool(self.label[index] * score <= 0)

    def train(self, max_step):
        """Run dual-form perceptron updates until convergence or the cap.

        Args:
            max_step: Maximum number of passes over the training set.
                ``None`` means no limit, which never returns when the data
                is not linearly separable.

        The outcome is recorded in ``self.converged``; ``self.w`` is always
        refreshed from the current ``Alpha`` before returning.
        """
        n_samples = self.label.shape[0]
        self.converged = False
        passes = 0
        while max_step is None or passes < max_step:
            for i in range(n_samples):
                if self.isError(i):
                    self.Alpha[i] += self.lr
                    self.b += self.lr * self.label[i]
            passes += 1
            # Stop as soon as a full sweep finds no remaining mistakes.
            if not any(self.isError(j) for j in range(n_samples)):
                self.converged = True
                break
        self.w = np.dot(self.Alpha * self.label, self.data)
        return

    def _point_colors(self):
        """Return one colour name per sample: red for -1, green otherwise."""
        return ['red' if y == -1 else 'green' for y in self.label]

    def _axis_grid(self, column):
        """Return evenly spaced values spanning the data range of a feature."""
        lo = np.min(self.data[:, column])
        hi = np.max(self.data[:, column])
        # Roughly one grid point per unit, but never fewer than two so the
        # line/surface is still drawable when the range is degenerate.
        return np.linspace(lo, hi, max(int(hi - lo), 2))

    def show_result(self):
        """Scatter the first two features and draw the learned line."""
        plt.scatter(self.data[:, 0], self.data[:, 1], marker='x',
                    c=self._point_colors(), s=5)
        x1 = self._axis_grid(0)
        # The 0.001 offset avoids a division by zero when w[1] == 0.
        x2 = (-self.b - self.w[0] * x1) / (self.w[1] + 0.001)
        plt.plot(x1, x2, color='blue')
        plt.show()

    def show_result_3D(self):
        """Scatter the first three features and draw the learned plane."""
        fig = plt.figure()
        # fig.gca(projection=...) no longer accepts keyword arguments in
        # current Matplotlib; add_subplot is the supported spelling.
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(self.data[:, 0], self.data[:, 1], self.data[:, 2],
                   marker='x', c=self._point_colors(), s=5)
        # The second grid axis must follow feature 1, not feature 0.
        x1, x2 = np.meshgrid(self._axis_grid(0), self._axis_grid(1))
        # The 0.001 offset avoids a division by zero when w[2] == 0.
        x3 = (-self.b - self.w[0] * x1 - self.w[1] * x2) / (self.w[2] + 0.001)
        ax.plot_surface(x1, x2, x3, rstride=1, cstride=1,
                        cmap=plt.get_cmap('rainbow'), linewidth=0,
                        antialiased=True)
        plt.show()
if __name__ == '__main__':
    # Demo: generate a separable 3-feature data set, fit the perceptron on
    # it, then print and plot the learned hyperplane.
    set_ = data_set(
        num=100,
        feature_size=3,
        w=np.array([1, 1, 1]),
        b=3,
        low=-10,
        high=10,
    )
    set_.build_set()

    model = perceptron(data=set_.data, label=set_.label, lr=0.1)
    model.train(max_step=100)
    print(model.w, model.b)
    model.show_result_3D()