本博客复现感知机的二分类算法
import pandas as pd
import numpy as np
class Perceptron:
    """Binary linear classifier trained with the perceptron update rule.

    The decision function is ``sign(w . x + b)``. Training labels are
    expected to be encoded as -1 / +1; the update rule in :meth:`fit`
    is only meaningful for that encoding.
    """

    def __init__(self, w=None, b=0):
        """
        :param w: initial weight vector, or ``None`` for an untrained model
        :param b: initial bias term
        """
        self.w = w
        self.b = b

    def fit(self, x_train: pd.DataFrame, y_train: pd.Series, iter: int = 20,
            learn_rate: float = 0.001) -> None:
        """
        Train the model and store the result in ``self.w`` / ``self.b``.

        Each iteration looks at exactly one sample, taken in row order; when
        ``iter`` exceeds the number of rows the samples are cycled through
        again instead of running off the end of the data.

        :param x_train: training features, one row per sample
        :param y_train: training labels (-1 or +1), one per row of x_train
        :param iter: number of single-sample update steps (name kept for
            backward compatibility although it shadows the builtin)
        :param learn_rate: step size applied to every correction
        :return: None
        :raises ValueError: if the row counts differ or the training set is empty
        """
        if x_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                "x_train and y_train must contain the same number of rows"
            )
        n_samples, n_features = x_train.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")

        # Work on plain arrays: rows are addressed by position, so the
        # DataFrame/Series index labels no longer matter.
        features = x_train.to_numpy(dtype=float)
        labels = y_train.to_numpy(dtype=float)

        weights = np.zeros(n_features)
        bias = 0.0
        for step in range(iter):
            row = step % n_samples
            x_i = features[row]
            y_i = labels[row]
            # A sample is misclassified (or on the boundary) when
            # y * (w . x + b) <= 0; only then are the parameters corrected.
            if y_i * (weights @ x_i + bias) <= 0:
                weights += learn_rate * y_i * x_i
                bias += learn_rate * y_i

        # Keep the (1, n_features) layout for the stored weights.
        self.w = weights.reshape(1, -1)
        self.b = float(bias)

    def predict(self, x_test: pd.DataFrame) -> list:
        """
        Classify every row of ``x_test``.

        :param x_test: features to classify, one row per sample
        :return: list with 1 where ``w . x + b > 0`` and -1 otherwise
        :raises RuntimeError: if no weights are available yet
        """
        if self.w is None:
            raise RuntimeError("model has no weights; call fit() first")

        # Accept any weight layout (list, 1-D or 1 x n array) and any
        # single-element bias (scalar or 1 x 1 array).
        weights = np.asarray(self.w, dtype=float).reshape(-1)
        bias = np.asarray(self.b, dtype=float).item()

        scores = x_test.to_numpy(dtype=float) @ weights + bias
        return np.where(scores > 0, 1, -1).tolist()
# Load the train and test CSV files; neither has a header row.
data = pd.read_csv('../Mnist/mnist_train_little.csv', header=None)
test_data = pd.read_csv('../Mnist/mnist_test_little.csv', header=None)

# Column 0 holds the label, every remaining column is a feature.
# NOTE(review): the labels are passed on exactly as stored in the CSV; the
# perceptron update rule presumably needs them encoded as -1 / +1 -- confirm
# the files are already in that form.
y_train = data[0]
x_train = data.drop(columns=0)
x_test = test_data.drop(columns=0)

# Build the model, train it on the training split, then classify the test rows.
perceptron_model = Perceptron()
perceptron_model.fit(x_train, y_train)
y_predict = perceptron_model.predict(x_test)