本文借鉴了一些网上的资源,并把部分计算过程向量化,以加快运行速度。
算法方面采用 SMO 算法,实现了完整的内外层循环。本人编程水平有限,还请各位大佬勿喷。
数据集使用的是 iris 数据集的一部分,结果看起来还可以。实际上这部分数据应该是近似线性可分的,所以结果还行。
Backend TkAgg is interactive backend. Turning interactive mode on.
----训练集准确率:1.000000----
----测试集准确率:1.000000----
代码如下:
from pandas import DataFrame, Series
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from pylab import mpl
# Matplotlib global settings:
#   - 'font.sans-serif': set the default font (FangSong).
#   - 'axes.unicode_minus': work around the minus sign '-' being rendered
#     as a square box when figures are saved.
mpl.rcParams.update({
    'font.sans-serif': ['FangSong'],
    'axes.unicode_minus': False,
})
def load_data():
    """Load the part of the iris dataset used for binary classification.

    Only the rows with index labels 0 through 99 (inclusive) are kept.
    The target value 0 is remapped to -1; all other target values are left
    unchanged.
    NOTE(review): presumably the first 100 iris rows contain only classes
    0 and 1, giving labels in {-1, +1} -- confirm against the dataset.

    Returns:
        A ``(features, labels)`` tuple of DataFrames: ``features`` has the
        columns ``x1``..``x4`` and ``labels`` has the single column ``y``.
    """
    from sklearn.datasets import load_iris

    dataset = load_iris()

    # .loc slicing is label-based and inclusive, so [:99] keeps 100 rows.
    features = DataFrame(dataset['data'], columns=['x1', 'x2', 'x3', 'x4']).loc[:99]
    labels = DataFrame(dataset['target'], columns=['y']).loc[:99]

    # Remap class 0 to -1.
    labels[labels == 0] = -1

    return features, labels
class svm():
def __init__(self, trainX, trainY, epison,C, maxiter, kernal):
self.trainX = trainX
self.trainY = trainY
self.epison = epison
self.C = C
self.maxiter = maxiter
self.kernal = kernal
self.alpha = [0 for _ in range(trainX.shape[0])]
self.b = 0.
self.w = None
def kernal_whole(self, x, y):
x = x.values
y = y.values
if self.kernal == 'gauss':
return np.exp(-0.5 * np