支持向量机python代码程序_支持向量机python实现(优化版)

本文详细介绍了使用Python实现的支持向量机优化版,包括数据加载、选择误差最大点更新拉格朗日系数的策略以及分类决策函数。通过实验数据展示,表明优化版相比简化版具有更快的速度和更多支持向量。
摘要由CSDN通过智能技术生成

from numpy import *

import numpy as np

import matplotlib.pyplot as plt

def load_set_data(file_name):
    """Load a whitespace-delimited two-feature data set from a text file.

    Every non-blank line must contain at least three fields: two feature
    values followed by a class label. All three are parsed as floats.
    Blank lines are skipped.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A tuple ``(data_mat, label_mat, n)``: ``data_mat`` is a list of
        ``[x1, x2]`` pairs, ``label_mat`` is the list of labels and ``n`` is
        the number of samples that were loaded.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If a field cannot be parsed as a float.
    """
    data_mat = []
    label_mat = []
    # The context manager closes the file even when a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            line_arr = line.strip().split()
            if not line_arr:
                # A blank line (e.g. a trailing newline) carries no sample.
                continue
            data_mat.append([float(line_arr[0]), float(line_arr[1])])
            label_mat.append(float(line_arr[2]))
    return data_mat, label_mat, len(data_mat)

def select_j_rand(i, m):
    """Draw a random index different from ``i``.

    Candidates are produced by truncating a uniform draw from ``[0, m)`` to
    an integer and redrawing until the result differs from ``i``.

    Args:
        i: Index that must not be returned.
        m: Upper bound of the draw (the number of samples in the callers
            visible in this file).

    Returns:
        An integer ``j`` with ``j != i``.

    Raises:
        ValueError: If ``i == 0`` and ``m <= 1``; the only index that can be
            drawn is then ``i`` itself and the redraw loop would never end.
    """
    if i == 0 and m <= 1:
        raise ValueError("select_j_rand needs m >= 2 to pick an index other than i")
    while True:
        j = int(np.random.uniform(0, m))
        if j != i:
            return j

def clip_alpha(aj, H, L):
    """Clamp ``aj`` to the interval bounded above by ``H`` and below by ``L``.

    The upper bound is applied first and the lower bound second, so when
    ``H < L`` the lower bound wins.

    Args:
        aj: Value to clamp.
        H: Upper bound.
        L: Lower bound.

    Returns:
        ``H`` if ``aj > H``, then ``L`` if that result is below ``L``,
        otherwise the value unchanged.
    """
    capped = H if aj > H else aj
    return L if capped < L else capped

class opt_struct:
    """Working state shared by the SMO routines in this file.

    The methods other than ``__init__`` name their instance parameter ``os``
    instead of ``self``; they are still ordinary instance methods.

    Instance attributes:
        X: Training samples as passed in; rows are indexed as ``X[k, :]`` and
            combined with ``*`` as matrix products, so an ``np.matrix`` is
            expected.
        label_mat: Class labels as passed in, indexed as ``label_mat[k]``.
        C: Upper bound compared against each multiplier by the update step.
        tol: Tolerance used by the update step when testing a sample.
        m: Number of rows of ``X``.
        alphas: ``(m, 1)`` matrix of multipliers, initially all zero.
        b: Threshold term, initially ``0``.
        e_cache: ``(m, 2)`` matrix; column 0 is a "valid" flag and column 1
            holds the cached error of the sample.
    """

    def __init__(self, data_matin, class_labels, C, toler):
        """Store the training data and zero-initialise the optimiser state."""
        self.X = data_matin
        self.label_mat = class_labels
        self.C = C
        self.tol = toler
        self.m = np.shape(data_matin)[0]
        # np.asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
        self.alphas = np.asmatrix(np.zeros((self.m, 1)))
        self.b = 0
        self.e_cache = np.asmatrix(np.zeros((self.m, 2)))

    @staticmethod
    def _to_scalar(value):
        """Return a one-element array/matrix (or a plain number) as a scalar."""
        return np.asarray(value).item()

    def calc_ek(os, k):
        """Evaluate the linear decision function on sample ``k`` and its error.

        Returns:
            ``(fxk, ek)`` where ``fxk`` is
            ``sum_i alphas[i] * label_mat[i] * <X[i], X[k]> + b`` and
            ``ek = fxk - label_mat[k]``. ``ek`` keeps whatever type that
            subtraction yields (a 1x1 matrix when ``label_mat`` is a matrix).
        """
        weighted_labels = np.multiply(os.alphas, os.label_mat).T
        fxk = (weighted_labels * (os.X * os.X[k, :].T)).item() + os.b
        ek = fxk - os.label_mat[k]
        return fxk, ek

    def select_j(os, i, ei):
        """Choose the partner index for sample ``i``.

        Sample ``i`` is first marked valid in the error cache. If other valid
        entries exist, the one whose freshly computed error differs most from
        ``ei`` is chosen; otherwise, or when no candidate differs from ``ei``
        at all, a random index other than ``i`` is drawn.

        Args:
            i: Index of the first sample of the pair.
            ei: Error of sample ``i`` as returned by ``calc_ek``.

        Returns:
            ``(j, ej)``: the chosen index and its error.
        """
        ei_value = os._to_scalar(ei)
        # Written element-wise with a plain scalar so the cache update does
        # not rely on NumPy converting a list that contains a 1x1 matrix.
        os.e_cache[i, 0] = 1
        os.e_cache[i, 1] = ei_value
        # Column 0 holds the valid flags; take the row indices that are set.
        valid_e_cache_list = np.nonzero(os.e_cache[:, 0].A)[0]
        if len(valid_e_cache_list) > 1:
            maxk = -1
            max_delta_e = 0
            ej = 0
            for k in valid_e_cache_list:
                if k == i:
                    # Pairing a sample with itself cannot make progress.
                    continue
                _, ek = os.calc_ek(k)
                delta_e = abs(ei_value - os._to_scalar(ek))
                if delta_e > max_delta_e:
                    maxk = k
                    max_delta_e = delta_e
                    ej = ek
            if maxk >= 0:
                return maxk, ej
            # Every candidate had the same error as sample i; fall through to
            # a random pick instead of returning the placeholder index -1.
        j = select_j_rand(i, os.m)
        _, ej = os.calc_ek(j)
        return j, ej

    def update_ek(os, k):
        """Recompute the error of sample ``k`` and store it as valid."""
        _, ek = os.calc_ek(k)
        os.e_cache[k, 0] = 1
        os.e_cache[k, 1] = os._to_scalar(ek)

def innert(os, i):
    """Try one SMO update of the multiplier pair ``(i, j)``.

    Sample ``i`` is only processed when its error violates the tolerance
    check while its multiplier can still move in the required direction.
    The partner ``j`` comes from ``os.select_j``.

    Args:
        os: Optimiser state providing ``X``, ``label_mat``, ``alphas``, ``b``,
            ``C``, ``tol`` and the methods ``calc_ek``, ``select_j`` and
            ``update_ek``.
        i: Index of the first sample of the pair.

    Returns:
        ``1`` if both multipliers and ``os.b`` were updated, ``0`` otherwise.
    """
    fxi, ei = os.calc_ek(i)
    margin_error = os.label_mat[i] * ei
    can_increase = (margin_error < -os.tol) and (os.alphas[i] < os.C)
    can_decrease = (margin_error > os.tol) and (os.alphas[i] > 0)
    if not (can_increase or can_decrease):
        return 0

    j, ej = os.select_j(i, ei)
    alpha_i_old = os.alphas[i].copy()
    alpha_j_old = os.alphas[j].copy()

    # Feasible interval [L, H] for the new alphas[j]. The bounds are written
    # as conditional expressions rather than max()/min() so they cannot be
    # affected if `from numpy import *` rebinds those names (whether it does
    # depends on the NumPy version).
    if os.label_mat[i] != os.label_mat[j]:
        gap = os.alphas[j] - os.alphas[i]
        L = gap if gap > 0 else 0
        upper = os.C + gap
        H = upper if upper < os.C else os.C
    else:
        total = os.alphas[j] + os.alphas[i]
        lower = total - os.C
        L = lower if lower > 0 else 0
        H = total if total < os.C else os.C
    if L == H:
        print("L == H")
        return 0

    x_i = os.X[i, :]
    x_j = os.X[j, :]
    k_ii = x_i * x_i.T
    k_ij = x_i * x_j.T
    k_jj = x_j * x_j.T
    eta = 2 * k_ij - k_ii - k_jj
    if eta >= 0:
        print("eta >= 0")
        return 0

    os.alphas[j] = alpha_j_old - (os.label_mat[j] * (ei - ej)) * 1.0 / eta
    os.alphas[j] = clip_alpha(os.alphas[j], H, L)
    os.update_ek(j)
    if abs(os.alphas[j] - alpha_j_old) < 0.00001:
        print("j not move")
        return 0

    # Move alphas[i] by the same amount in the direction that keeps
    # label_i * alpha_i + label_j * alpha_j unchanged.
    os.alphas[i] = alpha_i_old + os.label_mat[i] * os.label_mat[j] * (alpha_j_old - os.alphas[j])
    os.update_ek(i)

    # Threshold candidates. The change of each multiplier is weighted by the
    # inner products of the two samples (k_ii, k_ij, k_jj), not by products
    # of the multipliers themselves.
    step_i = os.label_mat[i] * (os.alphas[i] - alpha_i_old)
    step_j = os.label_mat[j] * (os.alphas[j] - alpha_j_old)
    b1 = os.b - ei - step_i * k_ii - step_j * k_ij
    b2 = os.b - ej - step_i * k_ij - step_j * k_jj

    if os.alphas[i] > 0 and os.alphas[i] < os.C:
        os.b = b1
    elif os.alphas[j] > 0 and os.alphas[j] < os.C:
        os.b = b2
    else:
        os.b = (b1 + b2) * 1.0 / 2
    return 1

def smop(data_mat_in, class_labels, C, toler, max_iter):
    """Run the outer SMO loop and return the fitted threshold and multipliers.

    Passes alternate between all samples and only those whose multiplier lies
    strictly between 0 and ``C``. After a full pass the next pass is
    restricted; a restricted pass that changes nothing switches back to a
    full pass. The loop stops after ``max_iter`` passes, or when a full pass
    changes no pair.

    Args:
        data_mat_in: Training samples, one row per sample.
        class_labels: Class label of each sample.
        C: Upper bound for every multiplier.
        toler: Tolerance handed to the per-sample update.
        max_iter: Maximum number of passes over the data.

    Returns:
        ``(b, alphas)`` taken from the optimiser state. ``b`` is still the
        integer ``0`` if no pair was ever updated.
    """
    os = opt_struct(
        np.asmatrix(data_mat_in),
        np.asmatrix(class_labels).transpose(),
        C,
        toler,
    )
    passes = 0
    entire_set = True
    alpha_pairs_changed = 0
    # max_iter bounds every pass; the inner parentheses keep `or entire_set`
    # from bypassing that bound.
    while passes < max_iter and (alpha_pairs_changed > 0 or entire_set):
        alpha_pairs_changed = 0
        if entire_set:
            candidates = range(os.m)
        else:
            candidates = np.nonzero((os.alphas.A > 0) * (os.alphas.A < C))[0]
        for i in candidates:
            alpha_pairs_changed += innert(os, i)
        # Counted once per pass, not once per sample.
        passes += 1
        if entire_set:
            entire_set = False
        elif alpha_pairs_changed == 0:
            entire_set = True
    return os.b, os.alphas

def show_experiment_plot(alphas, data_list_in, label_list_in, b, n):
    """Plot the samples, mark the support vectors and draw the boundary.

    Samples labelled ``-1`` are drawn as blue dots and samples labelled ``1``
    as red dots. Every sample with a positive multiplier is drawn again in
    yellow and contributes ``alpha * label * x`` to the weight vector. The
    line ``w[0] * x + w[1] * y + b = 0`` is drawn in green.

    Args:
        alphas: Multipliers, one per sample (an ``(n, 1)`` matrix in the
            callers visible in this file).
        data_list_in: Samples; only the first two features are used.
        label_list_in: Class label of each sample.
        b: Threshold of the decision function.
        n: Number of samples to plot.
    """
    data_arr_in = np.asarray(data_list_in, dtype=float)
    label_arr_in = np.asarray(label_list_in, dtype=float)
    alphas_arr = np.asarray(alphas, dtype=float).reshape(-1)
    offset = float(np.asarray(b).reshape(-1)[0])

    weights = np.zeros(2)
    for i in range(n):
        x1, x2 = data_arr_in[i, 0], data_arr_in[i, 1]
        if label_arr_in[i] == -1:
            plt.plot(x1, x2, "ob")
        elif label_arr_in[i] == 1:
            plt.plot(x1, x2, "or")
        if alphas_arr[i] > 0:
            # Drawn after the class colour so the support vector stays visible.
            plt.plot(x1, x2, "oy")
            weights += alphas_arr[i] * label_arr_in[i] * data_arr_in[i, :2]

    x = np.arange(-2, 12, 0.1)
    if weights[1] != 0:
        y = (-offset - weights[0] * x) / weights[1]
        plt.plot(x, y, '-g')
    elif weights[0] != 0:
        # The boundary is vertical, so it cannot be expressed as y(x).
        plt.axvline(-offset / weights[0], color="g")
    # With a zero weight vector there is no boundary to draw.

    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()

def main():
    """Train on ``test_set.txt`` and display the resulting separator."""
    data_list, label_list, n = load_set_data("test_set.txt")
    b, alphas = smop(data_list, label_list, 0.6, 0.001, 40)
    # smop returns b as the integer 0 when no pair was updated and as a
    # one-element matrix otherwise; flattening first handles both.
    b_data = float(np.asarray(b).reshape(-1)[0])
    show_experiment_plot(alphas, data_list, label_list, b_data, n)


if __name__ == "__main__":
    # Only train and plot when run as a script, not when imported.
    main()

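opt_struct 类的成员函数 select_j 负责选择第二个待更新的拉格朗日系数:在误差缓存有效的样本中,选取与当前样本误差之差的绝对值最大的点,即由式子:

$$j = \arg\max_{k \neq i} \lvert E_i - E_k \rvert, \qquad E_k = f(x_k) - y_k$$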

决定的。这也说明:由决策函数计算得到的值与实际值偏差越大,就越需要调整决策函数的权值。分类决策函数的基本模型如下:

$$f(x) = \sum_{i=1}^{m} \alpha_i y_i \langle x_i, x \rangle + b = w^{T} x + b$$

权值与拉格朗日系数的关系:

$$w = \sum_{i=1}^{m} \alpha_i y_i x_i$$

由权值与拉格朗日系数的关系可以得出,如果权值需要更新则拉格朗日的系数也需要更新。

实验数据:

3.542485 1.977398 -1

3.018896 2.556416 -1

7.551510 -1.580030 1

2.114999 -0.004466 -1

8.127113 1.274372 1

7.108772 -0.986906 1

8.610639 2.046708 1

2.326297 0.265213 -1

3.634009 1.730537 -1

0.341367 -0.894998 -1

3.125951 0.293251 -1

2.123252 -0.783563 -1

0.887835 -2.797792 -1

7.139979 -2.329896 1

1.696414 -1.212496 -1

8.117032 0.623493 1

8.497162 -0.266649 1

4.658191 3.507396 -1

8.197181 1.545132 1

1.208047 0.213100 -1

1.928486 -0.321870 -1

2.175808 -0.014527 -1

7.886608 0.461755 1

3.223038 -0.552392 -1

3.628502 2.190585 -1

7.407860 -0.121961 1

7.286357 0.251077 1

2.301095 -0.533988 -1

-0.232542 -0.547690 -1

3.457096 -0.082216 -1

3.023938 -0.057392 -1

8.015003 0.885325 1

8.991748 0.923154 1

7.916831 -1.781735 1

7.616862 -0.217958 1

2.450939 0.744967 -1

7.270337 -2.507834 1

1.749721 -0.961902 -1

1.803111 -0.176349 -1

8.804461 3.044301 1

1.231257 -0.568573 -1

2.074915 1.410550 -1

-0.743036 -1.736103 -1

3.536555 3.964960 -1

8.410143 0.025606 1

7.382988 -0.478764 1

6.960661 -0.245353 1

8.234460 0.701868 1

8.168618 -0.903835 1

1.534187 -0.622492 -1

9.229518 2.066088 1

7.886242 0.191813 1

2.893743 -1.643468 -1

1.870457 -1.040420 -1

5.286862 -2.358286 1

6.080573 0.418886 1

2.544314 1.714165 -1

6.016004 -3.753712 1

0.926310 -0.564359 -1

0.870296 -0.109952 -1

2.369345 1.375695 -1

1.363782 -0.254082 -1

7.279460 -0.189572 1

1.896005 0.515080 -1

8.102154 -0.603875 1

2.529893 0.662657 -1

1.963874 -0.365233 -1

8.132048 0.785914 1

8.245938 0.372366 1

6.543888 0.433164 1

-0.236713 -5.766721 -1

8.112593 0.295839 1

9.803425 1.495167 1

1.497407 -0.552916 -1

1.336267 -1.632889 -1

9.205805 -0.586480 1

1.966279 -1.840439 -1

8.398012 1.584918 1

7.239953 -1.764292 1

7.556201 0.241185 1

9.015509 0.345019 1

8.266085 -0.230977 1

8.545620 2.788799 1

9.295969 1.346332 1

2.404234 0.570278 -1

2.037772 0.021919 -1

1.727631 -0.453143 -1

1.979395 -0.050773 -1

8.092288 -1.372433 1

1.667645 0.239204 -1

9.854303 1.365116 1

7.921057 -1.327587 1

8.500757 1.492372 1

1.339746 -0.291183 -1

3.107511 0.758367 -1

2.609525 0.902979 -1

3.263585 1.367898 -1

2.912122 -0.202359 -1

1.731786 0.589096 -1

2.387003 1.573131 -1

实验结果如下:

1.PNG

可以看出,优化版的结果与简化版稍有不同,但速度明显提高,支持向量也更多一些。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值