下面的代码是根据李航博士《统计学习方法》一书写的SVM的实现。还有些问题,贴出来大家给些建议。
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 19 16:05:09 2017
@author: ocean
"""
import numpy as np
def kernel(x1,x2):
np_x1 = np.array(x1)
np_x2 = np.array(x2)
return np.dot(np_x1,np_x2)
class svm(object):
def __init__(self,train_data,label, C=0.1, b = 0):
#each row is a sample
self.x = train_data
self.y = label
self.N = x.shape[0]
self.alpha = np.zeros([self.N])#N by 1
self.C = C
self.b = b
self.support_vector_idx=set()# the index of support vector, i.e., 0<alpha[support_vector_idx[i]]<C
#
def predict(self, xi):
'''
output the prediction of xi
'''
s = 0
for j in xrange(self.N):
s = s + self.alpha[j]*self.y[j]*kernel(self.x[j,:],xi)
p = s + self.b
return p
#
def choose_alpha1(self):
'''
see p 128-129
'''
epsilon = 1e-5
for idx in self.support_vector_idx:
if abs (self.y[idx]*self.predict(self.x[idx,:]) - 1) < epsilon:
continue
return idx
for idx in xrange(self.N):
if abs(self.alpha[idx]-0)<epsilon and self.y[idx]*self.predict(self.x[idx,:])>=1-epsilon:
continue
if abs(self.alpha[idx]-self.C)<epsilon and self.y[idx]*self.predict(self.x[idx,:]) <=1+epsilon:
continue
return idx
#
def choose_alpha2(self, alpha1_idx):
E1 = self.predict(self.x[alpha1_idx,:]) - self.y[alpha1_idx]
alpha2_idx = 0
max_e = -9999999999
for i in xrange(self.N):
if i == alpha1_idx:
continue
E2 = self.predict(self.x[i,:]) - self.y[i]
if abs(E1-E2) > max_e:# |E1-E2|
max_e = abs(E1-E2)
alpha2_idx = i
return alpha2_idx
#
def get_L_H(self, alpha1_idx, alpha2_idx):
y1 = self.y[alpha1_idx]
y2 = self.y[alpha2_idx]
alpha1 = self.alpha[alpha1_idx]
alpha2 = self.alpha[alpha2_idx]
#
if y1 != y2:
L = max(0, alpha2-alpha1)
H = min(self.C, self.C+alpha2-alpha1)
else:
L = max(0, alpha2+alpha1-self.C)
H = min(C, alpha2+alpha1)
return L,H
#
def update_alpha_b(self, alpha1_idx,alpha2_idx):
y1 = self.y[alpha1_idx]
y2 = self.y[alpha2_idx]
E1 = self.predict(self.x[alpha1_idx,:]) - self.y[alpha1_idx]
E2 = self.predict(self.x[alpha2_idx,:]) - self.y[alpha2_idx]
eta = kernel(self.x[alpha1_idx,:], self.x[alpha1_idx,:]) \
+kernel(self.x[alpha2_idx,:], self.x[alpha2_idx,:]) \
-2*kernel(self.x[alpha1_idx,:], self.x[alpha2_idx,:])
alpha1_old = self.alpha[alpha1_idx]
alpha2_old = self.alpha[alpha2_idx]
#
L,H = self.get_L_H(alpha1_idx,alpha2_idx)
#
alpha2_new_unc = alpha2_old + (y2*(E1-E2))/eta
#
if alpha2_new_unc > H:
alpha2_new = H
elif alpha2_new_unc < L:
alpha2_new = L
else:
alpha2_new = alpha2_new_unc
#
alpha1_new = alpha1_old + y1*y2*(alpha2_old-alpha2_new)
#
self.update_b(alpha1_idx,alpha2_idx,alpha1_new,alpha2_new)
self.alpha[alpha1_idx] = alpha1_new
self.alpha[alpha2_idx] = alpha2_new
self.update_support_vector_idx(alpha1_idx,alpha2_idx)
#
#see p 129-130
def update_b(self,alpha1_idx,alpha2_idx,alpha1_new,alpha2_new):
y1 = self.y[alpha1_idx]
y2 = self.y[alpha2_idx]
x1 = self.x[alpha1_idx,:]
x2 = self.x[alpha2_idx,:]
alpha1_old = self.alpha[alpha1_idx]
alpha2_old = self.alpha[alpha2_idx]
K11 = kernel(x1,x1)
K21 = kernel(x2,x1)
K12 = kernel(x1,x2)
K22 = kernel(x2,x2)
E1 = self.predict(x1) - self.y[alpha1_idx]
E2 = self.predict(x2) - self.y[alpha2_idx]
#
b1_new = -E1 - y1*K11*(alpha1_new-alpha1_old) - y2*K21*(alpha2_new-alpha2_old) + self.b
b2_new = -E2 - y1*K12*(alpha1_new-alpha1_old) - y2*K22*(alpha2_new-alpha2_old) + self.b
epsilon = 1e-5
if (alpha1_new>0 and alpha1_new < self.C) and (alpha2_new > 0 and alpha2_new < self.C):
assert(abs(b1_new - b2_new)<epsilon)
self.b = b1_new
else:
self.b = (b1_new + b2_new)/2
#
def update_support_vector_idx(self, alpha1_idx,alpha2_idx):
epsilon = 1e-5
alpha1_new = self.alpha[alpha1_idx]
alpha2_new = self.alpha[alpha2_idx]
#
if alpha1_new > 0 and alpha1_new < self.C:
self.support_vector_idx.add(alpha1_idx)
else:
try:
self.support_vector_idx.remove(alpha1_idx)
except:
pass
#
if alpha2_new > 0 and alpha2_new < self.C:
self.support_vector_idx.add(alpha2_idx)
else:
try:
self.support_vector_idx.remove(alpha2_idx)
except:
pass
#
def is_stop(self):
epsilon = 1e-5
#condition 1,2,3
s = 0
for i in xrange(self.N):
alphai = self.alpha[i]
yi = self.y[i]
xi = self.x[i,:]
g_xi = self.predict(xi)
#
s += alphai*yi
#
if alphai < -epsilon or alphai > self.C + epsilon:
return False
#
if (abs(alphai-0)<epsilon and yi*g_xi >=1) \
or (alphai > 0-epsilon and alphai < self.C+epsilon and abs(yi*g_xi-1)<epsilon) \
or (abs(alphai - self.C)<epsilon and yi*g_xi <= 1):
continue
else:
return False
if abs(s - 0)<=epsilon:
return True
else:
return False
#
def train(self):
while True:
alpha1_idx = self.choose_alpha1()
alpha2_idx = self.choose_alpha2(alpha1_idx)
#
self.update_alpha_b(alpha1_idx,alpha2_idx)
#
if self.is_stop():
break
print self.alpha
print self.support_vector_idx
for i in xrange(self.N):
print self.predict(self.x[i,:])
#load data
x = np.loadtxt('train_data')
y = np.loadtxt('train_label')
n = x.shape[0]
#initialize alpha,C
#alpha_0 = np.zeros([n,1])
C = 0.1
b = 0
my_svm = svm(x,y,C,b)
my_svm.train()
print my_svm.alpha
print my_svm.support_vector_idx
#################下面是产生随机样本的代码###########
#代码是别人的,连接丢失了怎么也找不到,大家如果知道,请联系我,我尽快引用上。
# encoding=utf8
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
N = 10 #生成训练数据的个数
# AX=0 相当于matlab中 null(a','r')
def null(a, rtol=1e-5):
u, s, v = np.linalg.svd(a)
rank = (s > rtol*s[0]).sum()
return rank, v[rank:].T.copy()
# 符号函数,之后要进行向量化
def sign(x):
if x > 0:
return 1
elif x == 0:
return 0
elif x < 0:
return -1
#noisy=False,那么就会生成N的dim维的线性可分数据X,标签为y
#noisy=True, 那么生成的数据是线性不可分的,标签为y
def mk_data(N, noisy=False):
rang = [-10,10]
dim = 2
X=np.random.rand(dim,N)*(rang[1]-rang[0])+rang[0]
while True:
Xsample = np.concatenate((np.ones((1,dim)), np.random.rand(dim,dim)*(rang[1]-rang[0])+rang[0]))
k,w=null(Xsample.T)
y = sign(np.dot(w.T,np.concatenate((np.ones((1,N)), X))))
if np.all(y):
break
if noisy == True:
idx = random.sample(range(1,N), N/10)
for id in idx:
y[0][id] = -y[0][id]
return (X,y,w)
def data_visualization(X,y,title):
class_1 = [[],[]]
class_2 = [[],[]]
size = len(y)
for i in xrange(size):
X_1 = X[0][i]
X_2 = X[1][i]
if y[i] == 1:
class_1[0].append(X_1)
class_1[1].append(X_2)
else:
class_2[0].append(X_1)
class_2[1].append(X_2)
plt.figure(figsize=(8, 6), dpi=80)
plt.title(title)
axes = plt.subplot(111)
type1 = axes.scatter(class_1[0], class_1[1], s=20, c='red')
type2 = axes.scatter(class_2[0], class_2[1], s=20, c='green')
plt.show()
def rebuild_features(features):
size = len(features[0])
new_features = []
for i in xrange(size):
new_features.append([features[0][i],features[1][i]])
return new_features
def generate_dataset(size, noisy = False, visualization = True):
global sign
sign = np.vectorize(sign)
X,y,w = mk_data(size,False)
y = list(y[0])
if visualization:
data_visualization(X,y,'all data') #数据可视化
testset_size = int(len(y)*0.333)
indexes = [i for i in xrange(len(y))]
test_indexes = random.sample(indexes,testset_size)
train_indexes = list(set(indexes)-set(test_indexes))
trainset_features = [[],[]]
trainset_labels = []
testset_features = [[],[]]
testset_labels = []
for i in test_indexes:
testset_features[0].append(X[0][i])
testset_features[1].append(X[1][i])
testset_labels.append(y[i])
if visualization:
data_visualization(testset_features,testset_labels,'test set')
for i in train_indexes:
trainset_features[0].append(X[0][i])
trainset_features[1].append(X[1][i])
trainset_labels.append(y[i])
if visualization:
data_visualization(trainset_features,trainset_labels,'train set')
return rebuild_features(trainset_features),trainset_labels,rebuild_features(testset_features),testset_labels
if __name__ == '__main__':
size = 1000
generate_dataset(size)
# generate_dataset
# print sign
# sign = np.vectorize(sign)
# X,y,w = mk_data(size,False)
#
# data_visualization(X,y)
# encoding=utf8
上面的svm代码存在一些小问题,如果大家看出来请留言....