SVM代码实现
方法一:LinearSVC
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import LinearSVC
加载数据
def loadDataSet(fileName):
    """Read a tab-separated sample file into feature rows and labels.

    Every non-blank line is split on tab characters. The first two fields
    are parsed as floats and stored together as one feature row; the third
    field is parsed as a float and stored as that row's label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[float, float]`` rows and ``labelMat`` is a list of floats, both
        in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            lineArr = stripped.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
# Load the two-feature samples and their labels from the tab-separated file.
# NOTE(review): the file name ends in a lowercase "l" ("svml.txt"); confirm it
# is not meant to be "svm1.txt".
dataMat, labelMat = loadDataSet('data/svml.txt')
# The fitting and plotting code refers to the samples as X and the labels as
# y; bind those names here so they are defined before first use.
X, y = dataMat, labelMat

# LinearSVC implements a linear SVM.
model = LinearSVC(C=1)
model.fit(X, y)
# Keep the fitted intercept and weight vector for the plotting code.
b = model.intercept_
w = model.coef_
print('b=', b)
print('w=', w)
画图
# Scatter the training samples and draw the fitted separating line together
# with the two margin lines.
fig = plt.figure()
ax = fig.add_subplot(111)
# Only the alias `mpl` is imported at this point in the file.
cm_dark = mpl.colors.ListedColormap(['g', 'r'])
points = np.array(dataMat)
labels = np.array(labelMat).squeeze()
ax.scatter(points[:, 0], points[:, 1], c=labels, cmap=cm_dark, s=30)

# All three lines share the same horizontal sample positions.
x0 = np.arange(-2.0, 12.0, 0.1)

# Decision boundary: solve w0*x0 + w1*x1 + b = 0 for x1.
x1 = (-w[0][0] * x0 - b) / w[0][1]
ax.plot(x0, x1.reshape(-1, 1))

# Margin lines: the same equation with the right-hand side set to +1 and -1.
for offset in (1, -1):
    margin = (offset - w[0][0] * x0 - b) / w[0][1]
    ax.plot(x0, margin.reshape(-1, 1), color='green')

ax.axis([-2, 12, -8, 6])
plt.show()
方法二:SVC
from sklearn.svm import SVC

# Fit an SVC with a linear kernel on the samples loaded by loadDataSet.
model = SVC(C=1, kernel='linear')
model.fit(dataMat, labelMat)
b = model.intercept_
w = model.coef_
# Predict the class of a single two-feature sample.
# NOTE(review): the second coordinate was written as "0191818", which is not
# a valid Python literal; it is assumed the decimal point was lost — confirm.
print(model.predict(np.array([[7.886242, 0.191818]])))
print('b=', b)
print('w=', w)
# Fitted attributes of the trained model (all carry a trailing underscore).
print('各类别有多少个支持向量', model.n_support_)
print('各类别的支持向量在训练样本中的索引', model.support_)
print('各类多有的支持向量', model.support_vectors_)
print('支持向量的alpah的值', model.dual_coef_)
使用高斯核
# Same classifier as above but with the Gaussian (RBF) kernel, trained on the
# samples loaded by loadDataSet.
model = SVC(C=1, kernel='rbf')
model.fit(dataMat, labelMat)
调参
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
# The alias ends in the digit one ("mp1"); the plotting code further down
# refers to matplotlib by exactly this name.
import matplotlib as mp1
import matplotlib.colors
import matplotlib.pyplot as plt

# Read the tab-separated sample file without a header row, so the columns are
# labelled 0, 1, 2.
data = pd.read_csv('data/svm3.txt', sep='\t', header=None)
# Columns 0 and 1 are used as the features, column 2 as the target.
x, y = data[[0, 1]], data[2]
# Parameter grid as (kernel, C, gamma) triples, in plotting order:
# four gamma values at C=1, the same four at C=5, then four C values at gamma=5.
_GAMMA_SWEEP = (0.1, 1, 10, 100)
_C_SWEEP = (1, 50, 100, 1000)
clf_param = (
    *(('rbf', 1, gamma_value) for gamma_value in _GAMMA_SWEEP),
    *(('rbf', 5, gamma_value) for gamma_value in _GAMMA_SWEEP),
    *(('rbf', c_value, 5) for c_value in _C_SWEEP),
)
# Per-feature minimum and maximum of the samples; these bound the grid and
# are reused for the axis limits when plotting.
x1_min, x2_min = np.min(x, axis=0)
x1_max, x2_max = np.max(x, axis=0)
# 200 x 200 evaluation grid spanning the data range (a complex step in
# np.mgrid means "number of points", endpoints included).
x1, x2 = np.mgrid[x1_min:x1_max:200j, x2_min:x2_max:200j]
# Flatten both coordinate arrays and pair them up: one (x1, x2) row per grid
# point, in the layout the classifier's predict() is called with.
grid_test = np.stack((x1.ravel(), x2.ravel()), axis=1)
# Colour maps: light shades for the predicted regions, dark for the samples.
cm_light = mp1.colors.ListedColormap(['#77E0A0', '#FFA0A0'])
cm_dark = mp1.colors.ListedColormap(['g', 'r'])
# Presumably SimHei is selected so the Chinese titles render — confirm the
# font is installed on the target machine.
mp1.rcParams['font.sans-serif'] = [u'SimHei']
mp1.rcParams['axes.unicode_minus'] = False
plt.figure(figsize=(14, 10), facecolor='w')

# One subplot per (kernel, C, gamma) entry, laid out on a 3 x 4 grid.
for i, param in enumerate(clf_param):
    kernel, penalty, gamma = param[0], param[1], param[2]
    # gamma is set for every entry, as in the original unconditional assignment.
    clf = svm.SVC(C=penalty, kernel=kernel, gamma=gamma)
    clf.fit(x, y)

    # Training-set accuracy, computed once and used for both print and title.
    y_hat = clf.predict(x)
    accuracy = accuracy_score(y, y_hat)
    print(u'准确率:', accuracy)
    # Raw string so "\gamma" reaches mathtext unchanged; both "$" delimiters
    # are required for the expression to parse.
    title = r'C=%.1f, $\gamma$ =%.1f,准确率1=%.2f' % (penalty, gamma, accuracy)

    plt.subplot(3, 4, i + 1)
    # Predicted class of every grid point, reshaped back to the grid so it
    # can be drawn as a coloured background.
    grid_hat = clf.predict(grid_test)
    grid_hat = grid_hat.reshape(x1.shape)
    plt.pcolormesh(x1, x2, grid_hat, cmap=cm_light, alpha=0.8)
    # Training samples coloured by label.
    plt.scatter(x[0], x[1], c=y, edgecolors='k', s=40, cmap=cm_dark)
    # Ring the support vectors. support_ holds positional indices into the
    # training data, so index by position rather than by label.
    plt.scatter(x.iloc[clf.support_, 0], x.iloc[clf.support_, 1],
                edgecolors='k', facecolors='none', s=100)

    # Contours of the decision function at -1, -0.5, 0, 0.5 and 1; the zero
    # level is the decision boundary.
    z = clf.decision_function(grid_test)
    z = z.reshape(x1.shape)
    plt.contour(x1, x2, z,
                colors=list('kbrbk'),
                linestyles=['--', '--', '-', '--', '--'],
                linewidths=[1, 0.5, 1.5, 0.5, 1],
                levels=[-1, -0.5, 0, 0.5, 1])
    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)
    plt.title(title, fontsize=14)

# Figure-level heading above all subplots.
plt.suptitle(u'SVM不同参数的分类', fontsize=20)
plt.show()