包含全部示例的代码仓库见 GitHub
1 导入库
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing # 预处理方法
from sklearn.svm import SVC #SVC分类,SVR回归
2 绘图格式
# Apply matplotlib's 'ggplot' style sheet to plots drawn after this point.
plt.style.use('ggplot')
3 数据准备
# Load the credit dataset. header=None tells pandas the file has no header
# row, so the columns get integer labels (0..15 in the output shown below).
data = pd.read_csv('./dataset/credit-a.csv', header=None)
# Bare expression: in a notebook cell this displays the DataFrame.
data
# output
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
0 0 30.83 0.000 0 0 9 0 1.25 0 0 1 1 0 202 0.0 -1
1 1 58.67 4.460 0 0 8 1 3.04 0 0 6 1 0 43 560.0 -1
2 1 24.50 0.500 0 0 8 1 1.50 0 1 0 1 0 280 824.0 -1
3 0 27.83 1.540 0 0 9 0 3.75 0 0 5 0 0 100 3.0 -1
4 0 20.17 5.625 0 0 9 0 1.71 0 1 0 1 2 120 0.0 -1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
648 0 21.08 10.085 1 1 11 1 1.25 1 1 0 1 0 260 0.0 1
649 1 22.67 0.750 0 0 0 0 2.00 1 0 2 0 0 200 394.0 1
650 1 25.25 13.500 1 1 13 7 2.00 1 0 1 0 0 200 1.0 1
651 0 17.92 0.205 0 0 12 0 0.04 1 1 0 1 0 280 750.0 1
652 0 35.00 3.375 0 0 0 1 8.29 1 1 0 0 0 0 0.0 1
# Features: every column except the last one.
x = data[data.columns[:-1]]
# Target: column 15, with the -1 label re-encoded as 0 (1 is left as is).
y = data[15].replace(-1, 0)
# Fix the shuffle seed so the train/test partition, and therefore the scores
# of the models fitted on it, are reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)
4 数据标准化
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so train and test are transformed alike.
scaler = preprocessing.StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
5 模型构建
# Support vector classifier with a degree-3 polynomial kernel and C=5.
model = SVC(C=5, kernel='poly', degree=3)
# Kept as a bare expression so the notebook echoes the fitted estimator.
model.fit(x_train, y_train)
# output
SVC(C=5, kernel='poly')
# Score the polynomial-kernel model on the held-out test split
# (for classifiers, `score` reports mean accuracy).
model.score(x_test, y_test)
# output
0.8902439024390244
# Second classifier for comparison: RBF kernel with gamma=0.5 and C=5.
model2 = SVC(C=5, kernel='rbf', gamma=0.5)
# Kept as a bare expression so the notebook echoes the fitted estimator.
model2.fit(x_train, y_train)
# output
SVC(C=5, gamma=0.5)
# Score the RBF-kernel model on the same held-out test split
# (for classifiers, `score` reports mean accuracy).
model2.score(x_test, y_test)
# output
0.7865853658536586