1.3 特征分类
特征分类:最后,对上一步得到的特征进行分类,通常使用 SVM、AdaBoost 等分类器。
1.3.1 支持向量机
1.3.2 python-opencv实现支持向量机
对线性可分数据分类
from re import S
from sklearn import datasets
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
from sklearn import model_selection as ms
import cv2
# Generate a synthetic two-class dataset with two features and no redundant
# ones; the fixed random_state keeps the samples reproducible.
X, y = datasets.make_classification(
    n_samples=100,
    n_features=2,
    n_redundant=0,
    n_classes=2,
    random_state=7816,
)

# Scatter-plot the raw samples coloured by class label, save the figure to
# disk and display it.
plt.scatter(X[:, 0], X[:, 1], c=y, s=100)
plt.xlabel("x values")
plt.ylabel("y values")
plt.savefig("data.png")
plt.show()

# Preprocessing: cast the features to float32 and remap the labels
# from {0, 1} to {-1, +1}, then hold out 20% of the samples for testing.
X = X.astype(np.float32)
y = 2 * y - 1
X_train, X_test, y_train, y_test = ms.train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create an OpenCV SVM with a linear kernel and fit it on the training split
# (ROW_SAMPLE: every row of X_train is one sample).
svm = cv2.ml.SVM_create()
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.train(X_train, cv2.ml.ROW_SAMPLE, y_train)

# Predict labels for the held-out samples; predict() returns a pair and the
# predictions are its second item.
_, y_pred = svm.predict(X_test)

# Score the classifier on the test split and report the accuracy.
acc = metrics.accuracy_score(y_test, y_pred)
print(acc)
# 决策边界可视化
def plot_decision_boundary(svm, X_test, y_test):
    """Plot the regions predicted by ``svm`` together with the test samples.

    A grid with 0.02 spacing is laid over the bounding box of ``X_test``
    (padded by 1 on every side). ``svm.predict`` is evaluated on each grid
    point and the result is drawn as a filled contour plot, with the test
    samples scattered on top and coloured by ``y_test``. The figure is then
    written to ``vis_decision_boundary.png`` and shown.

    Args:
        svm: Model whose ``predict`` returns a pair; the second item holds
            one prediction per input row.
        X_test: 2-D array of samples; column 0 is drawn on the x axis and
            column 1 on the y axis.
        y_test: Values used to colour the scattered samples.
    """
    step = 0.02
    first_col = X_test[:, 0]
    second_col = X_test[:, 1]

    # Pad the plotted area by one unit beyond the extreme samples.
    x_lo = first_col.min() - 1
    x_hi = first_col.max() + 1
    y_lo = second_col.min() - 1
    y_hi = second_col.max() + 1

    grid_x, grid_y = np.meshgrid(
        np.arange(x_lo, x_hi, step),
        np.arange(y_lo, y_hi, step),
    )

    # One row per grid point, cast to float32 before being passed to predict().
    flat_x = grid_x.ravel().astype(np.float32)
    flat_y = grid_y.ravel().astype(np.float32)
    grid_points = np.column_stack((flat_x, flat_y))

    # Only the second item of the returned pair (the predictions) is needed;
    # fold it back into the grid layout for contourf.
    predictions = svm.predict(grid_points)[1]
    regions = predictions.reshape(grid_x.shape)

    plt.contourf(grid_x, grid_y, regions, cmap=plt.cm.coolwarm, alpha=0.8)
    plt.scatter(first_col, second_col, c=y_test, s=200)
    plt.savefig("vis_decision_boundary.png")
    plt.show()
# Visualise the trained SVM's decision regions over the held-out samples.
plot_decision_boundary(svm=svm, X_test=X_test, y_test=y_test)
数据可视化图像
决策边界可视化图像
采用不同的核函数(如高斯核 RBF)可以实现非线性支持向量机;下面依次比较线性核、直方图交叉核(INTER)、Sigmoid 核和 RBF 核:
# 决策边界可视化
def plot_decision_boundary(svm, X_test, y_test):
    """Draw the regions predicted by ``svm`` and overlay the test samples.

    ``svm.predict`` is evaluated on a grid (spacing 0.02) spanning the range
    of ``X_test`` extended by 1 in every direction. The predictions are
    rendered with ``plt.contourf`` and the samples with ``plt.scatter``,
    coloured by ``y_test``. The figure is neither saved nor shown here; that
    is left to the caller.

    Args:
        svm: Model whose ``predict`` returns a pair; the second item holds
            one prediction per input row.
        X_test: 2-D array of samples; column 0 is drawn on the x axis and
            column 1 on the y axis.
        y_test: Values used to colour the scattered samples.
    """
    margin, spacing = 1, 0.02

    # Axis ticks of the evaluation grid, padded by `margin` on both ends.
    x_ticks = np.arange(
        X_test[:, 0].min() - margin, X_test[:, 0].max() + margin, spacing
    )
    y_ticks = np.arange(
        X_test[:, 1].min() - margin, X_test[:, 1].max() + margin, spacing
    )
    xx, yy = np.meshgrid(x_ticks, y_ticks)

    # Flatten the grid into float32 (x, y) rows, predict, and restore the
    # grid shape so the result lines up with xx / yy.
    samples = np.c_[
        xx.ravel().astype(np.float32),
        yy.ravel().astype(np.float32),
    ]
    predicted = svm.predict(samples)[1].reshape(xx.shape)

    plt.contourf(xx, yy, predicted, cmap=plt.cm.coolwarm, alpha=0.8)
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, s=200)
if __name__ == "__main__":
    # NOTE(review): `S` is not used anywhere in the visible code; the import
    # is kept as-is in case something outside this listing relies on it.
    from re import S
    from sklearn import datasets
    from sklearn import metrics
    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn import model_selection as ms
    import cv2

    # Generate a synthetic two-class dataset with two features and no
    # redundant ones; the fixed random_state keeps it reproducible.
    X, y = datasets.make_classification(
        n_samples=100,
        n_features=2,
        n_redundant=0,
        n_classes=2,
        random_state=7816,
    )

    # Scatter-plot the raw samples coloured by class, save and display them.
    plt.scatter(X[:, 0], X[:, 1], c=y, s=100)
    plt.xlabel("x values")
    plt.ylabel("y values")
    plt.savefig("data.png")
    plt.show()

    # Preprocessing: float32 features, labels remapped from {0, 1} to
    # {-1, +1}, and 20% of the samples held out for testing.
    X = X.astype(np.float32)
    y = 2 * y - 1
    X_train, X_test, y_train, y_test = ms.train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Train one SVM per kernel type and draw each result in its own panel of
    # a 2x2 grid, titled with the accuracy on the test split.
    kernels = [
        cv2.ml.SVM_LINEAR,
        cv2.ml.SVM_INTER,
        cv2.ml.SVM_SIGMOID,
        cv2.ml.SVM_RBF,
    ]
    for idx, kernel in enumerate(kernels):
        svm = cv2.ml.SVM_create()
        svm.setKernel(kernel)  # kernel under comparison in this iteration
        svm.train(X_train, cv2.ml.ROW_SAMPLE, y_train)

        # predict() returns a pair; the predictions are its second item.
        _, y_pred = svm.predict(X_test)

        acc = metrics.accuracy_score(y_test, y_pred)
        print(acc)

        plt.subplot(2, 2, idx + 1)
        plot_decision_boundary(svm, X_test, y_test)
        plt.title("accuracy=%.2f" % acc)

    # NOTE(review): the listing lost its indentation; saving/showing is placed
    # after the loop so that all four panels end up in one figure — confirm.
    plt.savefig("non-linear.png")
    plt.show()