1.题干
数字识别:基于手写邮政编码点阵数据(邮政编码数据.txt),利用支持向量机实现数字的识别分类。
2.数据格式
第1列为数字标签(0~9),第2列到第257列为该数字对应的像素编码(取值范围为-1~1,共256个,对应16×16的点阵)
3.代码
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
def printf(n: int, strf: str) -> None:
    """Print a section banner: a rule of dashes followed by a bold title.

    The banner is framed by one blank line before and one blank line after.

    Args:
        n: Number of ``-`` characters in the rule.
        strf: Title text, printed wrapped in ANSI bold escape codes.
    """
    print()
    print('-' * n)
    # \033[1m turns bold on, \033[0m resets the terminal attributes.
    print(f"\033[1m{strf}\033[0m")
    print()
# Load the space-separated digit table; the file has no header row.
data = pd.read_csv('邮政编码数据.txt', sep=" ", header=None)

printf(100, '查看数据大致情况')
print(data.head(5))

# Column 0 is the digit label; columns 1..256 are the 16*16 pixel codes.
y = data.iloc[:, 0]
X = data.iloc[:, 1:257]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
svm_model = SVC(kernel='rbf').fit(X_train, y_train)

# Predict on both partitions so training and held-out accuracy can be
# compared.
y_train_pred = svm_model.predict(X_train)
y_test_pred = svm_model.predict(X_test)

train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)

print(f"支持向量机 - 训练准确率: {train_accuracy}")
print(f"支持向量机 - 测试准确率: {test_accuracy}")