from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import cv2
import os
from pandas import DataFrame
###
# Clustering-accuracy template / template for the three main metrics
import numpy as np
from scipy.optimize import linear_sum_assignment
def cluster_acc(y_true, y_pred):
    """Return the accuracy of ``y_pred`` under the best one-to-one label mapping.

    A contingency matrix counting (predicted label, true label) pairs is
    built, and ``linear_sum_assignment`` selects the one-to-one pairing of
    predicted and true labels that maximizes the number of agreeing samples.

    Args:
        y_true: Sequence or array of integer ground-truth labels.
        y_pred: Sequence or array of integer predicted labels with the same
            number of elements as ``y_true``.

        Labels are used directly as matrix indices, so they are expected to
        be non-negative integers.

    Returns:
        The fraction of samples that agree under the optimal mapping.

    Raises:
        AssertionError: If the two inputs differ in size.
        ValueError: If the inputs are empty.
    """
    # Coerce BOTH inputs: previously only y_true was converted, so passing a
    # plain list (or a float array) as y_pred failed.
    y_true = np.asarray(y_true).astype(np.int64).reshape(-1)
    y_pred = np.asarray(y_pred).astype(np.int64).reshape(-1)
    assert y_pred.size == y_true.size, "y_true and y_pred must have the same size"
    if y_pred.size == 0:
        raise ValueError("cluster_acc() requires at least one sample")

    n_labels = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((n_labels, n_labels), dtype=np.int64)
    # Unbuffered add so that repeated (pred, true) pairs are all counted.
    np.add.at(w, (y_pred, y_true), 1)

    # linear_sum_assignment minimizes cost, so flip counts into costs.
    rows, cols = linear_sum_assignment(w.max() - w)
    return w[rows, cols].sum() / y_pred.size
#
## Naive Bayes and KNN classification
def knn_gnb_lr_lsr(X, labels, title_knn="XXX.KNN",
                   title_gnb="XXX.GNB", title_lr="XXX.LR",
                   title_lsr="XXX=LSR", n=3):
    """Fit four models on one 80/20 split and report their test scores.

    The models are k-nearest neighbours, Gaussian naive Bayes, linear
    regression (predictions rounded to the nearest label) and logistic
    regression on standardized features. Every score is computed with
    ``cluster_acc`` and printed next to its title.

    Args:
        X: Feature matrix, one row per sample.
        labels: Integer class label per sample.
        title_knn: Text printed before the KNN score.
        title_gnb: Text printed before the naive Bayes score.
        title_lr: Text printed before the linear-regression score.
        title_lsr: Text printed before the logistic-regression score.
        n: Number of decimal places kept in the returned scores.

    Returns:
        Tuple ``(knn_acc, gnb_acc, lr_acc, lsr_acc)`` rounded to ``n`` places.
    """
    # NOTE(review): cluster_acc matches predicted to true labels before
    # counting hits, so it looks like an upper bound on plain classification
    # accuracy -- confirm that this is the intended metric here.

    # One reproducible split shared by all four models, so scores are
    # comparable with each other and stable across runs.
    x_train, x_test, labels_train, labels_test = train_test_split(
        X, labels, test_size=0.2, random_state=22)

    # K-nearest neighbours
    knn = KNeighborsClassifier()
    knn.fit(x_train, labels_train)
    knn_acc = cluster_acc(labels_test, knn.predict(x_test))
    print(title_knn, "=", knn_acc)

    # Gaussian naive Bayes with default settings
    gnb = GaussianNB()
    gnb.fit(x_train, labels_train)
    gnb_acc = cluster_acc(labels_test, gnb.predict(x_test))
    print(title_gnb, "=", gnb_acc)

    # Linear regression used as a classifier: round to the nearest label and
    # clip into the observed label range, because a negative or oversized
    # value would be used as a matrix index inside cluster_acc.
    lr = LinearRegression()
    lr.fit(x_train, labels_train)
    label_sample = np.round(lr.predict(x_test))
    label_sample = np.clip(
        label_sample, np.min(labels_train), np.max(labels_train)
    ).astype(np.int64)
    lr_acc = cluster_acc(labels_test, label_sample)
    print(title_lr, "=", lr_acc)

    # Logistic regression needs standardized features. The scaler is fitted
    # on the training rows only, so no test-set statistics leak into training.
    scaler = StandardScaler()
    x_train_std = scaler.fit_transform(x_train)
    x_test_std = scaler.transform(x_test)
    log_reg = LogisticRegression()
    log_reg.fit(x_train_std, labels_train)
    lsr_acc = cluster_acc(labels_test, log_reg.predict(x_test_std))
    print(title_lsr, "=", lsr_acc)

    return round(knn_acc, n), round(gnb_acc, n), round(lr_acc, n), round(lsr_acc, n)
def createDatabase(path, re_size=(36, 136)):
    """Load the images under ``path/<class_dir>/`` as flattened RGB rows.

    Each sub-directory of ``path`` is treated as one class. Sub-directories
    are visited in sorted order and the position of a sub-directory in that
    order is the label given to every image inside it.

    Args:
        path: Root directory containing one sub-directory per class.
        re_size: Target size passed to ``cv2.resize``, i.e. (width, height).

    Returns:
        Tuple ``(img, labels)``. ``img`` is an array holding one
        ``(1, width * height * 3)`` row per loaded image and ``labels`` is an
        array with the matching class index for each image.
    """
    # Sort so label assignment does not depend on the OS listing order, and
    # ignore stray files in the root that would make os.listdir() fail.
    class_dirs = sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )
    file_number = len(class_dirs)
    print("**********************")
    print("file_number", file_number)

    img = []
    labels = []
    for k, class_dir in enumerate(class_dirs):
        class_path = os.path.join(path, class_dir)
        for file_name in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, file_name)
            image = cv2.imread(file_path, cv2.IMREAD_COLOR)
            if image is None:
                # imread returns None instead of raising for files it cannot
                # decode; skip them rather than crash inside cvtColor.
                print("skip unreadable image:", file_path)
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, re_size)
            # Flatten to a single row so each image is one feature vector.
            img.append(image.reshape(1, image.size))
            labels.append(k)  # adjust here if the dataset labels differently

    return np.array(img), np.array(labels)
def get_img(path, re_size=(70, 70), samples_per_label=80):
    """Load and resize every image file directly inside ``path``.

    Files are visited in sorted name order and labelled by position: the
    first ``samples_per_label`` files get label 0, the next batch label 1,
    and so on.

    Args:
        path: Directory containing the image files.
        re_size: Target size passed to ``Image.resize``, i.e. (width, height).
        samples_per_label: Number of consecutive files sharing one label.

    Returns:
        Tuple ``(img, labels)`` where ``img`` is an array built from the
        resized images and ``labels`` is a list of integer labels.
    """
    # PIL is used for loading because, per the original author's note, it
    # copes with image paths that contain Chinese characters.
    from PIL import Image

    img = []
    labels = []
    # Sorted so the position-based labels do not depend on listing order.
    for i, file_name in enumerate(sorted(os.listdir(path))):
        file_dir = os.path.join(path, file_name)
        # The context manager closes the underlying file handle.
        with Image.open(file_dir) as image:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # anti-aliasing filter under its current name.
            resized = image.resize(re_size, Image.LANCZOS)
            img.append(np.array(resized))
        labels.append(i // samples_per_label)

    return np.array(img), labels
# Collects one (knn, gnb, lr, lsr) score tuple per evaluated dataset.
digit = []
# Dataset root. The original comment said "face", but the path points at a
# license-plate training set.
path = 'E:\\pythonwork\\LPDdetection\\Chinese-license-plate-recognition-system\\Train\\plate4'
X1, Y1 = createDatabase(path, (180, 200))
# Flatten to (n_samples, n_features). The sample count is taken from the data
# instead of being hard-coded to 200, so other dataset sizes also work.
X1 = X1.reshape(X1.shape[0], -1)
figure = knn1, gnb1, lr1, lsr1 = knn_gnb_lr_lsr(X1, Y1)
digit.append(figure)
# KNN training code
# (web-page residue) Latest recommended article published 2024-04-02 16:29:00