#!/usr/bin/env python3
"""Mask / no-mask image classification: PCA features + SVM.

Pipeline:
    1. Collect ``*.png`` paths from the ``mask`` and ``nomask`` folders.
    2. Hold out the first 20% of each class for validation.
    3. Resize every image to 16x16 RGB and flatten it to one 768-dim row.
    4. Min-max scale, reduce to 4 whitened PCA components, L1-normalize.
    5. Train an SVM on the training rows and print validation accuracy.

Fixes relative to the original script:
    * The original ``for`` loops overwrote a single array each iteration,
      so only the LAST opened image ever reached PCA/SVM (and each image
      was reshaped to (H, W*C) rather than a flat row, so ``clf.fit``
      received a 16x4 matrix against thousands of labels). All images
      are now stacked into one (n_samples, n_features) matrix.
    * The scaler and PCA were independently re-fit on the validation
      images; they are now fit on the training data only and the same
      fitted transforms are applied to the validation data.
    * The train (16x16) and validation (4x4) resize sizes disagreed, so
      the two feature spaces could never match; both now use 16x16.
"""
import glob
import os

import numpy as np
from PIL import Image
from sklearn import preprocessing, svm
from sklearn.decomposition import PCA
# NOTE(review): tensorflow/keras were imported by the original but never
# used anywhere in this script; kept only to avoid removing file-level
# imports another module might rely on. Confirm and drop if truly unused.
import tensorflow
from tensorflow import keras

IMG_SIZE = (16, 16)   # every image is resized to this before flattening
N_COMPONENTS = 4      # PCA output dimensionality
RATIO_TEST = 0.2      # fraction of each class held out for validation

dir_data = '/Users/mac/Desktop/Dataset2'       # dataset root
dir_mask = os.path.join(dir_data, 'mask')      # images WITH masks    -> label 0
dir_nomask = os.path.join(dir_data, 'nomask')  # images WITHOUT masks -> label 1


def _png_paths(directory):
    """Return absolute paths of all .png files directly under *directory*."""
    return [os.path.abspath(fp)
            for fp in glob.glob(os.path.join(directory, '*.png'))]


def _load_feature_matrix(paths, size=IMG_SIZE):
    """Load, resize and flatten images into an (n_samples, n_features) array.

    Each image is forced to RGB so PNGs with an alpha channel or a palette
    still yield the same feature length (size[0] * size[1] * 3) as plain
    RGB files.
    """
    rows = []
    for fp in paths:
        with Image.open(fp) as img:  # context manager closes the file handle
            arr = np.asarray(img.convert('RGB').resize(size), dtype=np.float64)
        rows.append(arr.reshape(-1))
    return np.stack(rows)


def main():
    # Fail early with a clear error; `assert` is stripped under `python -O`.
    for d in (dir_mask, dir_nomask):
        if not os.path.exists(d):
            raise FileNotFoundError('Could not find ' + d)

    fpath_mask = _png_paths(dir_mask)
    fpath_nomask = _png_paths(dir_nomask)
    label_mask = [0] * len(fpath_mask)
    label_nomask = [1] * len(fpath_nomask)
    print('#mask:', len(fpath_mask))
    print('#nomask:', len(fpath_nomask))

    # Per-class split: the first RATIO_TEST of each class -> validation,
    # the remainder -> training (same convention as the original).
    n_mask_test = int(len(fpath_mask) * RATIO_TEST)
    n_nomask_test = int(len(fpath_nomask) * RATIO_TEST)
    fpath_train = fpath_mask[n_mask_test:] + fpath_nomask[n_nomask_test:]
    label_train = label_mask[n_mask_test:] + label_nomask[n_nomask_test:]
    fpath_vali = fpath_mask[:n_mask_test] + fpath_nomask[:n_nomask_test]
    label_vali = label_mask[:n_mask_test] + label_nomask[:n_nomask_test]

    x_train = _load_feature_matrix(fpath_train)
    x_vali = _load_feature_matrix(fpath_vali)

    # Fit the whole preprocessing chain on TRAINING data only, then apply
    # the fitted transforms unchanged to the validation data.
    scaler = preprocessing.MinMaxScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_vali = scaler.transform(x_vali)

    # whiten=True rescales components to unit variance, which can help
    # downstream estimators when input features are strongly correlated.
    pca = PCA(n_components=N_COMPONENTS, svd_solver='auto',
              whiten=True).fit(x_train)
    x_train = pca.transform(x_train)
    x_vali = pca.transform(x_vali)

    # L1-normalize each sample row, matching the original's normalize step.
    x_train = preprocessing.normalize(x_train, norm='l1')
    x_vali = preprocessing.normalize(x_vali, norm='l1')

    clf = svm.SVC()
    clf.fit(x_train, label_train)
    print("SVM", clf.score(x_vali, label_vali))


if __name__ == '__main__':
    main()
# Support Vector Machine (SVM) for image classification.
# (Blog footer residue: latest recommended article published 2024-05-08 12:37:58.)