第七章 你的第一个图像分类器
1.动物数据集
“动物”数据集是一个简单的示例数据集,图像有三个不同的类:狗、猫和熊猫,每个类有1000个示例图像。
2.工具包的项目结构
(文件目录结构,dogs/panda/cats文件夹各有1000幅图片)
3.simplepreprocessor.py
# -*- coding: utf-8 -*-
# import the necessary packages
import cv2
class SimplePreprocessor:
    """Resize images to a fixed width and height, ignoring aspect ratio."""

    def __init__(self, width, height, inter=cv2.INTER_AREA):
        """Remember the target size and the interpolation flag.

        Args:
            width: Target image width handed to ``cv2.resize``.
            height: Target image height handed to ``cv2.resize``.
            inter: Interpolation flag forwarded to ``cv2.resize``;
                defaults to ``cv2.INTER_AREA``.
        """
        self.width, self.height = width, height
        self.inter = inter

    def preprocess(self, image):
        """Return ``image`` resized to ``(self.width, self.height)``.

        The aspect ratio of the input is not preserved.
        """
        target_size = (self.width, self.height)
        return cv2.resize(image, target_size, interpolation=self.inter)
4.simpledatasetloader.py
# -*- coding: utf-8 -*-
#import the necessary packages
import numpy as np
import cv2
import os
class SimpleDatasetLoader:
    """Load images from disk and run them through optional preprocessors.

    Every preprocessor must expose a ``preprocess(image)`` method that
    returns the transformed image.
    """

    def __init__(self, preprocessors=None):
        """Store the preprocessors applied to every loaded image.

        Args:
            preprocessors: Optional list of preprocessor objects, applied
                in order. ``None`` is treated as an empty list.
        """
        # a None default (instead of []) avoids sharing one mutable list
        # between instances
        self.preprocessors = [] if preprocessors is None else preprocessors

    def load(self, imagePaths, verbose=-1):
        """Read, preprocess and label every image in ``imagePaths``.

        The class label is taken from the name of the directory that
        contains the image, i.e. paths are expected to look like
        ``/path/to/dataset/{class}/{image}.jpg``.

        Args:
            imagePaths: Sequence of image file paths.
            verbose: When positive, print a progress line after every
                ``verbose`` images.

        Returns:
            A tuple ``(data, labels)`` of NumPy arrays built from the
            processed images and their class labels.

        Raises:
            ValueError: If an image cannot be read from disk.
        """
        data = []
        labels = []
        total = len(imagePaths)

        # tolerate callers that reset the attribute to None after
        # construction; resolved once because it does not change per image
        preprocessors = self.preprocessors
        if preprocessors is None:
            preprocessors = []

        for (i, imagePath) in enumerate(imagePaths):
            # cv2.imread signals an unreadable or missing file by
            # returning None instead of raising, so fail here with the
            # offending path rather than deep inside a preprocessor
            image = cv2.imread(imagePath)
            if image is None:
                raise ValueError(
                    "could not read image: {}".format(imagePath))

            # the class label is the name of the parent directory
            label = imagePath.split(os.path.sep)[-2]

            # apply each preprocessor in order
            for p in preprocessors:
                image = p.preprocess(image)

            # treat the processed image as a "feature vector"
            data.append(image)
            labels.append(label)

            # show an update every `verbose` images; (i + 1) is the
            # number processed so far, so verbose=1 reports image 1 too
            if verbose > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, total))

        # return a tuple of the data and labels
        return (np.array(data), np.array(labels))
5.knn.py
注意:原书中的代码
from pyimagesearch.preprocessing import SimplePreprocessor
from pyimagesearch.datasets import SimpleDatasetLoader
需要进行一定修改(改为从具体模块导入),整体如下:
# -*- coding: utf-8 -*-
# import the necessary packages
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from pyimagesearch.preprocessing.simplepreprocessor import SimplePreprocessor
from pyimagesearch.datasets.simpledatasetloader import SimpleDatasetLoader
from imutils import paths
import argparse
# construct the argument parser and parse the command-line arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
    help="path to input dataset")
ap.add_argument("-k", "--neighbors", type=int, default=1,
    help="# of nearest neighbors for classification")
ap.add_argument("-j", "--jobs", type=int, default=-1,
    help="# of jobs for k-NN distance (-1 uses all available cores)")
args = vars(ap.parse_args())

# grab the list of images that we'll be describing
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))

# a wrong or empty dataset path would otherwise only surface later as a
# cryptic reshape / train_test_split error
if not imagePaths:
    ap.error("no images found under {}".format(args["dataset"]))

# initialize the image preprocessor, load the dataset from disk,
# and reshape the data matrix
sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)
# flatten every image into one row; -1 lets NumPy infer the row length
# (32 * 32 * 3 = 3072 here) instead of hard-coding it
data = data.reshape((data.shape[0], -1))

# show some information on memory consumption of the images
# (1 MB = 1024 * 1024 bytes)
print("[INFO] features matrix: {:.1f}MB".format(
    data.nbytes / (1024 * 1024.0)))

# encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
    test_size=0.25, random_state=42)

# train and evaluate a k-NN classifier on the raw pixel intensities
print("[INFO] evaluating k-NN classifier...")
model = KNeighborsClassifier(n_neighbors=args["neighbors"],
    n_jobs=args["jobs"])
model.fit(trainX, trainY)
print(classification_report(testY, model.predict(testX),
    target_names=le.classes_))
6.终端窗口运行
首先打开cmd终端窗口,用cd命令切换到目标文件夹下,然后执行命令:
C:\Users\SYL>D:
D:\>cd D:\project\deep_learning\chapter07
D:\project\deep_learning\chapter07>python knn.py --dataset D:\project\deep_learning\chapter07\animals
运行结束后,终端会输出最终的分类结果。