Keras Tutorial: How to Get Started with Keras, Deep Learning, and Python

This walkthrough covers how to train a small neural network on a local dataset.

Step 1: Install Keras on your local system.
Step 2: Load the data from disk.
Step 3: Split the data into training and test sets.
Step 4: Define the Keras model architecture.
Step 5: Compile the Keras model, i.e. choose the optimizer and the loss function.
Step 6: Fit the model to the training data.
Step 7: Evaluate the Keras model and make predictions on the test set.
Tutorial reference: [link]
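Step 1 (installing Keras) has no corresponding code in the walkthrough below. A minimal setup, assuming a standard pip environment (the exact package set and versions are an assumption, not from the original post), is something like:

pip install tensorflow keras opencv-python imutils scikit-learn matplotlib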

# set the matplotlib backend so figures can be saved in the background
import matplotlib 
# matplotlib is the go-to plotting package for Python
matplotlib.use("Agg")

# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
# scikit-learn helps us binarize the labels, split the training/testing data, and generate a classification report in the terminal
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD
from imutils import paths
# imutils is a package of convenience functions; we use its paths module to build the list of image file paths for training
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
#%%
# construct the argument parser and parse the arguments
# =============================================================================
# ap = argparse.ArgumentParser()
# ap.add_argument("-d", "--dataset", required=True,
# 	help="path to input dataset of images")
# ap.add_argument("-m", "--model", required=True,
# 	help="path to output trained model")
# ap.add_argument("-l", "--label_bin", required=True,
# 	help="path to output label binarizer")
# ap.add_argument("-p", "--plot", required=True,
# 	help="path to output accuracy/loss plot")
# args = vars(ap.parse_args())
args = {
      "dataset": r".\animals",
      "model": r".\output\simple_nn.model",
      "label_bin": r".\output\simple_nn_lb.pickle",
      "plot": r".\output\simple_nn_plot.png"
      }
# =============================================================================
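# If you keep the argparse block above instead of the hard-coded args dict, you
# would invoke the script from the command line roughly like this (the file name
# train_simple_nn.py is only an assumed name for illustration):
#   python train_simple_nn.py --dataset animals --model output/simple_nn.model \
#       --label_bin output/simple_nn_lb.pickle --plot output/simple_nn_plot.png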

#%%
# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []

# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images(args["dataset"])))   # paths.list_images returns a generator; list() turns it into a list of file paths
# seeding the RNG right before shuffling makes every run produce the same order
#print(imagePaths)
random.seed(42)
random.shuffle(imagePaths)
#print(type(imagePaths))
#%%
# loop over the input images
for imagePath in imagePaths:
    # load the image, resize it to 32x32 pixels (ignoring aspect ratio),
    # flatten the 32x32x3=3072 pixel image into a list, and store it in the data list
    image = cv2.imread(imagePath)   # read the image into memory
    image = cv2.resize(image, (32, 32)).flatten()
    data.append(image)   # so far, data is still a plain Python list

    # extract the class label from the image path and update the labels list
#    print(imagePath)  # .\animals\panda\panda_00650.jpg
    label = imagePath.split(os.path.sep)[-2]  # os.path.sep is the OS path separator; the split gives ['.', 'animals', 'panda', 'panda_00650.jpg'], so [-2] is the class name
    labels.append(label)

#%%
# scale the raw pixel intensities to the range [0, 1]
# scaling raw pixel intensities to [0, 1] is a common preprocessing step
data = np.array(data, dtype="float") / 255.0  # this also converts the list into a NumPy array
# convert the labels list into a NumPy array as well
labels = np.array(labels)
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
	labels, test_size=0.25, random_state=42)

# convert the labels from integers to vectors (for 2-class, binary
# classification you should use Keras' to_categorical function
# instead as the scikit-learn's LabelBinarizer will not return a
# vector)
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)  # fit finds all unique class labels and converts them into one-hot encoded vectors
testY = lb.transform(testY)  # transform only applies the already-fitted one-hot encoding to the test labels
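# A minimal sketch of what LabelBinarizer does (toy labels, not from the dataset):
#   LabelBinarizer().fit_transform(["cat", "dog", "panda", "dog"])
#   -> [[1 0 0], [0 1 0], [0 0 1], [0 1 0]]   (one one-hot row per sample, columns in sorted class order)
# With only two classes it returns a single 0/1 column instead of a one-hot vector,
# which is why the comment above recommends Keras' to_categorical for 2-class problems.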
#%%
# define the model architecture
# define the 3072-1024-512-3 architecture using Keras
model = Sequential()
model.add(Dense(1024, input_shape=(3072,), activation="sigmoid"))  #32*32*3=3072
model.add(Dense(512, activation="sigmoid"))
model.add(Dense(len(lb.classes_), activation="softmax"))
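# A quick sanity check of the layer sizes (model.summary() just prints the architecture):
#   Dense(1024): 3072*1024 + 1024 = 3,146,752 parameters
#   Dense(512):  1024*512  + 512  =   524,800 parameters
#   Dense(3):    512*3     + 3    =     1,539 parameters (3 output classes, per the 3072-1024-512-3 architecture)
model.summary()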
#%%
# compile the Keras model: this is where the optimizer and loss function are chosen
# initialize our initial learning rate and # of epochs to train for
INIT_LR = 0.01
EPOCHS = 75

# compile the model using SGD as our optimizer and categorical
# cross-entropy loss (you'll want to use binary_crossentropy
# for 2-class classification)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR)  # note: newer Keras/TensorFlow versions use learning_rate= instead of lr=
model.compile(loss="categorical_crossentropy", optimizer=opt,
	metrics=["accuracy"])
#%%
# fit the model to the data
# train the deep learning model using the training data and the compiled configuration (the optimizer and loss chosen above)
# train the neural network
H = model.fit(trainX, trainY, validation_data=(testX, testY),
	epochs=EPOCHS, batch_size=32)
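# Note: the metric keys stored in H.history depend on the Keras version -- older
# releases (as used here) name them "acc"/"val_acc", newer ones "accuracy"/"val_accuracy".
# If the plotting code below raises a KeyError, inspect the available keys first:
# print(H.history.keys())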
#%%
# evaluate the Keras model: once it is fitted, use the test data to make predictions and generate a classification report
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
	predictions.argmax(axis=1), target_names=lb.classes_))

# plot the training loss and accuracy
N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["acc"], label="train_acc")
plt.plot(N, H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy (Simple NN)")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig(args["plot"])
#%%
# finally, save the model to disk so it can be reused later without retraining:
# save the model and label binarizer to disk
print("[INFO] serializing network and label binarizer...")
model.save(args["model"])
f = open(args["label_bin"], "wb")
f.write(pickle.dumps(lb))
f.close()
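#%%
# A minimal sketch of reusing the saved artifacts later without retraining
# (the example image path is taken from the debug comment earlier; adjust it to any image you have):
from keras.models import load_model

saved_model = load_model(args["model"])
saved_lb = pickle.loads(open(args["label_bin"], "rb").read())

# preprocess a single image exactly like the training data: resize, flatten, scale to [0, 1]
test_image = cv2.imread(r".\animals\panda\panda_00650.jpg")
test_image = cv2.resize(test_image, (32, 32)).flatten().astype("float") / 255.0
test_image = test_image.reshape((1, 3072))

# predict and map the highest-probability index back to a class name
preds = saved_model.predict(test_image)
print(saved_lb.classes_[preds.argmax(axis=1)[0]], preds.max())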