Creating a CNN with Keras for Object Recognition
1. The cifar10 dataset in keras.datasets
from keras.datasets import cifar10
Download the dataset from the link; a download manager such as 迅雷 (Thunder) is faster. The downloaded file is named cifar-10-python.tar.gz. Rename it to cifar-10-batches-py.tar.gz and place it in the directory C:\Users\.keras\datasets.
Training set: 50,000 images of shape 3*32*32; labels in [0, ..., 9].
Validation set: 10,000 images.
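Once the archive is in place, a quick sanity check that Keras picks it up can look like this (a minimal sketch; the channel ordering matches the full example below):

from keras import backend as K
from keras.datasets import cifar10

K.set_image_dim_ordering('th')  # channels-first, as used throughout this post

# load_data() looks for the archive in the Keras cache directory
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print(x_train.shape, y_train.shape)  # 50000 training images, labels in [0, 9]
print(x_test.shape, y_test.shape)    # 10000 test images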
from keras.datasets import cifar10
from keras.layers.convolutional import MaxPooling2D
from keras.layers import Flatten
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D
import numpy as np
from keras import backend as K
K.set_image_dim_ordering('th')  # Theano-style channels-first ordering: (channels, height, width)
# Loading the data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
'''
# Let's see some examples from this data
fig = plt.figure()
gs = gridspec.GridSpec(4, 4, wspace=0.0)
ax = [plt.subplot(gs[i]) for i in range(4*4)]
for i in range(16):
    a = x_train[i]
    a = a[:, :, :, np.newaxis]
    r, g, b = np.split(a, indices_or_sections=3, axis=0)
    a = np.concatenate([r, g, b], axis=3)
    a = a.squeeze()
    ax[i].imshow(Image.fromarray(a))
plt.show()
'''
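For reference, an uncommented version of that preview with the required imports, written as a minimal sketch that assumes the channels-first layout configured above:

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

fig = plt.figure()
gs = gridspec.GridSpec(4, 4, wspace=0.0)
ax = [plt.subplot(gs[i]) for i in range(16)]
for i in range(16):
    # x_train[i] has shape (3, 32, 32); move the channel axis last so imshow can render it
    ax[i].imshow(np.transpose(x_train[i], (1, 2, 0)))
    ax[i].axis('off')
plt.show()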
# Convert the class labels to a one-hot encoded matrix: 0 -> [1,0,...,0], ..., 9 -> [0,...,0,1]
y_train_onehot = np_utils.to_categorical(y_train)
y_test_onehot = np_utils.to_categorical(y_test)
# Normalize pixel values to [0, 1]
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
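As a small illustration of what to_categorical produces above, a label of 3 becomes a length-10 indicator vector:

print(np_utils.to_categorical([3], 10))
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]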
1. Build the model
model = Sequential()
Convolutional layer: model.add(Conv2D(filters=..., kernel_size=..., padding='same', input_shape=..., activation='relu'))
Pooling layer: model.add(MaxPooling2D(pool_size=(2, 2)))
Flatten layer: model.add(Flatten())
Fully connected layer: model.add(Dense(units=512, activation='relu'))
# Use the Sequential model to stack a set of layers.
num_classes = 10
model = Sequential()
# First convolutional layer
model.add(Conv2D(filters=32,
                 kernel_size=(3, 3),
                 padding='same',
                 input_shape=(3, 32, 32),
                 activation='relu'))
# Second convolutional layer
model.add(Conv2D(filters=32, kernel_size=(3, 3),
                 padding='same', activation='relu'))
# Pooling layer
model.add(MaxPooling2D(pool_size=(2, 2)))
# Flatten() flattens the feature maps into a 1-D vector
model.add(Flatten())
model.add(Dense(units=512, activation='relu'))
# Output layer: one unit per class
model.add(Dense(num_classes, activation='softmax'))
# Compile the model
epochs = 50
lrate = 0.05
sgd = SGD(lr=lrate,
          momentum=0.8,          # momentum, a float >= 0
          decay=lrate/epochs,    # learning-rate decay applied after each update
          nesterov=False)        # whether to use Nesterov momentum
model.compile(loss='categorical_crossentropy',  # multi-class log loss, paired with the softmax output;
                                                # labels must be one-hot encoded as (nb_samples, nb_classes)
              optimizer=sgd,
              metrics=['accuracy'])
# Print summary of CNN
print(model.summary())
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 32)        9248
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 16, 16)        0
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0
_________________________________________________________________
dense_1 (Dense)              (None, 512)               4194816
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5130
=================================================================
Total params: 4,210,090
Trainable params: 4,210,090
Non-trainable params: 0
_________________________________________________________________
None
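The Param # column can be checked by hand: a Conv2D layer holds (kernel_height * kernel_width * input_channels + 1) * filters weights, and a Dense layer holds (inputs + 1) * units. Verifying the numbers in the summary above:

conv1  = (3 * 3 * 3 + 1) * 32        # 896
conv2  = (3 * 3 * 32 + 1) * 32       # 9248
dense1 = (32 * 16 * 16 + 1) * 512    # 4194816  (Flatten output is 32*16*16 = 8192)
dense2 = (512 + 1) * 10              # 5130
print(conv1 + conv2 + dense1 + dense2)  # 4210090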
2. Training and evaluation
model.fit()
model.evaluate()
# Fitting the model
model.fit(x_train, y_train_onehot, validation_data=(x_test, y_test_onehot), epochs=epochs, batch_size=100)
# Final evaluation of the model
loss, accuracy = model.evaluate(x_test, y_test_onehot, verbose=0)
print("Model Accuracy = {:.4f}".format(accuracy))
3. Results
Epoch 1/50 - loss: 1.8169 - acc: 0.3460 - val_loss: 1.5685 - val_acc: 0.4329
Epoch 2/50 - loss: 1.3877 - acc: 0.5058 - val_loss: 1.3792 - val_acc: 0.5129
Epoch 3/50 - loss: 1.1807 - acc: 0.5797 - val_loss: 1.2014 - val_acc: 0.5807
Epoch 4/50 - loss: 0.9950 - acc: 0.6482 - val_loss: 1.0757 - val_acc: 0.6150
Epoch 5/50 - loss: 0.8353 - acc: 0.7069 - val_loss: 1.0268 - val_acc: 0.6452
Epoch 6/50 - loss: 0.6723 - acc: 0.7645 - val_loss: 1.0511 - val_acc: 0.6480
Epoch 7/50 - loss: 0.5094 - acc: 0.8232 - val_loss: 1.2619 - val_acc: 0.6250
Epoch 8/50 - loss: 0.3615 - acc: 0.8765 - val_loss: 1.2374 - val_acc: 0.6513
Epoch 9/50 - loss: 0.2408 - acc: 0.9187 - val_loss: 1.3188 - val_acc: 0.6532
Epoch 10/50 - loss: 0.1522 - acc: 0.9494 - val_loss: 1.6040 - val_acc: 0.6385
Epoch 11/50 - loss: 0.1107 - acc: 0.9644 - val_loss: 1.6339 - val_acc: 0.6591
Epoch 12/50 - loss: 0.0600 - acc: 0.9816 - val_loss: 1.8889 - val_acc: 0.6526
Epoch 13/50
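Note how the training accuracy keeps climbing toward 1.0 while the validation accuracy stalls around 0.65: the network is overfitting. A common tweak (not part of the original model above) is to add Dropout layers, roughly like this:

from keras.layers import Dropout

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(3, 32, 32), activation='relu'))
model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))   # randomly zero 25% of activations during training
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))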
2. The MNIST dataset
from keras.layers.convolutional import MaxPooling2D
from keras.layers import Flatten
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D
from PIL import Image
import matplotlib.gridspec as gridspec
from keras import backend as K
K.set_image_dim_ordering('th')
import numpy as np
import struct
import matplotlib.pyplot as plt
1. Load the data
# Training-set image file
train_images_idx3_ubyte_file = './mnist/raw/train-images-idx3-ubyte'
# Training-set label file
train_labels_idx1_ubyte_file = './mnist/raw/train-labels-idx1-ubyte'
# Test-set image file
test_images_idx3_ubyte_file = './mnist/raw/t10k-images-idx3-ubyte'
# Test-set label file
test_labels_idx1_ubyte_file = './mnist/raw/t10k-labels-idx1-ubyte'
A generic function for parsing idx3 (image) files
def decode_idx3_ubyte(idx3_ubyte_file):
    """
    Generic function for parsing an idx3 (image) file.
    """
    # Read the binary data
    bin_data = open(idx3_ubyte_file, 'rb').read()

    # Parse the header: magic number, number of images, image height, image width
    offset = 0
    fmt_header = '>iiii'  # the first four header fields are 32-bit integers, hence four 'i's;
                          # the label file header (below) only needs two ('>ii')
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)
    print('Magic number: %d, number of images: %d, image size: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    # Parse the image data
    image_size = num_rows * num_cols
    offset += struct.calcsize(fmt_header)  # after the four header fields the offset points to byte 0016
    fmt_image = '>' + str(image_size) + 'B'  # pixels are unsigned chars (format 'B'); prefixing the count (784)
                                             # reads one whole image at a time instead of a single pixel
    images = np.empty((num_images, num_rows, num_cols))
    for i in range(num_images):
        # if (i + 1) % 10000 == 0:
        #     print('parsed %d images' % (i + 1))
        images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols))
        offset += struct.calcsize(fmt_image)
    return images
A generic function for parsing idx1 (label) files
def decode_idx1_ubyte(idx1_ubyte_file):
    """
    Generic function for parsing an idx1 (label) file.
    """
    # Read the binary data
    bin_data = open(idx1_ubyte_file, 'rb').read()

    # Parse the header: magic number and number of labels
    offset = 0
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)
    print('Magic number: %d, number of labels: %d' % (magic_number, num_images))

    # Parse the label data
    offset += struct.calcsize(fmt_header)
    fmt_image = '>B'
    labels = np.empty(num_images)
    for i in range(num_images):
        # if (i + 1) % 10000 == 0:
        #     print('parsed %d labels' % (i + 1))
        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
        offset += struct.calcsize(fmt_image)
    return labels
Load the data
train_images = decode_idx3_ubyte(train_images_idx3_ubyte_file) # (60000, 28, 28)
train_labels = decode_idx1_ubyte(train_labels_idx1_ubyte_file)
test_images = decode_idx3_ubyte(test_images_idx3_ubyte_file)
test_labels = decode_idx1_ubyte(test_labels_idx1_ubyte_file)
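If the raw idx files are not at hand, the same arrays can also be loaded directly from keras.datasets (a shortcut; the rest of this example keeps using the parsed files above):

from keras.datasets import mnist

# Downloads mnist.npz into the Keras cache directory on first use
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
print(train_images.shape, test_images.shape)  # (60000, 28, 28) (10000, 28, 28)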
2. Plot the first 16 images
fig = plt.figure()
gs = gridspec.GridSpec(4, 4, wspace=0.0)
ax = [plt.subplot(gs[i]) for i in range(4*4)]
for i in range(16):
    a = train_images[i]
    ax[i].imshow(Image.fromarray(a))
plt.show()
3. Data processing
Convert the class labels to a one-hot encoded matrix.
# Convert the classes to one-hot vectors: 0 -> [1,0,...,0], ..., 9 -> [0,...,0,1]
y_train_onehot = np_utils.to_categorical(train_labels)
y_test_onehot = np_utils.to_categorical(test_labels)
Add a channel dimension to x: shape (60000, 28, 28) -> (60000, 1, 28, 28)
x_train = train_images[:,np.newaxis]
x_test = test_images[:,np.newaxis]
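A quick check that the channel axis landed where Conv2D expects it:

print(x_train.shape)  # (60000, 1, 28, 28)
print(x_test.shape)   # (10000, 1, 28, 28)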
4. Build the model
Conv2D expects 4-D input, so set input_shape=(1, 28, 28)
# Use the Sequential model to stack a set of layers.
num_classes = 10
model = Sequential()
# First convolutional layer
model.add(Conv2D(filters=32,
                 kernel_size=(3, 3),
                 padding='same',
                 input_shape=(1, 28, 28),
                 activation='relu'))
# Second convolutional layer
model.add(Conv2D(filters=32, kernel_size=(3, 3),
                 padding='same', activation='relu'))
# Pooling layer
model.add(MaxPooling2D(pool_size=(2, 2)))
# Flatten() flattens the feature maps into a 1-D vector
model.add(Flatten())
model.add(Dense(units=512, activation='relu'))
# Output layer: one unit per class
model.add(Dense(num_classes, activation='softmax'))
# Compile the model
epochs = 50
lrate = 0.05
sgd = SGD(lr=lrate,
          momentum=0.8,          # momentum, a float >= 0
          decay=lrate/epochs,    # learning-rate decay applied after each update
          nesterov=False)        # whether to use Nesterov momentum
model.compile(loss='mean_squared_error',  # with categorical_crossentropy the accuracy here stayed stuck around 0.01
              optimizer=sgd,
              metrics=['accuracy'])
# Print summary of CNN
print(model.summary())
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_1 (Conv2D)            (None, 32, 28, 28)        320
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 28, 28)        9248
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 14, 14)        0
_________________________________________________________________
flatten_1 (Flatten)          (None, 6272)              0
_________________________________________________________________
dense_1 (Dense)              (None, 512)               3211776
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5130
=================================================================
Total params: 3,226,474
Trainable params: 3,226,474
Non-trainable params: 0
_________________________________________________________________
None
5. Training
# Fitting the model
model.fit(x_train,
          y_train_onehot,
          validation_data=(x_test, y_test_onehot),
          epochs=250,
          batch_size=100,
          verbose=2)  # print one log line per epoch
# Final evaluation of the model
loss, accuracy = model.evaluate(x_test, y_test_onehot, verbose=0)
print("Model Accuracy = {:.4f}".format(accuracy))