- 卷积神经网络是一种自动化特征提取的机器学习模型,主要用于解决图像识别问题。从直观上讲,是一个从细节到抽象的过程。这里的关键是如何抽象:抽象就是把图像中的各种零散的特征通过某种方式汇总起来,形成新的特征。深度学习网络最上层的特征是最抽象的。
- 卷积神经网络还依靠于反向传播的反馈机制,来自动调节过滤器来更加准确提取特征。
- 一般一个卷积层包括3个部分:卷积、非线性变化(激活函数)、池化(Pooling),有的还包括dropout。流行的CNN网络结构,比如LeNet、VGG16等,都是通过构造多层卷积层,使得原来“矮胖”型的图像立体,变成“瘦长”型立体,最后做一个单层的网络,把“瘦长”型立体和输出层(类别)联系在一起。
实战
1、端到端(End-to-End)的MNIST训练数字识别
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
# Fetch the MNIST data set (downloaded and cached on first use).
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Peek at one sample to confirm the raw layout: 28x28 greyscale, integer label.
print(X_train[0].shape)
print(y_train[0])
# Add the trailing channel axis that Conv2D expects and switch to float32.
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32')
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32')
# Scale pixel intensities from [0, 255] down to [0, 1].
X_train /= 255
X_test /= 255
# 独热编码
def tran_y(y, num_classes=10):
    """Return the one-hot encoding of the integer label *y*.

    Parameters
    ----------
    y : int
        Class index; must lie in ``range(num_classes)``.
    num_classes : int, optional
        Length of the returned vector. Defaults to 10 (the MNIST digit
        count) so existing single-argument calls behave exactly as before.

    Returns
    -------
    numpy.ndarray
        Float vector of length ``num_classes`` that is all zeros except
        for a single 1 at index ``y``.
    """
    y_ohe = np.zeros(num_classes)
    y_ohe[y] = 1
    return y_ohe
# One-hot encode the integer labels so they match the softmax output layer.
y_train_ohe = np.array([tran_y(label) for label in y_train])
y_test_ohe = np.array([tran_y(label) for label in y_test])
# Build a three-stage convolutional classifier: each stage is
# convolution -> max-pooling -> dropout, followed by a small dense head.
model = Sequential()
# 64 filters, 3x3 kernel, stride 1; padding='same' zero-pads so the
# spatial size is preserved. The first layer also fixes the input shape.
model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', input_shape=(28, 28, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # keep the max of every 2x2 window
model.add(Dropout(0.5))  # drop half the activations to curb overfitting
model.add(Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(256, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
# Flatten the final feature maps into a single vector.
model.add(Flatten())
# Fully connected head, tapering down to the 10-way softmax output.
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))  # one neuron per digit class
# Cross-entropy loss (the usual choice for classification), Adagrad
# optimizer, and accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
# Train in mini-batches, validating on the test split after each epoch.
model.fit(X_train, y_train_ohe, validation_data=(X_test, y_test_ohe), epochs=20, batch_size=128)
# Evaluate on the held-out test set and report the result; the original
# computed `scores` but never surfaced it, so the evaluation was discarded.
scores = model.evaluate(X_test, y_test_ohe, verbose=0)
print('Test loss: %.4f, test accuracy: %.4f' % (scores[0], scores[1]))
2、利用VGG16网络(迁移学习)进行字体识别
迁移学习的思想:利用别人的模型框架作为自己模型的输入,或者作为自己问题中的已知部分。这种学习是站在别人的肩膀上,从而大大缩短自己调整模型和建立模型的时间。同时也可以选择利用已建好模型的参考值,再适当地加上少量的参数,最后只需要计算自己添加的那部分参数的值就可以了。
网络结构和权重同时迁移,layers.trainable = False
把原有的训练好的权重**“冷冻”**起来,只训练自己搭建的那部分权重,减少训练时间,但是不一定适用新数据。
from keras.applications.vgg16 import VGG16
from keras.layers import Input, Flatten, Dense, Dropout
from keras.models import Model
from keras.optimizers import SGD
from keras.datasets import mnist
import cv2
import h5py as h5py
import numpy as np
# Transfer the architecture only, retraining the weights from scratch.
# weights=None leaves all parameters randomly initialised; the original
# passed weights='imagenet', which loads pretrained weights and so
# contradicts the stated structure-only intent.
model_vgg = VGG16(include_top=False, weights=None, input_shape=(224, 224, 3))
# Flatten the convolutional feature maps and attach a 10-way softmax head.
model = Flatten(name='flatten')(model_vgg.output)
model = Dense(10, activation='softmax')(model)
model_vgg_mnist = Model(model_vgg.input, model, name='vgg16')
model_vgg_mnist.summary()
# Transfer both the VGG16 architecture and its ImageNet weights.
ishape = 224
model_vgg = VGG16(include_top=False, weights='imagenet', input_shape=(ishape, ishape, 3))
# "Freeze" every pretrained layer so that only the new head added below
# is updated during training — faster, but may fit new data less well.
for frozen_layer in model_vgg.layers:
    frozen_layer.trainable = False
# New trainable head: flatten the VGG features into a 10-way softmax.
x = Flatten()(model_vgg.output)
x = Dense(10, activation='softmax')(x)
model_vgg_mnist_pretrain = Model(model_vgg.input, x, name='vgg16_pretrain')
model_vgg_mnist_pretrain.summary()
# Plain SGD with a small learning-rate decay; cross-entropy loss for the
# 10-class problem, accuracy as the reported metric.
sgd = SGD(lr=0.05, decay=1e-5)
model_vgg_mnist_pretrain.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Upscale each 28x28 digit to 224x224 and replicate the grey channel to
# three channels so the images match VGG16's expected input shape.
# NOTE(review): materialising all 60000 training images as float32 at
# 224x224x3 needs tens of GB of RAM — confirm the machine can hold this.
X_train = [cv2.cvtColor(cv2.resize(img, (ishape, ishape)), cv2.COLOR_GRAY2BGR) for img in X_train]
X_train = np.stack(X_train).astype('float32')
X_test = [cv2.cvtColor(cv2.resize(img, (ishape, ishape)), cv2.COLOR_GRAY2BGR) for img in X_test]
X_test = np.stack(X_test).astype('float32')
# Scale pixel intensities into [0, 1].
X_train /= 255
X_test /= 255
# 独热编码
def tran_y(y, num_classes=10):
    """Return the one-hot encoding of the integer label *y*.

    Re-defined here so the transfer-learning section runs standalone.

    Parameters
    ----------
    y : int
        Class index; must lie in ``range(num_classes)``.
    num_classes : int, optional
        Length of the returned vector. Defaults to 10 (the MNIST digit
        count) so existing single-argument calls behave exactly as before.

    Returns
    -------
    numpy.ndarray
        Float vector of length ``num_classes`` that is all zeros except
        for a single 1 at index ``y``.
    """
    y_ohe = np.zeros(num_classes)
    y_ohe[y] = 1
    return y_ohe
# One-hot encode the labels to match the 10-way softmax output.
y_train_ohe = np.array([tran_y(label) for label in y_train])
y_test_ohe = np.array([tran_y(label) for label in y_test])
# Train in mini-batches (only the new head is trainable — the VGG16 body
# is frozen), validating on the test split after each epoch.
model_vgg_mnist_pretrain.fit(X_train, y_train_ohe, validation_data=(X_test, y_test_ohe), epochs=200, batch_size=128)