TensorFlow Hands-On Training
-
House Price Prediction Model
-
Data Processing and Analysis
Data processing and analysis for single-variable house price prediction:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(context='notebook', style='whitegrid', palette='dark')  # configure the plotting style
df0 = pd.read_csv('data/data0.csv', names=['square', 'price'])  # read the data file
print(df0.head())  # show the first 5 rows of the DataFrame
sns.lmplot(x='square', y='price', data=df0, height=6, fit_reg=False)  # scatter plot
plt.show()  # display the figure
Data processing and analysis for multi-variable house price prediction:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

df1 = pd.read_csv('data/data1.csv', names=['square', 'bedrooms', 'price'])

fig = plt.figure()  # create a new figure
ax = plt.axes(projection='3d')  # create an Axes3D object

# name the 3 axes
ax.set_xlabel('square')
ax.set_ylabel('bedrooms')
ax.set_zlabel('price')

# draw the 3D scatter plot
ax.scatter3D(df1['square'], df1['bedrooms'], df1['price'], c=df1['price'], cmap='Greens')
plt.show()
Data normalization:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# feature normalization function
def normalize_feature(df):
    return df.apply(lambda column: (column - column.mean()) / column.std())

df = pd.read_csv('data/data1.csv', names=['square', 'bedrooms', 'price'])
df = normalize_feature(df)  # normalize the data

ax = plt.axes(projection='3d')
ax.set_xlabel('square')
ax.set_ylabel('bedrooms')
ax.set_zlabel('price')
ax.scatter3D(df['square'], df['bedrooms'], df['price'], c=df['price'], cmap='Reds')
plt.show()
Data processing: append a column of ones to $X$, so the bias term is absorbed into the weight matrix:
import pandas as pd
import numpy as np

def normalize_feature(df):
    return df.apply(lambda column: (column - column.mean()) / column.std())

df = pd.read_csv('data/data1.csv', names=['square', 'bedrooms', 'price'])
df = normalize_feature(df)

ones = pd.DataFrame({'ones': np.ones(len(df))})  # ones is an n-row, 1-column DataFrame
df = pd.concat([ones, df], axis=1)  # merge the DataFrames column-wise

X_data = np.array(df[df.columns[0:3]])
y_data = np.array(df[df.columns[-1]]).reshape(len(df), 1)

print(X_data.shape, type(X_data))
print(y_data.shape, type(y_data))
-
Model Creation and Training
Create the linear regression model (data flow graph):
import tensorflow as tf

alpha = 0.01  # learning rate alpha
epoch = 500   # number of passes over the full training data

# name scopes make the data flow graph easier to read
with tf.name_scope('input'):
    X = tf.placeholder(tf.float32, X_data.shape, name='X')
    y = tf.placeholder(tf.float32, y_data.shape, name='y')

with tf.name_scope('hypothesis'):
    W = tf.get_variable("weights", (X_data.shape[1], 1), initializer=tf.constant_initializer())
    y_pred = tf.matmul(X, W, name='y_pred')

with tf.name_scope('loss'):
    # tf.matmul(a, b, transpose_a=True) multiplies the transpose of a by b
    # loss operation loss_op
    loss_op = 1 / (2 * len(X_data)) * tf.matmul((y_pred - y), (y_pred - y), transpose_a=True)

with tf.name_scope('train'):
    # gradient descent optimizer opt
    opt = tf.train.GradientDescentOptimizer(learning_rate=alpha)
    # single-step training operation train_op
    train_op = opt.minimize(loss_op)
Create a session (runtime environment):
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)  # initialize global variables
    # create a FileWriter instance
    writer = tf.summary.FileWriter("./summary/linear-reg-0", sess.graph)
    loss_data = []
    for e in range(1, epoch + 1):
        _, loss, w = sess.run([train_op, loss_op, W], feed_dict={X: X_data, y: y_data})
        loss_data.append(float(loss))
        if e % 100 == 0:
            log_str = "Epoch %d \t Loss=%.4g \t Model: y = %.4gx1 + %.4gx2 + %.4g"
            print(log_str % (e, loss, w[1], w[2], w[0]))
    writer.close()  # close the FileWriter output stream
Visualize the loss values:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(context="notebook", style="whitegrid", palette="dark")

ax = sns.lineplot(x='epoch', y='loss', data=pd.DataFrame({'loss': loss_data, 'epoch': np.arange(epoch)}))
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
plt.show()
Note: this approach is quite memory-intensive; later we will use TensorBoard to visualize the loss instead.
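As a preview, here is a minimal sketch of what that could look like (my own sketch, reusing the loss_op, train_op, X, and y defined above; the 1x1 loss matrix is squeezed to a scalar first, since tf.summary.scalar expects a scalar tensor):

# sketch: log the loss as a TensorBoard scalar instead of a Python list
loss_summary = tf.summary.scalar('loss', tf.squeeze(loss_op))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter('./summary/linear-reg-1', sess.graph)
    for e in range(1, epoch + 1):
        _, summary = sess.run([train_op, loss_summary], feed_dict={X: X_data, y: y_data})
        writer.add_summary(summary, e)  # one scalar data point per epoch
    writer.close()
-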
Visualizing the Data Flow Graph
In a command window, change into the current project directory, then run:
tensorboard --logdir ./summary --host localhost
This starts TensorBoard on the local default port (6006).
-
-
Handwritten Digit Recognition
Method 1: an MNIST Softmax network
-
Load the MNIST dataset
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data('mnist/mnist.npz')

print(x_train.shape, type(x_train))
print(y_train.shape, type(y_train))
-
Data processing: normalization
# flatten each image into 28*28=784 values
X_train = x_train.reshape(60000, 784)
X_test = x_test.reshape(10000, 784)

print(X_train.shape, type(X_train))
print(X_test.shape, type(X_test))

# convert the data type to float32
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# normalize the data to [0, 1]
X_train /= 255
X_test /= 255
-
Count the occurrences of each label in the training data
import numpy as np
import matplotlib.pyplot as plt

label, count = np.unique(y_train, return_counts=True)
print(label, count)

fig = plt.figure()
plt.bar(label, count, width=0.7, align='center')
plt.xlabel('Label')
plt.ylabel('Count')
plt.xticks(label)  # show the digits on the x axis
plt.ylim(0, 7500)  # set the y-axis range

# add the count above each bar
for a, b in zip(label, count):
    plt.text(a, b, '%d' % b, ha='center', va='bottom', fontsize=10)

plt.show()
-
Data processing: one-hot encoding
from keras.utils import np_utils

n_classes = 10
Y_train = np_utils.to_categorical(y_train, n_classes)
Y_test = np_utils.to_categorical(y_test, n_classes)
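A quick illustration of what the encoding produces (the printed values are just what I would expect; for instance, the first MNIST training label is 5):

print(y_train[0])  # e.g. 5
print(Y_train[0])  # a 10-dim vector with a single 1 at index 5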
-
Define the Softmax network with the Keras Sequential model
Softmax network layers:
from keras.models import Sequential
from keras.layers.core import Dense, Activation

model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dense(512))
model.add(Activation('relu'))
# 10-way softmax output layer for the 10 digit classes,
# required by the categorical cross-entropy loss below
model.add(Dense(10))
model.add(Activation('softmax'))
Compile the model:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
Train the model and save the metrics to history:
history = model.fit(X_train, Y_train, batch_size=128, epochs=5, verbose=2, validation_data=(X_test, Y_test))
Visualize the metrics:
fig = plt.figure()

plt.subplot(2, 1, 1)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='lower right')

plt.subplot(2, 1, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper right')

plt.tight_layout()
plt.show()
Save the model:
import os
import tensorflow.gfile as gfile

save_dir = "models/mnist_model"

if gfile.Exists(save_dir):
    gfile.DeleteRecursively(save_dir)
gfile.MakeDirs(save_dir)

model_name = 'keras_mnist.h5'
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
Load the model:
from keras.models import load_model

mnist_model = load_model(model_path)
Evaluate the model's classification results on the test set:
loss_and_metrics = mnist_model.evaluate(X_test, Y_test, verbose=2)

print("Test Loss: {}".format(loss_and_metrics[0]))
print("Test Accuracy: {}%".format(loss_and_metrics[1] * 100))

predicted_classes = mnist_model.predict_classes(X_test)

correct_indices = np.nonzero(predicted_classes == y_test)[0]
incorrect_indices = np.nonzero(predicted_classes != y_test)[0]

print("Classified correctly count: {}".format(len(correct_indices)))
print("Classified incorrectly count: {}".format(len(incorrect_indices)))
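To see where the model fails, an optional sketch that plots a few misclassified digits (it reuses X_test, predicted_classes, and y_test from the step above):

# plot the first 9 misclassified test digits with predicted vs. true labels
plt.figure()
for i, incorrect in enumerate(incorrect_indices[:9]):
    plt.subplot(3, 3, i + 1)
    plt.imshow(X_test[incorrect].reshape(28, 28), cmap='gray')
    plt.title('Pred: {}, True: {}'.format(predicted_classes[incorrect], y_test[incorrect]))
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()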
Method 2: an MNIST CNN
-
Load the MNIST dataset (same code as Method 1)
-
Data processing: normalization
from keras import backend as K

img_rows, img_cols = 28, 28

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

print(x_train.shape, type(x_train))
print(x_test.shape, type(x_test))

# convert the data type to float32
X_train = x_train.astype('float32')
X_test = x_test.astype('float32')

# normalize the data to [0, 1]
X_train /= 255
X_test /= 255
-
Count the occurrences of each label in the training data (same code as Method 1)
-
Data processing: one-hot encoding (same code as Method 1)
-
Define the MNIST CNN with the Keras Sequential model
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

model = Sequential()

# 1st convolutional layer: 32 3x3 kernels, ReLU activation
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))

# 2nd convolutional layer: 64 3x3 kernels, ReLU activation
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))

# max-pooling layer with a 2x2 pooling window
model.add(MaxPooling2D(pool_size=(2, 2)))

# drop out 25% of the input neurons
model.add(Dropout(0.25))

# flatten the pooled feature maps before the fully connected layers
model.add(Flatten())

# fully connected layer
model.add(Dense(128, activation='relu'))

# drop out 50% of the input neurons
model.add(Dropout(0.5))

# softmax activation for multi-class output: one probability per digit
model.add(Dense(n_classes, activation='softmax'))
-
Inspect the MNIST CNN network structure
# print the overall network structure
model.summary()

# print the output shape of each layer
for layer in model.layers:
    print(layer.get_output_at(0).get_shape().as_list())
-
Compile the model (same code as Method 1)
-
Train the model and save the metrics to history (same code as Method 1)
-
Visualize the metrics (same code as Method 1)
-
Save the model (same code as Method 1)
-
Load the model (same code as Method 1)
-
Evaluate the model's classification results on the test set (same code as Method 1)
-
-
Captcha Recognition
-
Generate the Captcha Dataset
Import third-party packages:
from captcha.image import ImageCaptcha

import random
import numpy as np
import tensorflow.gfile as gfile
import matplotlib.pyplot as plt
import PIL.Image as Image
Define constants and the character set:
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOWERCASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
UPPERCASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

CAPTCHA_CHARSET = NUMBER  # captcha character set
CAPTCHA_LEN = 4           # captcha length
CAPTCHA_HEIGHT = 60       # captcha height
CAPTCHA_WIDTH = 160       # captcha width

TRAIN_DATASET_SIZE = 5000  # captcha dataset sizes
TEST_DATASET_SIZE = 1000
TRAIN_DATA_DIR = 'train-data/'  # captcha dataset directories
TEST_DATA_DIR = 'test-data/'
A function to generate random captcha text:
def gen_random_text(charset=CAPTCHA_CHARSET, length=CAPTCHA_LEN):
    text = [random.choice(charset) for _ in range(length)]
    return ''.join(text)
A function to create and save a captcha dataset:
def create_captcha_dataset(size=100, data_dir='data/', height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, image_format='.png'):
    if gfile.Exists(data_dir):
        gfile.DeleteRecursively(data_dir)
    gfile.MakeDirs(data_dir)

    # create an ImageCaptcha instance
    captcha = ImageCaptcha(width=width, height=height)

    for _ in range(size):
        # generate random captcha text
        text = gen_random_text(CAPTCHA_CHARSET, CAPTCHA_LEN)
        captcha.write(text, data_dir + text + image_format)

    return None
Create and save the training set:
create_captcha_dataset(TRAIN_DATASET_SIZE, TRAIN_DATA_DIR)
Create and save the test set:
create_captcha_dataset(TEST_DATASET_SIZE, TEST_DATA_DIR)
A function to generate and return a captcha dataset (in memory):
def gen_captcha_dataset(size=100, data_dir='data/test3-data', height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, image_format='.png'):
    captcha = ImageCaptcha(width=width, height=height)
    images, texts = [None]*size, [None]*size

    for i in range(size):
        texts[i] = gen_random_text(CAPTCHA_CHARSET, CAPTCHA_LEN)
        # open the newly generated captcha image with PIL.Image.open(),
        # then convert it to a NumPy array of shape (CAPTCHA_HEIGHT, CAPTCHA_WIDTH, 3)
        images[i] = np.array(Image.open(captcha.generate(texts[i])))

    return images, texts
Generate 100 captcha images with their texts:
images, texts = gen_captcha_dataset()

plt.figure()
for i in range(20):
    plt.subplot(5, 4, i + 1)  # show the first 20 captchas in a 5x4 grid of subplots
    plt.tight_layout()        # auto-fit the subplot sizes
    plt.imshow(images[i])
    plt.title("Label: {}".format(texts[i]))  # use the label as the subplot title
    plt.xticks([])  # remove the x-axis ticks
    plt.yticks([])  # remove the y-axis ticks
plt.show()
-
Input and Output Data Processing
Import third-party packages:
from PIL import Image
from keras import backend as K

import os
import glob
import numpy as np
import matplotlib.pyplot as plt
Define constants and the character set:
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOWERCASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
UPPERCASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

CAPTCHA_CHARSET = NUMBER  # captcha character set
CAPTCHA_LEN = 4           # captcha length
CAPTCHA_HEIGHT = 60       # captcha height
CAPTCHA_WIDTH = 160       # captcha width

TRAIN_DATA_DIR = 'train-data/'  # captcha dataset directory
Read the first 100 images of the training set and parse the captcha (label) from each filename:
image = []
text = []
count = 0

for filename in glob.glob(TRAIN_DATA_DIR + '*.png'):
    image.append(np.array(Image.open(filename)))
    # the label is the base filename without its extension
    text.append(os.path.splitext(os.path.basename(filename))[0])
    count += 1
    if count >= 100:
        break
Convert the RGB captcha images to grayscale:
def rgb2gray(img):
    # Y' = 0.299R + 0.587G + 0.114B
    return np.dot(img[..., :3], [0.299, 0.587, 0.114])

image = np.array(image, dtype=np.float32)
print(image.shape)

image = rgb2gray(image)
print(image.shape)
Normalize the data:
image = image / 255
Adapt to the Keras image data format:
def fit_keras_channels(batch, rows=CAPTCHA_HEIGHT, cols=CAPTCHA_WIDTH):
    if K.image_data_format() == 'channels_first':
        batch = batch.reshape(batch.shape[0], 1, rows, cols)
        input_shape = (1, rows, cols)
    else:
        batch = batch.reshape(batch.shape[0], rows, cols, 1)
        input_shape = (rows, cols, 1)
    return batch, input_shape

image, input_shape = fit_keras_channels(image)

print(image.shape)
print(input_shape)
One-hot encode each character of the captcha:
def text2vec(text, length=CAPTCHA_LEN, charset=CAPTCHA_CHARSET):
    text_len = len(text)
    # validate the captcha length
    if text_len != length:
        raise ValueError('Error: length of captcha should be {}, but got {}'.format(length, text_len))

    # create a flat vector of length CAPTCHA_LEN * len(charset)
    vec = np.zeros(length * len(charset))
    for i in range(length):
        # one-hot encode each character of the captcha:
        # position of the hot bit = character index + per-character offset
        vec[charset.index(text[i]) + i * len(charset)] = 1
    return vec

text = list(text)
vec = [None]*len(text)

for i in range(len(vec)):
    vec[i] = text2vec(text[i])
Decode a captcha vector back into its characters:
def vec2text(vector):
    if not isinstance(vector, np.ndarray):
        vector = np.asarray(vector)
    vector = np.reshape(vector, [CAPTCHA_LEN, -1])
    text = ''
    for item in vector:
        text += CAPTCHA_CHARSET[np.argmax(item)]
    return text
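As a quick sanity check, the two functions should round-trip (the sample string below is an arbitrary 4-digit example):

sample = '0147'  # arbitrary 4-digit captcha text
assert vec2text(text2vec(sample)) == sample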
-
Training the Model
Import third-party packages:
from PIL import Image
from keras.utils.vis_utils import plot_model
from keras.models import *
from keras.layers import *

import os
import glob
import pickle
import numpy as np
import tensorflow.gfile as gfile
Define hyperparameters and the character set:
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOWERCASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
UPPERCASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

CAPTCHA_CHARSET = NUMBER  # captcha character set
CAPTCHA_LEN = 4           # captcha length
CAPTCHA_HEIGHT = 60       # captcha height
CAPTCHA_WIDTH = 160       # captcha width

TRAIN_DATA_DIR = 'train-data/'  # captcha dataset directories
TEST_DATA_DIR = 'test-data/'

BATCH_SIZE = 100
EPOCHS = 10
OPT = 'adam'
LOSS = 'binary_crossentropy'

MODEL_DIR = 'models/train_demo/'
MODEL_FORMAT = '.h5'
HISTORY_DIR = 'history/train_demo/'
HISTORY_FORMAT = '.history'

filename_str = "{}captcha_{}_{}_bs_{}_epochs_{}{}"

# file for the model architecture diagram
MODEL_VIS_FILE = 'captch_classfication' + '.png'
# model file
MODEL_FILE = filename_str.format(MODEL_DIR, OPT, LOSS, str(BATCH_SIZE), str(EPOCHS), MODEL_FORMAT)
# training history file
HISTORY_FILE = filename_str.format(HISTORY_DIR, OPT, LOSS, str(BATCH_SIZE), str(EPOCHS), HISTORY_FORMAT)
Convert the RGB captcha images to grayscale (same function code as in Input and Output Data Processing)
One-hot encode each character of the captcha (same function code as in Input and Output Data Processing)
Decode a captcha vector back into its characters (same function code as in Input and Output Data Processing)
Adapt to the Keras image data format (same function code as in Input and Output Data Processing)
Read the training set:
X_train = []
Y_train = []

for filename in glob.glob(TRAIN_DATA_DIR + "*.png"):
    X_train.append(np.array(Image.open(filename)))
    # the label is the base filename without its extension
    Y_train.append(os.path.splitext(os.path.basename(filename))[0])
Process the training images:
X_train = np.array(X_train, dtype=np.float32)
X_train = rgb2gray(X_train)
X_train = X_train / 255
X_train, input_shape = fit_keras_channels(X_train)
Process the training labels:
Y_train = list(Y_train)
for i in range(len(Y_train)):
    Y_train[i] = text2vec(Y_train[i])
Y_train = np.asarray(Y_train)
Read the test set and process its images and labels:
X_test = []
Y_test = []

for filename in glob.glob(TEST_DATA_DIR + "*.png"):
    X_test.append(np.array(Image.open(filename)))
    Y_test.append(os.path.splitext(os.path.basename(filename))[0])

X_test = np.array(X_test, dtype=np.float32)
X_test = rgb2gray(X_test)
X_test = X_test / 255
X_test, _ = fit_keras_channels(X_test)

Y_test = list(Y_test)
for i in range(len(Y_test)):
    Y_test[i] = text2vec(Y_test[i])
Y_test = np.asarray(Y_test)
Create the captcha recognition model:
# input layer
inputs = Input(shape=input_shape, name='inputs')

# 1st convolutional layer
conv1 = Conv2D(32, (3, 3), name="conv1")(inputs)
relu1 = Activation('relu', name='relu1')(conv1)

# 2nd convolutional layer
conv2 = Conv2D(32, (3, 3), name='conv2')(relu1)
relu2 = Activation('relu', name='relu2')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2), padding='same', name='pool2')(relu2)

# 3rd convolutional layer
conv3 = Conv2D(64, (3, 3), name='conv3')(pool2)
relu3 = Activation('relu', name='relu3')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2), padding='same', name='pool3')(relu3)

# flatten the pooled feature maps before the fully connected layers
x = Flatten()(pool3)

# dropout
x = Dropout(0.25)(x)

# 4 fully connected layers, one 10-way classifier per captcha character
x = [Dense(10, activation='softmax', name='fc%d' % (i + 1))(x) for i in range(4)]

# concatenate the 4 character vectors so the output matches the label vector format
outs = Concatenate()(x)

# define the model's inputs and outputs
model = Model(inputs=inputs, outputs=outs)
model.compile(optimizer=OPT, loss=LOSS, metrics=['accuracy'])
View the model summary:
model.summary()
Visualize the model:
plot_model(model, to_file=MODEL_VIS_FILE, show_shapes=True)
[Note: running the model-visualization code on Windows requires extra configuration, plus the following two lines of code:]
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'
Train the model:
history = model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=2, validation_data=(X_test, Y_test))
Predict a sample:
yy = model.predict(X_test[0].reshape(1, 60, 160, 1))
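The raw output yy is a 40-dimensional vector (four concatenated 10-way softmax outputs). A small sketch to make it readable, using the vec2text helper defined earlier:

print(vec2text(yy))         # predicted text for the first test captcha
print(vec2text(Y_test[0]))  # ground-truth text, for comparison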
Save the model:
if not gfile.Exists(MODEL_DIR):
    gfile.MakeDirs(MODEL_DIR)

model.save(MODEL_FILE)
Save the training history:
print(history.history.keys())  # inspect which metrics were recorded

if not gfile.Exists(HISTORY_DIR):
    gfile.MakeDirs(HISTORY_DIR)

with open(HISTORY_FILE, 'wb') as f:
    pickle.dump(history.history, f)
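To verify the pickle later, the history can be read back like this (a minimal sketch using the HISTORY_FILE path from above):

with open(HISTORY_FILE, 'rb') as f:
    history_data = pickle.load(f)
print(history_data.keys())  # expected keys such as 'loss', 'acc', 'val_loss', 'val_acc'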
-
Model Deployment and Demo
Model deployment: [just run the code below]
import base64
import numpy as np
import tensorflow as tf

from io import BytesIO
from flask import Flask, request, jsonify
from keras.models import load_model
from PIL import Image

NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOWERCASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
UPPERCASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

CAPTCHA_CHARSET = NUMBER  # captcha character set
CAPTCHA_LEN = 4           # captcha length
CAPTCHA_HEIGHT = 60       # captcha height
CAPTCHA_WIDTH = 160       # captcha width

MODEL_FILE = 'models/train_demo/captcha_adam_binary_crossentropy_bs_100_epochs_10.h5'

def vec2text(vector):
    if not isinstance(vector, np.ndarray):
        vector = np.asarray(vector)
    vector = np.reshape(vector, [CAPTCHA_LEN, -1])
    text = ''
    for item in vector:
        text += CAPTCHA_CHARSET[np.argmax(item)]
    return text

def rgb2gray(img):
    # Y' = 0.299R + 0.587G + 0.114B
    return np.dot(img[..., :3], [0.299, 0.587, 0.114])

app = Flask(__name__)  # create the Flask instance

# health-check URL
@app.route('/ping', methods=['GET', 'POST'])
def hello_world():
    return 'hello world'

# captcha recognition URL
@app.route('/predict', methods=['GET', 'POST'])
def predict():
    response = {'success': False, 'prediction': '', 'debug': 'error'}
    received_image = False
    if request.method == 'POST':
        if request.files.get('image'):  # raw image file
            image = request.files['image'].read()
            received_image = True
            response['debug'] = 'get image'
        elif request.get_json():  # base64-encoded image file
            encoded_image = request.get_json()['image']
            image = base64.b64decode(encoded_image)  # decode back to raw image bytes
            received_image = True
            response['debug'] = 'get json'
        if received_image:
            image = np.array(Image.open(BytesIO(image)))
            image = rgb2gray(image).reshape(1, 60, 160, 1).astype('float32') / 255
            with graph.as_default():
                pred = model.predict(image)
            response['prediction'] = response['prediction'] + vec2text(pred)
            response['success'] = True
            response['debug'] = 'predicted'
    else:
        response['debug'] = 'No Post'
    return jsonify(response)

model = load_model(MODEL_FILE)  # load the model
graph = tf.get_default_graph()  # grab the TensorFlow default data flow graph

if __name__ == '__main__':
    app.run(debug=True)
Demo: change into the appropriate directory and run the following command to test the service:
curl -X POST -F image=@9986.png "http://localhost:5000/predict"
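Equivalently, the endpoint can be tested from Python (a sketch assuming the requests package is installed and 9986.png is in the current directory):

import requests

with open('9986.png', 'rb') as f:
    r = requests.post('http://localhost:5000/predict', files={'image': f})
print(r.json())  # e.g. {'debug': 'predicted', 'prediction': '9986', 'success': True}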
-
-
Face Recognition
[Note: the code below is the core code, but it depends on other files, so running it on its own will raise errors.]
-
Load the training dataset
Organization of the training dataset: one directory per person, named after that person, e.g. "Fan_Bingbing"; each person's directory contains 10 images, named "name_index". Only ".jpg" and ".jpeg" files are supported.
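For illustration, a hypothetical layout (the second person's name and the exact file numbering are placeholders):

images/
    Fan_Bingbing/
        Fan_Bingbing_0001.jpg
        ...
        Fan_Bingbing_0010.jpg
    Zhang_San/
        Zhang_San_0001.jpg
        ...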
import numpy as np
import cv2
import os.path

class IdentityMetadata():
    def __init__(self, base, name, file):
        self.base = base  # dataset root directory
        self.name = name  # directory name (person name)
        self.file = file  # image file name

    def __repr__(self):
        return self.image_path()

    def image_path(self):
        return os.path.join(self.base, self.name, self.file)

def load_metadata(path):
    metadata = []
    for i in os.listdir(path):
        for f in os.listdir(os.path.join(path, i)):
            # check the extension; only jpg and jpeg files are supported
            ext = os.path.splitext(f)[1]
            if ext == '.jpg' or ext == '.jpeg':
                metadata.append(IdentityMetadata(path, i, f))
    return np.array(metadata)

def load_image(path):
    img = cv2.imread(path, 1)
    # OpenCV loads images in BGR channel order; convert to RGB
    return img[..., ::-1]

metadata = load_metadata("images")
-
Face Detection, Alignment, and Extraction
Extract a 96x96 RGB face image from the original image; if the original is not at a 1:1 aspect ratio, the extracted face is stretched.
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from align import AlignDlib

# initialize the OpenFace face-alignment tool, using Dlib's 68 landmarks
alignment = AlignDlib('face_detection/landmarks.dat')

# load one training image
img = load_image(metadata[0].image_path())

# detect the face and return its bounding box
bb = alignment.getLargestFaceBoundingBox(img)

# transform the image using the specified face landmarks and crop a 96x96 face image
aligned_img = alignment.align(96, img, bb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)

# plot the original image
plt.subplot(131)
plt.imshow(img)
plt.xticks([])
plt.yticks([])

# plot the original image with the face bounding box
plt.subplot(132)
plt.imshow(img)
plt.gca().add_patch(patches.Rectangle((bb.left(), bb.top()), bb.width(), bb.height(), fill=False, color='red'))
plt.xticks([])
plt.yticks([])

# plot the aligned, cropped 96x96 face image
plt.subplot(133)
plt.imshow(aligned_img)
plt.xticks([])
plt.yticks([])

plt.show()
-
Load the nn4.small2.v1 model
from model import create_model
from keras.models import Model
from keras.layers import Input, Layer
from keras.utils.vis_utils import plot_model

nn4_small2 = create_model()

# inputs: anchor, positive, and negative 96x96 RGB images
in_a = Input(shape=(96, 96, 3))
in_p = Input(shape=(96, 96, 3))
in_n = Input(shape=(96, 96, 3))

# outputs: the corresponding face embedding vectors
emb_a = nn4_small2(in_a)
emb_p = nn4_small2(in_p)
emb_n = nn4_small2(in_n)

plot_model(nn4_small2, to_file='nn4_small2_model.png', show_shapes=True)
-
Triplet Loss Layer
The training objective is to learn a function $f(x)$ that embeds face images into a Euclidean feature space, such that for a given face image $x$, the distance between embeddings of the same person is as small as possible, while the distance between embeddings of different people is as large as possible.
The model we want can be learned by minimizing the triplet loss $L$:

$$L = \sum_{i=1}^{N}\left[\,||f(x_i^a) - f(x_i^p)||_2^2 - ||f(x_i^a) - f(x_i^n)||_2^2 + \alpha\,\right]_+$$

where $[z]_+$ denotes $\max(z, 0)$ and $N$ is the cardinality of the triplet set. Below, the triplet loss is implemented as a custom Keras loss:
from keras import backend as K

class TripletLossLayer(Layer):
    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        a, p, n = inputs
        p_dist = K.sum(K.square(a - p), axis=-1)
        n_dist = K.sum(K.square(a - n), axis=-1)
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)

    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

triplet_loss_layer = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')([emb_a, emb_p, emb_n])
nn4_small2_train = Model([in_a, in_p, in_n], triplet_loss_layer)

plot_model(nn4_small2_train, to_file='nn4_small2_train.png', show_shapes=True)
Note: the code above only defines a trainable model; in practice, a pretrained model is all we need for face recognition.
-
Load the pretrained nn4.small2.v1 model
nn4_small2_pretrained = create_model()
nn4_small2_pretrained.load_weights('models/nn4.small2.v1.h5')

def align_image(img):
    return alignment.align(96, img, alignment.getLargestFaceBoundingBox(img),
                           landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)

embedded = np.zeros((metadata.shape[0], 128))

for i, m in enumerate(metadata):
    img = load_image(m.image_path())
    img = align_image(img)
    # normalize the data
    img = (img / 255.).astype(np.float32)
    # face embedding vector
    embedded[i] = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]

def distance(emb1, emb2):
    return np.sum(np.square(emb1 - emb2))

def show_pair(idx1, idx2):
    plt.figure(figsize=(8, 3))
    plt.suptitle(f'Distance = {distance(embedded[idx1], embedded[idx2]):.2f}')
    plt.subplot(121)
    plt.imshow(load_image(metadata[idx1].image_path()))
    plt.xticks([])
    plt.yticks([])
    plt.subplot(122)
    plt.imshow(load_image(metadata[idx2].image_path()))
    plt.xticks([])
    plt.yticks([])
    plt.show()

show_pair(2, 3)
show_pair(2, 12)
-
Face Classification
Use off-the-shelf KNN and SVM classifiers for face classification (the training data consists of Western faces, so accuracy on Asian faces may be lower):
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

targets = np.array([m.name for m in metadata])

encoder = LabelEncoder()
encoder.fit(targets)
y = encoder.transform(targets)

# alternate the images between the train and test splits
train_idx = np.arange(metadata.shape[0]) % 2 != 0
test_idx = np.arange(metadata.shape[0]) % 2 == 0

X_train = embedded[train_idx]
X_test = embedded[test_idx]
y_train = y[train_idx]
y_test = y[test_idx]

knn = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
svc = LinearSVC()

knn.fit(X_train, y_train)
svc.fit(X_train, y_train)

acc_knn = accuracy_score(y_test, knn.predict(X_test))
acc_svc = accuracy_score(y_test, svc.predict(X_test))

print(f'KNN accuracy = {acc_knn}, SVM accuracy = {acc_svc}')
-
Face recognition test
import warnings
warnings.filterwarnings('ignore')

example_idx = 2

# index into the test subset for both the image and the embedding,
# so the displayed face matches the embedding being classified
example_image = load_image(metadata[test_idx][example_idx].image_path())
example_prediction = svc.predict([embedded[test_idx][example_idx]])
example_identity = encoder.inverse_transform(example_prediction)[0]

plt.imshow(example_image)
plt.title(f'Recognized as {example_identity}')
plt.xticks([])
plt.yticks([])
plt.show()
-
Model Evaluation and Visual Analysis
from sklearn.metrics import f1_score

distances = []  # squared L2 distance between pairs
identical = []  # 1 if same identity, 0 otherwise

num = len(metadata)

for i in range(num - 1):
    for j in range(i + 1, num):  # enumerate each distinct pair once
        distances.append(distance(embedded[i], embedded[j]))
        identical.append(1 if metadata[i].name == metadata[j].name else 0)

distances = np.array(distances)
identical = np.array(identical)

thresholds = np.arange(0.1, 1.0, 0.01)

f1_scores = [f1_score(identical, distances < t) for t in thresholds]
acc_scores = [accuracy_score(identical, distances < t) for t in thresholds]

opt_idx = np.argmax(f1_scores)
opt_tau = thresholds[opt_idx]
opt_acc = accuracy_score(identical, distances < opt_tau)

# plot F1 score and accuracy against the distance threshold
plt.plot(thresholds, f1_scores, label='F1 score')
plt.plot(thresholds, acc_scores, label='Accuracy')
plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
plt.title(f'Accuracy at threshold {opt_tau:.2f} = {opt_acc:.3f}')
plt.xlabel('Distance threshold')
plt.legend()
plt.show()

dist_pos = distances[identical == 1]
dist_neg = distances[identical == 0]

plt.figure(figsize=(12, 4))

plt.subplot(121)
plt.hist(dist_pos)
plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
plt.title('Distances (positive pairs)')
plt.legend()

plt.subplot(122)
plt.hist(dist_neg)
plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
plt.title('Distances (negative pairs)')
plt.legend()

plt.show()

# reduce the face embeddings to 2-D for visualization
from sklearn.manifold import TSNE

X_embedded = TSNE(n_components=2).fit_transform(embedded)

for i, t in enumerate(set(targets)):
    idx = targets == t
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=t)

plt.legend(bbox_to_anchor=(1, 1))
plt.xticks([])
plt.yticks([])
plt.show()
-