TensorFlow学习笔记(二)----实战训练

TensorFlow实战训练

  1. 房价预测模型

    • 数据处理与分析

      单变量房价预测数据处理与分析:

      import pandas as pd
      import seaborn as sns
      import matplotlib.pyplot as plt
      
      # Configure the global seaborn theme used by every following plot.
      sns.set(context='notebook', style='whitegrid', palette='dark')
      # names= supplies the column labels for the two CSV columns.
      df0 = pd.read_csv('data/data0.csv', names=['square', 'price'])
      print(df0.head())  # first 5 rows
      # x, y and data are passed by keyword: recent seaborn releases no longer
      # accept them positionally.
      sns.lmplot(x='square', y='price', data=df0, height=6, fit_reg=False)
      plt.show()
      

      多变量房价预测数据处理与分析:

      import pandas as pd
      import matplotlib.pyplot as plt
      from mpl_toolkits.mplot3d import Axes3D
      
      # Load the three-column data set; names= supplies the column labels.
      df1 = pd.read_csv('data/data1.csv', names=['square', 'bedrooms', 'price'])
      fig = plt.figure()               # blank figure
      ax = plt.axes(projection='3d')   # 3D axes on the current figure
      # Name each axis after the column plotted on it.
      ax.set_xlabel('square')
      ax.set_ylabel('bedrooms')
      ax.set_zlabel('price')
      # 3D scatter; points are coloured by price.
      square_col, bedrooms_col, price_col = df1['square'], df1['bedrooms'], df1['price']
      ax.scatter3D(square_col, bedrooms_col, price_col, c=price_col, cmap='Greens')
      plt.show()
      

      数据规范化:

      import pandas as pd
      import matplotlib.pyplot as plt
      from mpl_toolkits.mplot3d import Axes3D
      
      # Feature normalization helper
      def normalize_feature(df):
          """Return a copy of *df* with every column z-score normalized.

          Each column has its own mean subtracted and is then divided by its
          own standard deviation (pandas' ``Series.std``).
          """
          def _zscore(column):
              centered = column - column.mean()
              return centered / column.std()

          return df.apply(_zscore)
      
      df = pd.read_csv('data/data1.csv', names=['square', 'bedrooms', 'price'])
      df = normalize_feature(df)      # 数据规范化
      ax = plt.axes(projection='3d')
      ax.set_xlabel('square')
      ax.set_ylabel('bedrooms')
      ax.set_zlabel('price')
      ax.scatter3D(df['square'], df['bedrooms'], df['price'], c=df['price'], cmap='Reds')
      plt.show()
      

      数据处理,在 $X$ 中加上一列1:

      import pandas as pd
      import numpy as np
      
      def normalize_feature(df):
          """Return a copy of *df* with every column z-score normalized.

          Each column has its own mean subtracted and is then divided by its
          own standard deviation (pandas' ``Series.std``).
          """
          def _zscore(column):
              centered = column - column.mean()
              return centered / column.std()

          return df.apply(_zscore)
      
      df = pd.read_csv('data/data1.csv', names=['square', 'bedrooms', 'price'])
      df = normalize_feature(df)
      # n-by-1 frame of ones, prepended so the first feature column is constant.
      ones = pd.DataFrame({'ones': np.ones(len(df))})
      df = pd.concat([ones, df], axis=1)   # column-wise concatenation
      # First three columns (ones, square, bedrooms) are the inputs ...
      X_data = np.array(df.iloc[:, 0:3])
      # ... and the last column (price) is the target, kept as an n-by-1 matrix.
      y_data = np.array(df.iloc[:, -1]).reshape(-1, 1)
      print(X_data.shape, type(X_data))
      print(y_data.shape, type(y_data))
      
    • 模型创建与训练

      创建线性回归模型(数据流图):

      import tensorflow as tf
      
      alpha = 0.01    # learning rate
      epoch = 500     # number of passes over the full data set
      
      # Name scopes group the ops so the dataflow graph is easier to read.
      with tf.name_scope('input'):
          # Placeholders take their static shapes from the prepared arrays.
          X = tf.placeholder(tf.float32, X_data.shape, name='X')
          y = tf.placeholder(tf.float32, y_data.shape, name='y')
      
      with tf.name_scope('hypothesis'):
          # One weight per column of X_data, stored as a column vector.
          W = tf.get_variable("weights", (X_data.shape[1], 1), initializer=tf.constant_initializer())
          y_pred = tf.matmul(X, W, name='y_pred')
      
      with tf.name_scope('loss'):
          # tf.matmul(a, b, transpose_a=True) computes transpose(a) @ b.
          # Loss op: (y_pred - y)^T (y_pred - y) / (2 * number of samples).
          loss_op = 1 / (2 * len(X_data)) * tf.matmul((y_pred - y), (y_pred - y), transpose_a=True)
      
      with tf.name_scope('train'):
          # Plain gradient-descent optimizer.
          opt = tf.train.GradientDescentOptimizer(learning_rate=alpha)
          # Op that performs one training step.
          train_op = opt.minimize(loss_op)
      

      创建会话(运行环境):

      init = tf.global_variables_initializer()
      # Loop-invariant log template, built once.
      log_str = "Epoch %d \t Loss=%.4g \t Model: y = %.4gx1 + %.4gx2 + %.4g"
      with tf.Session() as sess:
          sess.run(init)   # initialize all global variables
          # FileWriter dumps the graph so TensorBoard can display it.
          writer = tf.summary.FileWriter("./summary/linear-reg-0", sess.graph)
          loss_data = []
          for e in range(1, epoch + 1):
              _, loss, w = sess.run([train_op, loss_op, W], feed_dict={X: X_data, y: y_data})
              # loss_op is a 1x1 matrix product; .item() extracts the Python
              # float explicitly instead of relying on the deprecated implicit
              # conversion of a non-0-d array.
              loss_value = loss.item()
              loss_data.append(loss_value)
              if e % 100 == 0:
                  # W is a (3, 1) column: weight of the ones column, then the
                  # weights of the two features.
                  bias, w1, w2 = w.ravel()
                  print(log_str % (e, loss_value, w1, w2, bias))
      writer.close()      # close the FileWriter output stream
      

      可视化损失值:

      import matplotlib.pyplot as plt
      import seaborn as sns
      
      sns.set(context="notebook", style="whitegrid", palette="dark")

      # The training loop counts epochs 1..epoch, so the x axis uses the same
      # range; np.arange(epoch) would label the first epoch as 0.
      loss_frame = pd.DataFrame({'loss': loss_data, 'epoch': np.arange(1, epoch + 1)})
      ax = sns.lineplot(x='epoch', y='loss', data=loss_frame)
      ax.set_xlabel('epoch')
      ax.set_ylabel('loss')
      plt.show()
      

      注:这种方法非常耗内存,之后将使用TensorBoard来可视化损失值。

    • 可视化数据流图

      在命令窗口进入当前项目的目录下,然后执行:

      tensorboard --logdir ./summary --host localhost
      

      即可在本地的默认端口(6006)启动TensorBoard

  2. 手写数字识别

    第一种方法:通过MNIST Softmax网络实现

    • 加载MNIST数据集

      from keras.datasets import mnist
      
      (x_train, y_train), (x_test, y_test) = mnist.load_data('mnist/mnist.npz')
      print(x_train.shape, type(x_train))
      print(y_train.shape, type(y_train))
      
    • 数据处理:规范化

      # Flatten every image into one row (28 * 28 = 784 values). The sample
      # count is taken from the array itself instead of hard-coding
      # 60000 / 10000, so a subset of the data also works.
      X_train = x_train.reshape(x_train.shape[0], -1)
      X_test = x_test.reshape(x_test.shape[0], -1)
      print(X_train.shape, type(X_train))
      print(X_test.shape, type(X_test))
      # Convert to float32 before scaling.
      X_train = X_train.astype('float32')
      X_test = X_test.astype('float32')
      # Scale by 1/255.
      X_train /= 255
      X_test /= 255
      
    • 统计训练数据中各标签数量

      import numpy as np
      import matplotlib.pyplot as plt
      
      # Distinct labels in y_train and how often each one occurs.
      label, count = np.unique(y_train, return_counts=True)
      print(label, count)
      fig = plt.figure()
      plt.bar(label, count, width=0.7, align='center')
      plt.xlabel('Label')
      plt.ylabel('Count')
      plt.xticks(label)   # one tick per label
      plt.ylim(0, 7500)   # fixed y range

      # Write each count just above its bar.
      for bar_x, bar_height in zip(label, count):
          plt.text(bar_x, bar_height, '%d' % bar_height, ha='center', va='bottom', fontsize=10)

      plt.show()
      
    • 数据处理:one-hot编码

      from keras.utils import np_utils
      
      n_classes = 10
      Y_train = np_utils.to_categorical(y_train, n_classes)
      Y_test = np_utils.to_categorical(y_test, n_classes)
      
    • 使用Keras sequential model定义Softmax网络

      Softmax网络层:

      from keras.models import Sequential
      from keras.layers.core import Dense, Activation
      
      model = Sequential()
      # Hidden layer 1: 784 inputs -> 512 units, ReLU.
      model.add(Dense(512, input_shape=(784,)))
      model.add(Activation('relu'))

      # Hidden layer 2: 512 units, ReLU.
      model.add(Dense(512))
      model.add(Activation('relu'))

      # Output layer: one unit per class with softmax. Without it the model
      # emits 512 values per sample, which cannot be matched against the
      # n_classes-wide one-hot labels under categorical_crossentropy.
      model.add(Dense(n_classes))
      model.add(Activation('softmax'))
      

      编译模型:

      model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
      

      训练模型,并将指标保存到history中:

      history = model.fit(X_train, Y_train, batch_size=128, epochs=5, verbose=2, validation_data=(X_test, Y_test))
      

      可视化指标:

      # Older Keras releases log the accuracy metric as 'acc', newer ones as
      # 'accuracy'; use whichever key this history actually contains.
      acc_key = 'acc' if 'acc' in history.history else 'accuracy'

      fig = plt.figure()
      # Top panel: training vs. validation accuracy per epoch.
      plt.subplot(2, 1, 1)
      plt.plot(history.history[acc_key])
      plt.plot(history.history['val_' + acc_key])
      plt.title('Model Accuracy')
      plt.xlabel('epoch')
      plt.ylabel('accuracy')
      plt.legend(['train', 'test'], loc='lower right')

      # Bottom panel: training vs. validation loss per epoch.
      plt.subplot(2, 1, 2)
      plt.plot(history.history['loss'])
      plt.plot(history.history['val_loss'])
      plt.title('Model Loss')
      plt.xlabel('epoch')
      plt.ylabel('loss')
      plt.legend(['train', 'test'], loc='upper right')
      plt.tight_layout()

      plt.show()
      

      保存模型:

      import os
      import tensorflow.gfile as gfile
      
      save_dir = "models/mnist_model"
      if gfile.Exists(save_dir):
          gfile.DeleteRecursively(save_dir)
      gfile.MakeDirs(save_dir)
      model_name = 'keras_mnist.h5'
      model_path = os.path.join(save_dir, model_name)
      model.save(model_path)
      

      加载模型:

      from keras.models import load_model
      
      mnist_model = load_model(model_path)
      

      统计模型在测试集上的分类结果:

      loss_and_metrics = mnist_model.evaluate(X_test, Y_test, verbose=2)
      print("Test Loss: {}".format(loss_and_metrics[0]))
      print("Test Accuracy: {}%".format(loss_and_metrics[1] * 100))

      # The class is the index of the largest output. This is what
      # Sequential.predict_classes returned for a multi-class output, and it
      # also works on Keras releases where predict_classes no longer exists.
      predicted_classes = np.argmax(mnist_model.predict(X_test), axis=-1)
      # Compare against the integer labels (y_test), not the one-hot Y_test.
      correct_indices = np.nonzero(predicted_classes == y_test)[0]
      incorrect_indices = np.nonzero(predicted_classes != y_test)[0]
      print("Classified correctly count: {}".format(len(correct_indices)))
      print("Classified incorrectly count: {}".format(len(incorrect_indices)))
      

    第二种方法:通过MNIST CNN实现

    • 加载MNIST数据集 (代码同第一种方法)

    • 数据处理:规范化

      from keras import backend as K
      
      img_rows, img_cols = 28, 28

      # Place the single channel axis where the active Keras backend expects it.
      if K.image_data_format() == 'channels_first':
          input_shape = (1, img_rows, img_cols)
      else:
          input_shape = (img_rows, img_cols, 1)
      x_train = x_train.reshape((x_train.shape[0],) + input_shape)
      x_test = x_test.reshape((x_test.shape[0],) + input_shape)

      print(x_train.shape, type(x_train))
      print(x_test.shape, type(x_test))

      # Convert to float32, then scale by 1/255.
      X_train = x_train.astype('float32')
      X_test = x_test.astype('float32')
      X_train /= 255
      X_test /= 255
      
    • 统计训练数据中各标签数量 (代码同第一种方法)

    • 数据处理:one-hot编码 (代码同第一种方法)

    • 使用Keras sequential model定义MNIST CNN网络

      from keras.models import Sequential
      from keras.layers import Dense, Dropout, Flatten
      from keras.layers import Conv2D, MaxPooling2D
      
      model = Sequential([
          # Convolution 1: 32 kernels of 3x3, ReLU.
          Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
          # Convolution 2: 64 kernels of 3x3, ReLU.
          Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
          # Max pooling with a 2x2 window.
          MaxPooling2D(pool_size=(2, 2)),
          # Drop 25% of the incoming units.
          Dropout(0.25),
          # Flatten the pooled feature maps for the dense layers.
          Flatten(),
          # Fully connected layer.
          Dense(128, activation='relu'),
          # Drop 50% of the incoming units.
          Dropout(0.5),
          # Softmax output: one probability per digit class.
          Dense(n_classes, activation='softmax'),
      ])
      
    • 查看MNIST CNN模型网络结构

      # 查看整体网络结构
      model.summary()
      
      # 查看每一层的网络形状
      for layer in model.layers:
          print(layer.get_output_at(0).get_shape().as_list())
      
    • 编译模型 (代码同第一种方法)

    • 训练模型,并将指标保存到history中 (代码同第一种方法)

    • 可视化指标 (代码同第一种方法)

    • 保存模型 (代码同第一种方法)

    • 加载模型 (代码同第一种方法)

    • 统计模型在测试集上的分类结果 (代码同第一种方法)

  3. 验证码识别

    • 生成验证码数据集

      引入第三方包:

      from captcha.image import ImageCaptcha
      import random
      import numpy as np
      import tensorflow.gfile as gfile
      import matplotlib.pyplot as plt
      import PIL.Image as Image
      

      定义常量和字符集:

      # Candidate character sets, each a list of one-character strings.
      NUMBER = list('0123456789')
      LOWERCASE = list('abcdefghijklmnopqrstuvwxyz')
      UPPERCASE = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
      CAPTCHA_CHARSET = NUMBER        # character set used for the captchas
      CAPTCHA_LEN = 4                 # characters per captcha
      CAPTCHA_HEIGHT = 60             # image height
      CAPTCHA_WIDTH = 160             # image width
      TRAIN_DATASET_SIZE = 5000       # number of training captchas requested
      TEST_DATASET_SIZE = 1000        # number of test captchas requested
      TRAIN_DATA_DIR = 'train-data/'  # training set directory
      TEST_DATA_DIR = 'test-data/'    # test set directory
      

      生成随机字符的方法:

      def gen_random_text(charset=CAPTCHA_CHARSET, length=CAPTCHA_LEN):
          """Return a string of *length* characters drawn at random from *charset*.

          Characters are picked independently with ``random.choice``, so
          repeats are possible.
          """
          return ''.join(random.choice(charset) for _ in range(length))
      

      创建并保存验证码数据集的方法:

      def create_captcha_dataset(size=100, data_dir='data/', height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, image_format='.png'):
          """Generate *size* captcha images and save them under *data_dir*.

          Any existing *data_dir* is deleted first. Each image is saved as
          ``<text><image_format>``, so the file name doubles as the label.

          Because the text is the file name, two images with the same text
          would overwrite each other and the directory would end up with
          fewer than *size* files. Texts are therefore drawn until *size*
          distinct ones have been written.

          Raises:
              ValueError: if *size* exceeds the number of distinct texts that
                  CAPTCHA_CHARSET and CAPTCHA_LEN can produce.
          """
          import os  # local import: only this function needs path joining

          max_unique = len(CAPTCHA_CHARSET) ** CAPTCHA_LEN
          if size > max_unique:
              raise ValueError(
                  'Cannot create {} distinct captchas: only {} texts are possible'.format(size, max_unique))

          if gfile.Exists(data_dir):
              gfile.DeleteRecursively(data_dir)
          gfile.MakeDirs(data_dir)
          # ImageCaptcha instance that renders the images
          captcha = ImageCaptcha(width=width, height=height)
          written = set()
          while len(written) < size:
              text = gen_random_text(CAPTCHA_CHARSET, CAPTCHA_LEN)
              if text in written:
                  continue  # already on disk; draw another text
              written.add(text)
              # os.path.join also works when data_dir has no trailing slash.
              captcha.write(text, os.path.join(data_dir, text + image_format))
          return None
      

      创建并保存训练集:

      create_captcha_dataset(TRAIN_DATASET_SIZE, TRAIN_DATA_DIR)
      

      创建并保存测试集:

      create_captcha_dataset(TEST_DATASET_SIZE, TEST_DATA_DIR)
      

      生成并返回验证码数据集的方法(在内存中):

      def gen_captcha_dataset(size=100, data_dir='data/test3-data', height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, image_format='.png'):
          """Generate *size* captchas in memory and return ``(images, texts)``.

          ``images[i]`` is the NumPy array of the rendered captcha whose text
          is ``texts[i]``. Nothing is written to disk; *data_dir* and
          *image_format* are accepted but not used by this function.
          """
          captcha = ImageCaptcha(width=width, height=height)
          images, texts = [], []
          for _ in range(size):
              label = gen_random_text(CAPTCHA_CHARSET, CAPTCHA_LEN)
              # captcha.generate() returns an in-memory image that PIL opens;
              # np.array turns it into an array (presumably height x width x 3
              # for an RGB image -- confirm against the captcha package).
              rendered = Image.open(captcha.generate(label))
              texts.append(label)
              images.append(np.array(rendered))
          return images, texts
      

      生成100张验证码图像和字符:

      images, texts = gen_captcha_dataset()
      plt.figure()
      # Show the first 20 captchas on a grid of 5 rows by 4 columns.
      for i in range(20):
          plt.subplot(5, 4, i + 1)
          plt.imshow(images[i])
          plt.title("Label: {}".format(texts[i]))     # label as subplot title
          # An empty tick list removes the tick marks. Calling xticks() or
          # yticks() with no argument only returns the current ticks.
          plt.xticks([])
          plt.yticks([])
      # Fit the subplot spacing once, after all subplots exist.
      plt.tight_layout()
      plt.show()
      
    • 输入与输出数据处理

      引入第三方包:

      from PIL import Image
      from keras import backend as K
      import glob
      import numpy as np
      import matplotlib.pyplot as plt
      

      定义常量和字符集:

      # Candidate character sets, each a list of one-character strings.
      NUMBER = list('0123456789')
      LOWERCASE = list('abcdefghijklmnopqrstuvwxyz')
      UPPERCASE = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
      CAPTCHA_CHARSET = NUMBER        # character set used for the captchas
      CAPTCHA_LEN = 4                 # characters per captcha
      CAPTCHA_HEIGHT = 60             # image height
      CAPTCHA_WIDTH = 160             # image width
      TRAIN_DATA_DIR = 'train-data/'  # training set directory
      

      读取训练集前100张图片,并通过文件名解析验证码(标签):

      import os

      image = []
      text = []
      # Only the first 100 matching files are read.
      for filename in glob.glob(TRAIN_DATA_DIR + '*.png')[:100]:
          image.append(np.array(Image.open(filename)))
          # The label is the file name without directory and extension.
          # str.lstrip/rstrip remove *sets of characters*, not a prefix or
          # suffix, so they can eat label characters (e.g. letters in
          # "train-data/" or ".png") and do not handle a Windows "\\"
          # separator; basename/splitext are exact.
          text.append(os.path.splitext(os.path.basename(filename))[0])
      count = len(image)
      

      将RGB验证码图像转为灰度图:

      def rgb2gray(img):
          """Collapse the last (colour) axis of *img* into one luma value.

          Only the first three entries of the last axis are used, weighted as
          Y' = 0.299 R + 0.587 G + 0.114 B. The result has the shape of *img*
          without its last axis.
          """
          luma_weights = [0.299, 0.587, 0.114]
          rgb = img[..., : 3]
          return np.dot(rgb, luma_weights)
      
      image = np.array(image, dtype=np.float32)
      print(image.shape)
      image = rgb2gray(image)
      print(image.shape)
      

      数据规范化:

      image = image / 255
      

      适配Keras图像数据格式:

      def fit_keras_channels(batch, rows=CAPTCHA_HEIGHT, cols=CAPTCHA_WIDTH):
          """Reshape *batch* to the channel layout of the active Keras backend.

          A single channel axis is added either right after the sample axis
          ('channels_first') or at the end (any other setting).

          Returns:
              (batch, input_shape): the reshaped batch and the per-sample
              shape without the sample axis.
          """
          channels_first = K.image_data_format() == 'channels_first'
          input_shape = (1, rows, cols) if channels_first else (rows, cols, 1)
          batch = batch.reshape((batch.shape[0],) + input_shape)
          return batch, input_shape
      
      image, input_shape = fit_keras_channels(image)
      print(image.shape)
      print(input_shape)
      

      对验证码中每个字符进行one-hot编码:

      def text2vec(text, length=CAPTCHA_LEN, charset=CAPTCHA_CHARSET):
          """One-hot encode every character of *text* into one flat vector.

          The result has ``length * len(charset)`` entries: one block of
          ``len(charset)`` slots per character position, with a single 1 at
          the character's index inside *charset*.

          Raises:
              ValueError: if ``len(text) != length`` (raised here), or if a
                  character is missing from *charset* (from ``charset.index``).
          """
          text_len = len(text)
          # Length check
          if text_len != length:
              raise ValueError('Error: length of captcha should be {}, but got {}'.format(length, text_len))
          n_symbols = len(charset)
          vec = np.zeros(length * n_symbols)
          for position, char in enumerate(text):
              # hot index = block offset of this position + index in charset
              vec[position * n_symbols + charset.index(char)] = 1
          return vec
      
      text = list(text)
      # Encode every label; vec[i] is the one-hot vector of text[i].
      vec = [text2vec(label) for label in text]
      

      将验证码向量解码为对应字符:

      def vec2text(vector):
          """Decode a flat captcha vector back into its text.

          *vector* is reshaped into CAPTCHA_LEN rows; each row contributes the
          CAPTCHA_CHARSET character at the index of its largest value.
          """
          if not isinstance(vector, np.ndarray):
              vector = np.asarray(vector)
          rows = np.reshape(vector, [CAPTCHA_LEN, -1])
          return ''.join(CAPTCHA_CHARSET[np.argmax(row)] for row in rows)
      
    • 训练模型

      引入第三方包:

      from PIL import Image
      from keras.utils.vis_utils import plot_model
      from keras.models import *
      from keras.layers import *
      import glob
      import pickle
      import numpy as np
      import tensorflow.gfile as gfile
      

      定义超参数和字符集:

      # Candidate character sets, each a list of one-character strings.
      NUMBER = list('0123456789')
      LOWERCASE = list('abcdefghijklmnopqrstuvwxyz')
      UPPERCASE = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
      CAPTCHA_CHARSET = NUMBER        # character set used for the captchas
      CAPTCHA_LEN = 4                 # characters per captcha
      CAPTCHA_HEIGHT = 60             # image height
      CAPTCHA_WIDTH = 160             # image width
      TRAIN_DATA_DIR = 'train-data/'  # training set directory
      TEST_DATA_DIR = 'test-data/'    # test set directory
      # Training hyper-parameters
      BATCH_SIZE = 100
      EPOCHS = 10
      OPT = 'adam'
      LOSS = 'binary_crossentropy'
      # Output locations and file suffixes
      MODEL_DIR = 'models/train_demo/'
      MODEL_FORMAT = '.h5'
      HISTORY_DIR = 'history/train_demo/'
      HISTORY_FORMAT = '.history'
      # Template: <dir>captcha_<optimizer>_<loss>_bs_<batch size>_epochs_<epochs><suffix>
      filename_str = "{}captcha_{}_{}_bs_{}_epochs_{}{}"
      _run_tag = (OPT, LOSS, BATCH_SIZE, EPOCHS)
      # Image file for the model structure plot
      MODEL_VIS_FILE = 'captch_classfication' + '.png'
      # Saved model file
      MODEL_FILE = filename_str.format(MODEL_DIR, *_run_tag, MODEL_FORMAT)
      # Saved training history file
      HISTORY_FILE = filename_str.format(HISTORY_DIR, *_run_tag, HISTORY_FORMAT)
      

      将RGB验证码图像转为灰度图 (函数代码与输入与输出数据处理中函数代码相同)

      对验证码中每个字符进行one-hot编码 (函数代码与输入与输出数据处理中函数代码相同)

      将验证码向量解码为对应字符 (函数代码与输入与输出数据处理中函数代码相同)

      适配Keras图像数据格式 (函数代码与输入与输出数据处理中函数代码相同)

      读取训练集:

      import os

      X_train = []
      Y_train = []
      for filename in glob.glob(TRAIN_DATA_DIR + "*.png"):
          X_train.append(np.array(Image.open(filename)))
          # Label = file name without directory and extension. basename and
          # splitext are exact, whereas str.lstrip/rstrip strip character
          # sets and leave a leading "\\" on Windows paths.
          Y_train.append(os.path.splitext(os.path.basename(filename))[0])
      

      处理训练集图像:

      X_train = np.array(X_train, dtype=np.float32)
      X_train = rgb2gray(X_train)
      X_train = X_train / 255
      X_train, input_shape = fit_keras_channels(X_train)
      

      处理训练集标签:

      Y_train = list(Y_train)
      for i in range(len(Y_train)):
          Y_train[i] = text2vec(Y_train[i])
      Y_train = np.asarray(Y_train)
      

      读取测试集,处理对应图像和标签:

      import os

      X_test = []
      Y_test = []
      for filename in glob.glob(TEST_DATA_DIR + "*.png"):
          X_test.append(np.array(Image.open(filename)))
          # Label = file name without directory and extension. This replaces
          # the lstrip/rstrip chain (including the extra lstrip('\\') needed
          # on Windows), which strips character sets rather than a prefix
          # and suffix.
          Y_test.append(os.path.splitext(os.path.basename(filename))[0])
      # Same image pipeline as the training set: float32 -> gray -> 1/255 -> channel axis.
      X_test = np.array(X_test, dtype=np.float32)
      X_test = rgb2gray(X_test)
      X_test = X_test / 255
      X_test, _ = fit_keras_channels(X_test)
      # One-hot encode every label and stack them into one array.
      Y_test = np.asarray([text2vec(label) for label in Y_test])
      

      创建验证码识别模型:

      # Input layer
      inputs = Input(shape=input_shape, name='inputs')
      # Convolution block 1
      conv1 = Conv2D(32, (3, 3), name="conv1")(inputs)
      relu1 = Activation('relu', name='relu1')(conv1)
      # Convolution block 2, followed by 2x2 max pooling
      conv2 = Conv2D(32, (3, 3), name='conv2')(relu1)
      relu2 = Activation('relu', name='relu2')(conv2)
      pool2 = MaxPooling2D(pool_size=(2, 2), padding='same', name='pool2')(relu2)
      # Convolution block 3, followed by 2x2 max pooling
      conv3 = Conv2D(64, (3, 3), name='conv3')(pool2)
      relu3 = Activation('relu', name='relu3')(conv3)
      pool3 = MaxPooling2D(pool_size=(2, 2), padding='same', name='pool3')(relu3)
      # Flatten the pooled feature maps for the dense heads
      x = Flatten()(pool3)
      # Dropout
      x = Dropout(0.25)(x)
      # One softmax head per captcha character, each as wide as the character
      # set. The sizes come from CAPTCHA_LEN / CAPTCHA_CHARSET instead of the
      # literals 4 and 10, so the model stays consistent with text2vec when
      # the constants change.
      x = [Dense(len(CAPTCHA_CHARSET), activation='softmax', name='fc%d' % (i + 1))(x) for i in range(CAPTCHA_LEN)]
      # Concatenate the heads so the output has the same layout as the labels
      outs = Concatenate()(x)
      # Define the model's input and output, then compile it
      model = Model(inputs=inputs, outputs=outs)
      model.compile(optimizer=OPT, loss=LOSS, metrics=['accuracy'])
      

      查看模型摘要:

      model.summary()
      

      模型可视化:

      plot_model(model, to_file=MODEL_VIS_FILE, show_shapes=True)
      

      [注:在Windows中执行模型可视化代码时,需要进行额外的配置,而且还需要添上如下两行代码:]

      import os
      os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'
      

      训练模型:

      history = model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=2, validation_data=(X_test, Y_test))
      

      预测样例:

      yy = model.predict(X_test[0].reshape(1, 60, 160, 1))
      

      保存模型:

      if not gfile.Exists(MODEL_DIR):
          gfile.MakeDirs(MODEL_DIR)
      model.save(MODEL_FILE)
      

      保存训练记录过程:

      # Bare expression: shows the recorded metric names in a notebook and has
      # no effect in a script.
      history.history.keys()
      if not gfile.Exists(HISTORY_DIR):
          gfile.MakeDirs(HISTORY_DIR)
      # Pickle the per-epoch metric dictionary.
      with open(HISTORY_FILE, 'wb') as history_out:
          pickle.dump(history.history, history_out)
      
    • 模型部署与效果演示

      模型部署:[运行下方代码即可]

      import base64
      import numpy as np
      import tensorflow as tf
      from io import BytesIO
      from flask import Flask, request, jsonify
      from keras.models import load_model
      from PIL import Image
      
      NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
      LOWERCASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
                   'v', 'w', 'x', 'y', 'z']
      UPPERCASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
                   'V', 'W', 'X', 'Y', 'Z']
      CAPTCHA_CHARSET = NUMBER        # 验证码字符集
      CAPTCHA_LEN = 4                 # 验证码长度
      CAPTCHA_HEIGHT = 60             # 验证码高度
      CAPTCHA_WIDTH = 160             # 验证码宽度
      
      MODEL_FILE = 'models/train_demo/captcha_adam_binary_crossentropy_bs_100_epochs_10.h5'
      
      def vec2text(vector):
          """Decode a flat captcha vector back into its text.

          *vector* is reshaped into CAPTCHA_LEN rows; each row contributes the
          CAPTCHA_CHARSET character at the index of its largest value.
          """
          if not isinstance(vector, np.ndarray):
              vector = np.asarray(vector)
          rows = np.reshape(vector, [CAPTCHA_LEN, -1])
          return ''.join(CAPTCHA_CHARSET[np.argmax(row)] for row in rows)
      
      def rgb2gray(img):
          """Collapse the last (colour) axis of *img* into one luma value.

          Only the first three entries of the last axis are used, weighted as
          Y' = 0.299 R + 0.587 G + 0.114 B. The result has the shape of *img*
          without its last axis.
          """
          luma_weights = [0.299, 0.587, 0.114]
          rgb = img[..., : 3]
          return np.dot(rgb, luma_weights)
      
      app = Flask(__name__)  # 创建 Flask 实例
      
      # Health-check URL
      @app.route('/ping', methods=['GET', 'POST'])
      def hello_world():
          """Return a fixed greeting so clients can check the server is up."""
          greeting = 'hello world'
          return greeting
      
      # Captcha recognition URL
      @app.route('/predict', methods=['GET', 'POST'])
      def predict():
          """Recognize the captcha posted to this endpoint.

          The image may arrive either as a multipart file field named
          ``image`` or as a JSON body ``{"image": "<base64 image bytes>"}``.

          Returns a JSON object with ``success`` (bool), ``prediction`` (the
          decoded text, empty on failure) and ``debug`` (last stage reached).
          """
          response = {'success': False, 'prediction': '', 'debug': 'error'}
          if request.method != 'POST':
              response['debug'] = 'No Post'
              return jsonify(response)

          image = None
          if request.files.get('image'):  # uploaded image file
              image = request.files['image'].read()
              response['debug'] = 'get image'
          else:
              # silent=True yields None instead of an error response when the
              # body is not JSON.
              payload = request.get_json(silent=True)
              if isinstance(payload, dict) and payload.get('image'):
                  # The JSON variant carries the image base64-encoded; it has
                  # to be decoded to raw bytes before PIL can open it.
                  # (Previously the bytes were never assigned to `image`, so
                  # this path failed with a NameError.)
                  image = base64.b64decode(payload['image'])
                  response['debug'] = 'get json'

          if image is not None:
              image = np.array(Image.open(BytesIO(image)))
              # Same preprocessing as in training: gray -> batch of one -> float32 in [0, 1].
              image = rgb2gray(image).reshape(1, CAPTCHA_HEIGHT, CAPTCHA_WIDTH, 1).astype('float32') / 255
              # Run the prediction inside the graph the model was loaded into.
              with graph.as_default():
                  pred = model.predict(image)
              response['prediction'] = vec2text(pred)
              response['success'] = True
              response['debug'] = 'predicted'
          return jsonify(response)
      
      
      model = load_model(MODEL_FILE)   # load the trained model
      graph = tf.get_default_graph()   # keep the default TensorFlow graph for predict()
      
      if __name__ == '__main__':
          # NOTE(review): debug=True turns on Flask's debug mode (reloader and
          # interactive debugger); confirm it is switched off before exposing
          # this server beyond local testing.
          app.run(debug=True)
      

      效果演示,进入相应路径,在命令行执行如下命令即可测试:

      curl -X POST -F image=@9986.png "http://localhost:5000/predict"
      
  4. 人脸识别

    [注:以下代码为核心代码,但仍然依赖有其他文件,故单独运行时会报错]

    • 加载训练数据集

      训练数据集的组织形式:每人一个文件目录,目录以人名命名,如“Fan_Bingbing”;每个人的文件目录下包含10张图像,图像文件以“人名_序号”命名,仅支持“.jpg”和“.jpeg”两种格式文件。

      import numpy as np
      import cv2
      import os.path
      
      class IdentityMetadata:
          """Location of one image file: <base>/<name>/<file>."""

          def __init__(self, base, name, file):
              # base: data set root directory
              # name: sub-directory name
              # file: image file name
              self.base, self.name, self.file = base, name, file

          def image_path(self):
              """Return the image path joined from base, name and file."""
              parts = (self.base, self.name, self.file)
              return os.path.join(*parts)

          def __repr__(self):
              # The path is the most useful representation when printing.
              return self.image_path()
      
      def load_metadata(path):
          """Collect an IdentityMetadata entry for every image under *path*.

          *path* is expected to contain one sub-directory per identity. Only
          files ending in .jpg or .jpeg (any letter case) are included.

          Entries are returned in sorted order (directories, then files within
          each directory). os.listdir gives no ordering guarantee, and the
          index-based code that consumes this array should not depend on the
          file system's listing order.

          Returns:
              numpy array of IdentityMetadata objects.
          """
          metadata = []
          for identity in sorted(os.listdir(path)):
              identity_dir = os.path.join(path, identity)
              # Skip stray files in the root (e.g. .DS_Store); listing them
              # would raise NotADirectoryError.
              if not os.path.isdir(identity_dir):
                  continue
              for f in sorted(os.listdir(identity_dir)):
                  # Only jpg and jpeg files are supported.
                  ext = os.path.splitext(f)[1].lower()
                  if ext in ('.jpg', '.jpeg'):
                      metadata.append(IdentityMetadata(path, identity, f))
          return np.array(metadata)
      
      def load_image(path):
          """Read the image at *path* and return it with RGB channel order.

          Raises:
              IOError: if OpenCV cannot read the file. cv2.imread signals
                  failure by returning None rather than raising, which would
                  otherwise surface as an opaque "'NoneType' object is not
                  subscriptable" error on the line below.
          """
          img = cv2.imread(path, 1)
          if img is None:
              raise IOError('Cannot read image file: {}'.format(path))
          # OpenCV loads images in BGR order; reverse the last axis to get RGB.
          return img[..., ::-1]
      
      metadata = load_metadata("images")
      
    • 人脸检测、对齐和提取

      从原图提取 96x96 RGB 人脸图像,如果原图不是1:1比例,提取后的人脸会进行拉伸变换。

      import matplotlib.pyplot as plt
      import matplotlib.patches as patches
      from align import AlignDlib
      
      # Initialize the OpenFace alignment tool with Dlib's 68-landmark model
      alignment = AlignDlib('face_detection/landmarks.dat')

      # Load one training image
      img = load_image(metadata[0].image_path())

      # Detect the face and get its bounding box
      bb = alignment.getLargestFaceBoundingBox(img)

      # Transform the image using the given landmarks and crop a 96x96 face
      aligned_img = alignment.align(96, img, bb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)

      def _show_panel(position, picture):
          # Draw *picture* in the given subplot position without tick marks.
          plt.subplot(position)
          plt.imshow(picture)
          plt.xticks([])
          plt.yticks([])

      # Original image
      _show_panel(131, img)

      # Original image with the detected bounding box
      _show_panel(132, img)
      plt.gca().add_patch(patches.Rectangle((bb.left(), bb.top()), bb.width(), bb.height(), fill=False, color='red'))

      # Aligned and cropped 96x96 face
      _show_panel(133, aligned_img)

      plt.show()
      
    • 加载 nn4.small2.v1 模型

      from model import create_model
      from keras.models import Model
      from keras.layers import Input, Layer
      from keras.utils.vis_utils import plot_model
      
      nn4_small2 = create_model()

      # Anchor, positive and negative inputs: 96x96 RGB images
      in_a, in_p, in_n = (Input(shape=(96, 96, 3)) for _ in range(3))

      # The same network maps each input to its face embedding
      emb_a, emb_p, emb_n = (nn4_small2(tensor) for tensor in (in_a, in_p, in_n))

      plot_model(nn4_small2, to_file='nn4_small2_model.png', show_shapes=True)
      
    • Triplet Loss Layer

      模型训练的目标是学习出一个将人脸图像嵌入到欧几里得特征空间的函数 $f(x)$,使得对于特定人脸图像 $x$,同一人不同人脸的欧氏距离尽可能小,不同人的欧氏距离尽可能大。

      通过最小化 triplet loss $L$ 可以学习到我们想要的模型:
      $$L = \sum_{i=1}^{N}\left[\,\|f(x_i^a) - f(x_i^p)\|_2^2 - \|f(x_i^a) - f(x_i^n)\|_2^2 + \alpha\,\right]_+$$
      其中 $[z]_+$ 表示 $\max(z, 0)$,$N$ 是三元组集合的基数。

      下面使用Keras的自定义Loss来实现Triplet Loss:

      from keras import backend as K
      
      class TripletLossLayer(Layer):
          """Layer that computes the triplet loss and registers it via add_loss."""

          def __init__(self, alpha, **kwargs):
              # alpha is the constant added inside the max(..., 0) term.
              self.alpha = alpha
              super().__init__(**kwargs)

          def triplet_loss(self, inputs):
              """Return sum(max(||a - p||^2 - ||a - n||^2 + alpha, 0)) over axis 0."""
              anchor, positive, negative = inputs
              pos_sq_dist = K.sum(K.square(anchor - positive), axis=-1)
              neg_sq_dist = K.sum(K.square(anchor - negative), axis=-1)
              hinge = K.maximum(pos_sq_dist - neg_sq_dist + self.alpha, 0)
              return K.sum(hinge, axis=0)

          def call(self, inputs):
              """Compute the loss, register it with add_loss and return it."""
              loss = self.triplet_loss(inputs)
              self.add_loss(loss)
              return loss
      
      triplet_loss_layer = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')([emb_a, emb_p, emb_n])
      nn4_small2_train = Model([in_a, in_p, in_n], triplet_loss_layer)
      
      plot_model(nn4_small2_train, to_file='nn4_small2_train.png', show_shapes=True)
      

      注:以上代码为定义好一个可以训练的模型,实际操作的时候只需要用预训练好的模型来进行人脸识别就行。

    • 加载预训练模型nn4.small2.v1

      nn4_small2_pretrained = create_model()
      nn4_small2_pretrained.load_weights('models/nn4.small2.v1.h5')
      
      def align_image(img):
          """Return the 96x96 aligned crop of the largest face found in *img*."""
          face_box = alignment.getLargestFaceBoundingBox(img)
          return alignment.align(
              96,
              img,
              face_box,
              landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE,
          )
      
      # One 128-value row per image, filled in below.
      embedded = np.zeros((len(metadata), 128))

      for i, m in enumerate(metadata):
          img = align_image(load_image(m.image_path()))
          # NOTE(review): if alignment yields no face the next line fails --
          # confirm every image in the data set contains a detectable face.
          # Scale by 1/255 and convert to float32
          img = (img / 255.).astype(np.float32)
          # Face embedding: predict on a batch of one and take its only row
          embedded[i] = nn4_small2_pretrained.predict(img[np.newaxis, ...])[0]
      
      def distance(emb1, emb2):
          """Return the squared Euclidean distance between two embeddings."""
          delta = emb1 - emb2
          return np.sum(np.square(delta))
      
      def show_pair(idx1, idx2):
          """Show images *idx1* and *idx2* side by side, titled with their embedding distance."""
          plt.figure(figsize=(8, 3))
          plt.suptitle(f'Distance = {distance(embedded[idx1], embedded[idx2]):.2f}')
          # Left panel shows idx1, right panel idx2; both without tick marks.
          for position, idx in ((121, idx1), (122, idx2)):
              plt.subplot(position)
              plt.imshow(load_image(metadata[idx].image_path()))
              plt.xticks([])
              plt.yticks([])
          plt.show()
      
      show_pair(2, 3)
      show_pair(2, 12)
      
    • 人脸分类

      使用现有的分类器KNN和SVM来进行人脸分类(训练数据使用的是欧美人脸,因此在对亚洲人脸进行分类时准确率可能没那么高):

      from sklearn.preprocessing import LabelEncoder
      from sklearn.neighbors import KNeighborsClassifier
      from sklearn.svm import LinearSVC
      from sklearn.metrics import accuracy_score
      
      # Directory names serve as class labels.
      targets = np.array([m.name for m in metadata])

      # Map the label strings to integer class ids.
      encoder = LabelEncoder()
      y = encoder.fit_transform(targets)

      # Odd positions form the training split, even positions the test split.
      sample_index = np.arange(metadata.shape[0])
      train_idx = sample_index % 2 != 0
      test_idx = sample_index % 2 == 0

      X_train, X_test = embedded[train_idx], embedded[test_idx]
      y_train, y_test = y[train_idx], y[test_idx]

      knn = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
      svc = LinearSVC()

      knn.fit(X_train, y_train)
      svc.fit(X_train, y_train)

      acc_knn = accuracy_score(y_test, knn.predict(X_test))
      acc_svc = accuracy_score(y_test, svc.predict(X_test))

      print(f'KNN accuracy = {acc_knn}, SVM accuracy = {acc_svc}')
      
    • 人脸识别测试

      import warnings
      warnings.filterwarnings('ignore')
      
      example_idx = 2
      # example_idx indexes the *test split*. The image must be taken from the
      # same split as the embedding; indexing metadata directly would show a
      # different sample than the one being classified.
      example_image = load_image(metadata[test_idx][example_idx].image_path())
      example_prediction = svc.predict([embedded[test_idx][example_idx]])
      # Map the predicted class id back to the identity name.
      example_identity = encoder.inverse_transform(example_prediction)[0]

      plt.imshow(example_image)
      plt.title(f'Recognized as {example_identity}')
      plt.xticks([])
      plt.yticks([])
      plt.show()
      
    • 模型测试与可视化分析

      from sklearn.metrics import f1_score
      
      distances = []      # squared L2 distance between pairs
      identical = []      # 1 if same identity, 0 otherwise

      num = len(metadata)

      # Visit every unordered pair exactly once (j > i). Starting j at 1
      # would include self-pairs (distance 0, always "identical") and count
      # most pairs twice, which skews the F1 and accuracy curves.
      for i in range(num - 1):
          for j in range(i + 1, num):
              distances.append(distance(embedded[i], embedded[j]))
              identical.append(1 if metadata[i].name == metadata[j].name else 0)

      distances = np.array(distances)
      identical = np.array(identical)
      thresholds = np.arange(0.1, 1.0, 0.01)

      # A pair is predicted "same identity" when its distance is below t.
      f1_scores = [f1_score(identical, distances < t) for t in thresholds]
      acc_scores = [accuracy_score(identical, distances < t) for t in thresholds]

      # Threshold with the best F1 score, and the accuracy at that threshold.
      opt_idx = np.argmax(f1_scores)
      opt_tau = thresholds[opt_idx]
      opt_acc = accuracy_score(identical, distances < opt_tau)

      # Plot F1 score and accuracy against the threshold
      plt.plot(thresholds, f1_scores, label='F1 score')
      plt.plot(thresholds, acc_scores, label='Accuracy')
      plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
      plt.title(f'Accuracy at threshold {opt_tau:.2f} = {opt_acc:.3f}')
      plt.xlabel('Distance threshold')
      plt.legend()
      plt.show()
      
      # Split the pair distances by ground truth.
      dist_pos = distances[identical == 1]
      dist_neg = distances[identical == 0]

      plt.figure(figsize=(12, 4))
      # Left: distances of same-identity pairs, with the chosen threshold.
      plt.subplot(121)
      plt.hist(dist_pos)
      plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
      plt.title('Distances (positive pairs)')
      plt.legend()

      # Right: distances of different-identity pairs (title typo fixed).
      plt.subplot(122)
      plt.hist(dist_neg)
      plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
      plt.title('Distances (negative pairs)')
      plt.legend()

      plt.show()
      
      # 降维人脸特征
      from sklearn.manifold import TSNE
      
      # Project the embeddings to two dimensions for plotting.
      X_embedded = TSNE(n_components=2).fit_transform(embedded)

      # One scatter series per identity. Sorting the label set keeps the
      # legend order and colour assignment stable between runs; the order of
      # a plain set of strings is not.
      for t in sorted(set(targets)):
          idx = targets == t
          plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=t)

      plt.legend(bbox_to_anchor=(1, 1))
      plt.xticks([])
      plt.yticks([])
      plt.show()
      
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值