Datawhale_街景字符编码识别-Task3—字符识别模型

1、加载库

import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib.image as mpimg
import matplotlib.patches as patches

from pandas.plotting import register_matplotlib_converters
from sklearn.model_selection import train_test_split
import urllib
import os
import csv
import cv2
import time
from PIL import Image

import pprint

2、参数设置

# Label files for the training and validation splits.
train_label, val_label = './train.json', './val.json'

# Image geometry and batch size shared by the data generators and the models.
width, height, channel = 224, 112, 3
batch_size = 64

3、加载并查看数据集

# Read the training and validation label files into DataFrames.
json_train, json_valid = (pd.read_json(path) for path in (train_label, val_label))
json_train.head()

在这里插入图片描述

# Swap rows and columns of both frames (.transpose() is the long form of .T).
json_train = json_train.transpose()
json_valid = json_valid.transpose()
json_train.head()

在这里插入图片描述

# Move the current index into a regular column named 'index' (drop=False keeps
# it); the data generators below read that column as x_col.
json_train, json_valid = (
    frame.reset_index(drop=False) for frame in (json_train, json_valid)
)
json_train.head()

在这里插入图片描述

4、添加分类标签

# Normalise every label to exactly four entries: keep at most the first four
# items and right-pad shorter labels with 10.
# NOTE(review): 10 presumably acts as the "no digit" class next to digits 0-9
# (the model heads below have 11 outputs) - confirm against the dataset.
train_label_fill_in_x = json_train['label'].map(
    lambda digits: digits[:4] + [10] * (4 - len(digits)))
valid_label_fill_in_x = json_valid['label'].map(
    lambda digits: digits[:4] + [10] * (4 - len(digits)))

json_train['label_train'] = train_label_fill_in_x
json_valid['label_valid'] = valid_label_fill_in_x
json_train.head()

在这里插入图片描述

def sep_dig_labels(df, label):
    """Split a column of four-entry labels into one column per position.

    Reads ``df[label]``, whose entries must be indexable and hold at least
    four items, and stores the items found at positions 0-3 in the new
    columns ``d0``, ``d1``, ``d2`` and ``d3``.

    ``df`` is modified in place; nothing is returned.
    """
    targets = ('d0', 'd1', 'd2', 'd3')
    labels = df[label]

    # Collect all four lists before touching df, so a too-short label raises
    # before any column has been written.
    per_position = [
        [entry[pos] for entry in labels]
        for pos in range(len(targets))
    ]

    for column, values in zip(targets, per_position):
        df[column] = values
# Add the d0-d3 columns to both frames from their padded label columns.
for frame, label_column in ((json_train, 'label_train'), (json_valid, 'label_valid')):
    sep_dig_labels(frame, label_column)
json_train.head()

在这里插入图片描述

# Display the first rows of the validation frame, which sep_dig_labels has
# extended with the d0-d3 columns as well.
json_valid.head()

在这里插入图片描述

5、构建数据集

train_folder = './train/'
valid_folder = './val/'

# Training images: pixel values are multiplied by 1/255 and random
# rotation / shift / shear / zoom augmentation is applied.
trainGen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
                                                        rotation_range=40,
                                                        width_shift_range=0.2,
                                                        height_shift_range=0.2,
                                                        shear_range=0.2,
                                                        zoom_range=0.2,
                                                        fill_mode='nearest')
# Validation images are only rescaled, never augmented.
validGen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# File names come from the 'index' column and are resolved against
# train_folder; the four digit columns are returned as four separate targets.
# NOTE(review): Keras documents target_size as (height, width). Passing
# (width, height) gives 224-row by 112-column inputs. This matches the Input
# shape used by the models, so it runs, but confirm the orientation is intended.
train_generator = trainGen.flow_from_dataframe(json_train,
                                               directory=train_folder,
                                               x_col='index',
                                               y_col=['d0', 'd1', 'd2', 'd3'],
                                               target_size=(width, height),
                                               batch_size=batch_size,
                                               seed=3,
                                               shuffle=True,
                                               class_mode='multi_output',)

在这里插入图片描述

# Validation iterator: same columns and image size as the training iterator,
# but read from valid_folder and kept in a fixed order (shuffle=False).
valid_generator = validGen.flow_from_dataframe(json_valid,
                                               directory=valid_folder,
                                               x_col='index',
                                               y_col=['d0', 'd1', 'd2', 'd3'],
                                               target_size=(width, height),
                                               batch_size=batch_size,
                                               seed=3,
                                               shuffle=False,
                                               class_mode='multi_output',)

在这里插入图片描述

6、定义模型

6.1、自定义模型

input_img = keras.layers.Input(shape=(width, height, channel), name='img')

# Five "3x3 conv + ReLU -> 2x2 max-pool" stages. Only the first convolution
# pads its input ('same'); the others use Keras' default 'valid' padding.
# BatchNormalization and Dropout(0.2) after each stage were tried by the
# author and left disabled.
conv_stages = (
    (64, 'same'),
    (64, 'valid'),
    (128, 'valid'),
    (128, 'valid'),
    (256, 'valid'),
)

x = input_img
for n_filters, pad_mode in conv_stages:
    x = keras.layers.Conv2D(filters=n_filters,
                            kernel_size=3,
                            padding=pad_mode,
                            activation='relu')(x)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)

x = keras.layers.Flatten()(x)

# One 11-way softmax head per digit position, all fed by the same flattened
# features. The head names match the generator's target columns.
dig0, dig1, dig2, dig3 = [
    keras.layers.Dense(11, activation='softmax', name=head_name)(x)
    for head_name in ('d0', 'd1', 'd2', 'd3')
]

model = keras.models.Model(input_img, [dig0, dig1, dig2, dig3])

model.summary()

在这里插入图片描述

# Each digit head is trained with the same integer-label cross-entropy loss.
losses = {
    "d0": 'sparse_categorical_crossentropy',
    "d1": 'sparse_categorical_crossentropy',
    "d2": 'sparse_categorical_crossentropy',
    "d3": 'sparse_categorical_crossentropy',
}
model.compile(optimizer='adam',
              loss=losses,
              metrics=['accuracy'])

# Directory that receives both the TensorBoard logs and the checkpoint file.
logdir = os.path.join('hourse_num')  # './hourse_num'
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(logdir, exist_ok=True)

output_model_file = os.path.join(logdir,
                                 "first_try.h5")
callbacks = [
    keras.callbacks.TensorBoard(logdir),
    # Save the full model (not just weights), keeping only the best one seen
    # so far according to the callback's default monitored quantity.
    keras.callbacks.ModelCheckpoint(output_model_file,
                                    save_best_only=True,
                                    save_weights_only=False),
    # Stop after 5 epochs without an improvement of at least 1e-3.
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3),
]

# Sample counts reported by the iterators; used to derive steps per epoch.
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num)
print(valid_num)

在这里插入图片描述

epochs = 10

# Model.fit accepts Keras generators directly; fit_generator is deprecated and
# absent from recent Keras releases.
# Floor division means a final partial batch is not counted in the step totals.
history = model.fit(train_generator,
                    steps_per_epoch=train_num // batch_size,
                    validation_data=valid_generator,
                    validation_steps=valid_num // batch_size,
                    epochs=epochs,
                    callbacks=callbacks)

在这里插入图片描述

6.2、VGG16模型

1、使用tensorflow2.0自带的VGG16迁移学习
2、epochs = 10
3、batch_size = 64(沿用第2节参数设置中的取值,数据生成器未重新创建)
4、使用验证集验证模型
5、使用tensorflow回调函数callbacks实现:保存模型(h5格式)、提前终止模型、tensorboard显示

from tensorflow.keras.applications import VGG16

input_img = keras.layers.Input(shape=(width, height, channel), name='img')

# Build the ImageNet-pretrained VGG16 convolutional base WITHOUT calling it
# yet, so that `trainable` is set on the model itself. Setting it on the
# tensor returned by VGG16(...)(input_img) does not freeze any weights.
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(width, height, channel))
conv_base.trainable = False  # freeze the pretrained weights; must precede compile()

# NOTE(review): the generators feed pixels scaled by 1/255, whereas the
# ImageNet VGG16 weights were trained with
# keras.applications.vgg16.preprocess_input - confirm this is acceptable.
features = conv_base(input_img)

x = keras.layers.Flatten()(features)

# One 11-way softmax head per digit position on top of the frozen features.
dig0 = keras.layers.Dense(11, activation='softmax', name='d0')(x)
dig1 = keras.layers.Dense(11, activation='softmax', name='d1')(x)
dig2 = keras.layers.Dense(11, activation='softmax', name='d2')(x)
dig3 = keras.layers.Dense(11, activation='softmax', name='d3')(x)

model = keras.models.Model(input_img, [dig0, dig1, dig2, dig3])
model.summary()

在这里插入图片描述

# Each digit head is trained with the same integer-label cross-entropy loss.
losses = {
    "d0": 'sparse_categorical_crossentropy',
    "d1": 'sparse_categorical_crossentropy',
    "d2": 'sparse_categorical_crossentropy',
    "d3": 'sparse_categorical_crossentropy',
}
model.compile(optimizer='adam',
              loss=losses,
              metrics=['accuracy'])

# NOTE(review): the log directory and checkpoint file name are the same ones
# used for the custom model in section 6.1, so this run writes into the same
# TensorBoard directory and overwrites that checkpoint - confirm this is intended.
logdir = os.path.join('hourse_num')  # './hourse_num'
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(logdir, exist_ok=True)

output_model_file = os.path.join(logdir,
                                 "first_try.h5")
callbacks = [
    keras.callbacks.TensorBoard(logdir),
    # Save the full model (not just weights), keeping only the best one seen
    # so far according to the callback's default monitored quantity.
    keras.callbacks.ModelCheckpoint(output_model_file,
                                    save_best_only=True,
                                    save_weights_only=False),
    # Stop after 5 epochs without an improvement of at least 1e-3.
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3),
]

# Sample counts reported by the iterators; used to derive steps per epoch.
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num)
print(valid_num)
epochs = 10

# Model.fit accepts Keras generators directly; fit_generator is deprecated and
# absent from recent Keras releases.
# Floor division means a final partial batch is not counted in the step totals.
history = model.fit(train_generator,
                    steps_per_epoch=train_num // batch_size,
                    validation_data=valid_generator,
                    validation_steps=valid_num // batch_size,
                    epochs=epochs,
                    callbacks=callbacks)

在这里插入图片描述

7、预测并生成提交文件





天池是一个著名的数据科学竞赛平台,而datawhale是一家致力于数据科学教育和社群建设的组织。街景字符编码识别是指通过计算机视觉技术,对街道场景中的字符进行自动识别和分类。 街景字符编码识别是一项重要的研究领域,对于提高交通安全、城市管理和智能驾驶技术都具有重要意义。街道场景中的字符包括道路标志、车牌号码、店铺招牌等。通过对这些字符进行准确的识别,可以辅助交通管理人员进行交通监管、道路规划和交通流量分析。同时,在智能驾驶领域,街景字符编码识别也是一项关键技术,可以帮助自动驾驶系统准确地识别和理解道路上的各种标志和标识,为自动驾驶提供可靠的环境感知能力。 天池和datawhale联合举办街景字符编码识别竞赛,旨在吸引全球数据科学和计算机视觉领域的优秀人才,集思广益,共同推动该领域的研究和发展。通过这个竞赛,参赛选手可以使用各种机器学习和深度学习算法,基于提供的街景字符数据集,设计和训练模型,实现准确的字符编码识别。这个竞赛不仅有助于促进算法研发和技术创新,也为各参赛选手提供了一个学习、交流和展示自己技能的平台。 总之,天池datawhale街景字符编码识别是一个具有挑战性和实际应用需求的竞赛项目,旨在推动计算机视觉和智能交通领域的技术发展,同时也为数据科学爱好者提供了一个学习和展示自己能力的机会。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值