想不到深度学习如此强大,为了区分文字和模糊图像,仅用了160张图像、迭代2000次,就可以较好地区分出来了:
绿色框表示模糊,粉红色框表示文字。
如果用传统方式编写程序来实现,难度就太大了。
不过也有很多分类错的,误识别为模糊的:
观察一下发现,蓝天和婴儿的图像被误分类为模糊,于是新增2个分类 baby 和 sky,果然被误分类为模糊的情况减少了:
cfg/yolo-tiny-text-blur.cfg
# Global training/input settings for the network.
[net]
# batch and subdivisions are equal here.
# NOTE(review): in Darknet each forward/backward pass handles
# batch/subdivisions images (1 here), with weights updated once per full
# batch of 64 - confirm for the Darknet version in use.
batch=64
subdivisions=64
# Network input: 448x448 pixels, 3 channels.
height=448
width=448
channels=3
momentum=0.9
decay=0.0005
# Base learning rate; modified by the step schedule below.
learning_rate=0.0001
# steps[i] pairs with scales[i] (six entries each).
# NOTE(review): with Darknet's "steps" policy the rate is multiplied by the
# matching scale once the iteration count reaches each step, which would give
# 0.0001 -> 0.0005 (20) -> 0.0025 (40) -> 0.005 (60) -> 0.01 (80)
# -> 0.001 (20000) -> 0.0001 (30000). Confirm against the Darknet version.
# The accompanying notes mention a run of 2000 iterations, which is below the
# 20000/30000 steps listed here.
policy=steps
steps=20,40,60,80,20000,30000
scales=5,5,2,2,.1,.1
# Upper bound on training iterations.
max_batches = 40000
# Input crop / augmentation layer. The crop size equals the 448x448 network
# input declared in [net].
[crop]
crop_width=448
crop_height=448
# NOTE(review): flip=0 and angle=0 presumably disable mirroring and rotation
# augmentation - confirm against Darknet's crop layer.
flip=0
angle=0
# NOTE(review): presumably the maximum random saturation/exposure change
# applied during training - confirm against Darknet's crop layer.
saturation = 1.5
exposure = 1.5
# Feature extractor: nine 3x3, stride-1, pad=1 convolutions with leaky
# activation. The first six are each followed by a 2x2, stride-2 max-pool.
# Filter counts double per stage from 16 up to 1024, then stay at 1024.
# Layer order matters: sections are repeated on purpose and must not be merged.
# NOTE(review): assuming the padded 3x3 convolutions keep width/height and
# each max-pool halves them, the 448x448 input becomes
# 224 -> 112 -> 56 -> 28 -> 14 -> 7, matching side=7 in [detection].

# Stage 1: 16 filters, then pool.
[convolutional]
filters=16
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
# Stage 2: 32 filters, then pool.
[convolutional]
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
# Stage 3: 64 filters, then pool.
[convolutional]
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
# Stage 4: 128 filters, then pool.
[convolutional]
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
# Stage 5: 256 filters, then pool.
[convolutional]
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
# Stage 6: 512 filters, then the last pool.
[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
# Final three convolutions: 1024 filters each, no pooling in between.
[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky
# Fully connected head: 256 (linear) -> 4096 (leaky) -> dropout -> 686 (linear).
[connected]
output=256
activation=linear
[connected]
output=4096
activation=leaky
# Dropout with probability 0.5 between the last two connected layers.
[dropout]
probability=.5
# The output size is tied to the [detection] section that follows:
# 686 = side*side*(num*(coords+1)+classes) = 7*7*(2*(4+1)+4).
# Recompute this value whenever classes, side, num or coords change.
[connected]
output=686
activation=linear
# Detection (output) layer. Its input size must agree with the preceding
# [connected] output: side*side*(num*(coords+1)+classes) = 686.
[detection]
# Four object classes. The accompanying notes describe text and blur plus the
# later-added baby and sky; the class order is not shown in this file.
classes=4
# Four box coordinates per predicted box.
coords=4
# NOTE(review): rescore=1 presumably makes the confidence target track the
# box overlap instead of a constant - confirm in Darknet's detection layer.
rescore=1
# 7x7 output grid with 2 predicted boxes per grid cell.
side=7
num=2
# NOTE(review): softmax=0 presumably leaves class scores un-normalised and
# sqrt=1 presumably predicts square roots of box width/height - confirm.
softmax=0
sqrt=1
# NOTE(review): presumably the amount of random crop/shift jitter applied to
# training images - confirm.
jitter=.2
# NOTE(review): these look like the per-term loss weights (object confidence,
# no-object confidence, class, coordinates) - confirm in Darknet's detection
# layer before tuning.
object_scale=1
noobject_scale=.5
class_scale=1
coord_scale=5