先看最终效果:
项目主要分为三部分:
1、数据集解析处理
2、模型训练
3、推理计算与GUI可视化
本文使用的数据集是开源的果蔬数据集Fruits 360,下载后截图如下所示:
其中:
Test 表示测试数据集目录
Training 表示训练数据集目录
接下来分别看一下训练集和测试集各自的目录情况。
Training 目录截图如下所示:
Test目录截图如下所示:
可以看到:训练集和测试集都是提前划分好的,每个目录下面都有131个子目录,也就是说该数据集一共包含131个类别。接下来我们详细统计一下各类别的数据量(注意:下面的代码读取的是 data/train/ 和 data/test/ 这两个路径,运行前需要先把 Training、Test 目录中的内容放到对应位置),代码如下:
def _count_files_per_label(data_dir):
    """Return ``{label: file_count}`` for every sub-directory of *data_dir*.

    Each immediate sub-directory is treated as one class label and the number
    of entries directly inside it is used as that label's sample count.
    """
    counts = {}
    # Sorted so the resulting JSON has a stable key order on every filesystem
    # (os.listdir returns entries in arbitrary order).
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        # Stray files next to the class folders (e.g. .DS_Store, a README)
        # are not labels; calling os.listdir on them would raise.
        if not os.path.isdir(label_dir):
            continue
        counts[label] = len(os.listdir(label_dir))
    return counts


def lookDataset(train_dir="data/train/", test_dir="data/test/"):
    """Print and save per-class sample counts of the train and test splits.

    For each split the total number of samples is printed and the per-label
    counts are written as JSON to ``train_num.json`` / ``test_num.json`` in
    the current working directory.

    Args:
        train_dir: Directory containing one sub-directory per class with the
            training samples.
        test_dir: Directory containing one sub-directory per class with the
            test samples.

    Returns:
        A ``(train_dict, test_dict)`` tuple of ``{label: count}`` mappings.

    Raises:
        OSError: If either directory does not exist or cannot be read.
    """
    train_dict = _count_files_per_label(train_dir)
    print("Total Train Number: ", sum(train_dict.values()))
    with open("train_num.json", "w") as f:
        f.write(json.dumps(train_dict))

    test_dict = _count_files_per_label(test_dir)
    print("Total Test Number: ", sum(test_dict.values()))
    with open("test_num.json", "w") as f:
        f.write(json.dumps(test_dict))

    return train_dict, test_dict
结果输出如下:
训练集-测试集数据量如下:
Total Train Number: 67692
Total Test Number: 22688
其中,我对其各个类别数据量也进行了统计,测试集详情如下所示:
{
"Apple Braeburn": 164,
"Apple Crimson Snow": 148,
"Apple Golden 1": 160,
"Apple Golden 2": 164,
"Apple Golden 3": 161,
"Apple Granny Smith": 164,
"Apple Pink Lady": 152,
"Apple Red 1": 164,
"Apple Red 2": 164,
"Apple Red 3": 144,
"Apple Red Delicious": 166,
"Apple Red Yellow 1": 164,
"Apple Red Yellow 2": 219,
"Apricot": 164,
"Avocado": 143,
"Avocado ripe": 166,
"Banana": 166,
"Banana Lady Finger": 152,
"Banana Red": 166,
"Beetroot": 150,
"Blueberry": 154,
"Cactus fruit": 166,
"Cantaloupe 1": 164,
"Cantaloupe 2": 164,
"Carambula": 166,
"Cauliflower": 234,
"Cherry 1": 164,
"Cherry 2": 246,
"Cherry Rainier": 246,
"Cherry Wax Black": 164,
"Cherry Wax Red": 164,
"Cherry Wax Yellow": 164,
"Chestnut": 153,
"Clementine": 166,
"Cocos": 166,
"Corn": 150,
"Corn Husk": 154,
"Cucumber Ripe": 130,
"Cucumber Ripe 2": 156,
"Dates": 166,
"Eggplant": 156,
"Fig": 234,
"Ginger Root": 99,
"Granadilla": 166,
"Grape Blue": 328,
"Grape Pink": 164,
"Grape White": 166,
"Grape White 2": 166,
"Grape White 3": 164,
"Grape White 4": 158,
"Grapefruit Pink": 166,
"Grapefruit White": 164,
"Guava": 166,
"Hazelnut": 157,
"Huckleberry": 166,
"Kaki": 166,
"Kiwi": 156,
"Kohlrabi": 157,
"Kumquats": 166,
"Lemon": 164,
"Lemon Meyer": 166,
"Limes": 166,
"Lychee": 166,
"Mandarine": 166,
"Mango": 166,
"Mango Red": 142,
"Mangostan": 102,
"Maracuja": 166,
"Melon Piel de Sapo": 246,
"Mulberry": 164,
"Nectarine": 164,
"Nectarine Flat": 160,
"Nut Forest": 218,
"Nut Pecan": 178,
"Onion Red": 150,
"Onion Red Peeled": 155,
"Onion White": 146,
"Orange": 160,
"Papaya": 164,
"Passion Fruit": 166,
"Peach": 164,
"Peach 2": 246,
"Peach Flat": 164,
"Pear": 164,
"Pear 2": 232,
"Pear Abate": 166,
"Pear Forelle": 234,
"Pear Kaiser": 102,
"Pear Monster": 166,
"Pear Red": 222,
"Pear Stone": 237,
"Pear Williams": 166,
"Pepino": 166,
"Pepper Green": 148,
"Pepper Orange": 234,
"Pepper Red": 222,
"Pepper Yellow": 222,
"Physalis": 164,
"Physalis with Husk": 164,
"Pineapple": 166,
"Pineapple Mini": 163,
"Pitahaya Red": 166,
"Plum": 151,
"Plum 2": 142,
"Plum 3": 304,
"Pomegranate": 164,
"Pomelo Sweetie": 153,
"Potato Red": 150,
"Potato Red Washed": 151,
"Potato Sweet": 150,
"Potato White": 150,
"Quince": 166,
"Rambutan": 164,
"Raspberry": 166,
"Redcurrant": 164,
"Salak": 162,
"Strawberry": 164,
"Strawberry Wedge": 246,
"Tamarillo": 166,
"Tangelo": 166,
"Tomato 1": 246,
"Tomato 2": 225,
"Tomato 3": 246,
"Tomato 4": 160,
"Tomato Cherry Red": 164,
"Tomato Heart": 228,
"Tomato Maroon": 127,
"Tomato not Ripened": 158,
"Tomato Yellow": 153,
"Walnut": 249,
"Watermelon": 157
}
训练集详情如下所示:
{
"Apple Braeburn": 492,
"Apple Crimson Snow": 444,
"Apple Golden 1": 480,
"Apple Golden 2": 492,
"Apple Golden 3": 481,
"Apple Granny Smith": 492,
"Apple Pink Lady": 456,
"Apple Red 1": 492,
"Apple Red 2": 492,
"Apple Red 3": 429,
"Apple Red Delicious": 490,
"Apple Red Yellow 1": 492,
"Apple Red Yellow 2": 672,
"Apricot": 492,
"Avocado": 427,
"Avocado ripe": 491,
"Banana": 490,
"Banana Lady Finger": 450,
"Banana Red": 490,
"Beetroot": 450,
"Blueberry": 462,
"Cactus fruit": 490,
"Cantaloupe 1": 492,
"Cantaloupe 2": 492,
"Carambula": 490,
"Cauliflower": 702,
"Cherry 1": 492,
"Cherry 2": 738,
"Cherry Rainier": 738,
"Cherry Wax Black": 492,
"Cherry Wax Red": 492,
"Cherry Wax Yellow": 492,
"Chestnut": 450,
"Clementine": 490,
"Cocos": 490,
"Corn": 450,
"Corn Husk": 462,
"Cucumber Ripe": 392,
"Cucumber Ripe 2": 468,
"Dates": 490,
"Eggplant": 468,
"Fig": 702,
"Ginger Root": 297,
"Granadilla": 490,
"Grape Blue": 984,
"Grape Pink": 492,
"Grape White": 490,
"Grape White 2": 490,
"Grape White 3": 492,
"Grape White 4": 471,
"Grapefruit Pink": 490,
"Grapefruit White": 492,
"Guava": 490,
"Hazelnut": 464,
"Huckleberry": 490,
"Kaki": 490,
"Kiwi": 466,
"Kohlrabi": 471,
"Kumquats": 490,
"Lemon": 492,
"Lemon Meyer": 490,
"Limes": 490,
"Lychee": 490,
"Mandarine": 490,
"Mango": 490,
"Mango Red": 426,
"Mangostan": 300,
"Maracuja": 490,
"Melon Piel de Sapo": 738,
"Mulberry": 492,
"Nectarine": 492,
"Nectarine Flat": 480,
"Nut Forest": 654,
"Nut Pecan": 534,
"Onion Red": 450,
"Onion Red Peeled": 445,
"Onion White": 438,
"Orange": 479,
"Papaya": 492,
"Passion Fruit": 490,
"Peach": 492,
"Peach 2": 738,
"Peach Flat": 492,
"Pear": 492,
"Pear 2": 696,
"Pear Abate": 490,
"Pear Forelle": 702,
"Pear Kaiser": 300,
"Pear Monster": 490,
"Pear Red": 666,
"Pear Stone": 711,
"Pear Williams": 490,
"Pepino": 490,
"Pepper Green": 444,
"Pepper Orange": 702,
"Pepper Red": 666,
"Pepper Yellow": 666,
"Physalis": 492,
"Physalis with Husk": 492,
"Pineapple": 490,
"Pineapple Mini": 493,
"Pitahaya Red": 490,
"Plum": 447,
"Plum 2": 420,
"Plum 3": 900,
"Pomegranate": 492,
"Pomelo Sweetie": 450,
"Potato Red": 450,
"Potato Red Washed": 453,
"Potato Sweet": 450,
"Potato White": 450,
"Quince": 490,
"Rambutan": 492,
"Raspberry": 490,
"Redcurrant": 492,
"Salak": 490,
"Strawberry": 492,
"Strawberry Wedge": 738,
"Tamarillo": 490,
"Tangelo": 490,
"Tomato 1": 738,
"Tomato 2": 672,
"Tomato 3": 738,
"Tomato 4": 479,
"Tomato Cherry Red": 492,
"Tomato Heart": 684,
"Tomato Maroon": 367,
"Tomato not Ripened": 474,
"Tomato Yellow": 459,
"Walnut": 735,
"Watermelon": 475
}
整体来看,划分得还是比较均衡的,各个类别的训练集与测试集数量之比基本维持在3:1左右。
当然了,如果想要自己对数据集划分,也是可以的,这里我也同样实现了数据集随机比例划分功能,如下所示:
def _move_numbered(src_dir, file_names, dest_dir, label):
    """Move *file_names* from *src_dir* into *dest_dir* as ``<label>_<n>.jpg``.

    Numbering continues after the entries already present in *dest_dir*.
    """
    os.makedirs(dest_dir, exist_ok=True)
    # Start after whatever is already there, so a second run appends.
    index = len(os.listdir(dest_dir)) + 1
    for file_name in file_names:
        new_path = os.path.join(dest_dir, label + '_' + str(index) + '.jpg')
        # If the existing numbering has gaps, the count-based index can point
        # at a file that already exists; skip ahead instead of overwriting it.
        while os.path.exists(new_path):
            index += 1
            new_path = os.path.join(dest_dir, label + '_' + str(index) + '.jpg')
        shutil.move(os.path.join(src_dir, file_name), new_path)
        index += 1


def random2Dataset(dataDir='data/original/', ratio=0.3,
                   trainDir='data/train/', testDir='data/test/'):
    """Randomly split a per-class image folder into train and test sets.

    Every sub-directory of *dataDir* is treated as one class. For each class,
    ``int(n * ratio)`` randomly chosen files are moved to
    ``<testDir>/<label>/`` and the remaining files to ``<trainDir>/<label>/``.
    Files are MOVED, not copied, so the class folders under *dataDir* are
    left empty. Moved files are renamed to ``<label>_<n>.jpg``; the ``.jpg``
    suffix is applied regardless of the original extension.

    Args:
        dataDir: Source directory with one sub-directory per class.
        ratio: Fraction of each class that goes to the test set; must be
            within ``[0, 1]``.
        trainDir: Root directory of the resulting training set.
        testDir: Root directory of the resulting test set.

    Raises:
        ValueError: If *ratio* is outside ``[0, 1]``.
        OSError: If *dataDir* cannot be read or a file cannot be moved.
    """
    # Validate before touching the filesystem so a bad ratio cannot leave a
    # half-moved dataset behind.
    if not 0 <= ratio <= 1:
        raise ValueError('ratio must be between 0 and 1, got %r' % (ratio,))

    for one_label in sorted(os.listdir(dataDir)):
        oneDir = os.path.join(dataDir, one_label)
        # Only class folders are split; stray files in dataDir are ignored.
        if not os.path.isdir(oneDir):
            continue

        # List once, sort for a platform-independent starting order (so a
        # seeded run is reproducible), then shuffle to randomise the split.
        pic_list = sorted(os.listdir(oneDir))
        random.shuffle(pic_list)
        testNum = int(len(pic_list) * ratio)

        # Build the test set from the first testNum shuffled files ...
        _move_numbered(oneDir, pic_list[:testNum],
                       os.path.join(testDir, one_label), one_label)
        # ... and the training set from everything that is left.
        _move_numbered(oneDir, pic_list[testNum:],
                       os.path.join(trainDir, one_label), one_label)
可以根据自己的实际情况进行选择。
模型层面我基于VGG主干网络进行改造,设计新的网络模型,如下所示:
可以看到:参数量缩减了很多。
我默认设置了1000次的迭代,实际观察发现:不到10次就足够了,我们可以看下训练可视化的曲线:
准确度曲线:
损失曲线:
模型训练结束后,我在整个测试集上进行了测试,准确率达到了96.55%,如下所示: