柠檬分类实战
思考并动手进行调优,以在验证集上的准确率为评价指标,验证集上准确率越高,得分越高!
思考:数据本身已经做过数据增强,所以可以从其他方面入手来提高准确率。本文通过调整Adam优化器的定义,并将batch_size从32改为128,提高了准确率。因为效果已经比较好,所以没有对网络模型本身进行优化;不过模型的优化同样是调优的一个重要方面。
# 导入所需要的库
from sklearn.utils import shuffle
import os
import pandas as pd
import numpy as np
from PIL import Image
import paddle
import paddle.nn as nn
from paddle.io import Dataset
import paddle.vision.transforms as T
import paddle.nn.functional as F
from paddle.metric import Accuracy
import warnings
warnings.filterwarnings("ignore")
# Unpack the dataset archives (IPython shell escapes, not plain Python).
# unzip flags: -o overwrites existing files without prompting, -q runs quietly.
# NOTE(review): the later archives presumably come out of the earlier ones
# (homework zip -> lesson zip -> test/train image zips), so the order matters
# -- confirm against the actual archive layout.
!unzip -oq /home/aistudio/data/data72793/lemon_homework.zip
!unzip -oq /home/aistudio/lemon_homework/lemon_lesson.zip
!unzip -oq /home/aistudio/lemon_lesson/test_images.zip
!unzip -oq /home/aistudio/lemon_lesson/train_images.zip
# Load the training annotation table, reading only the 'id' and 'class_num' columns.
train_images = pd.read_csv(
    'lemon_lesson/train_images.csv',
    usecols=['id', 'class_num'],
)
# labelshuffling
def labelShuffling(dataFrame, groupByName = 'class_num'):
groupDataFrame = dataFrame.groupby(by=[groupByName])
labels = groupDataFrame.size()
print("length of label is ", len(labels))
maxNum = max(labels)
lst = pd.DataFrame()
for i in range(len(labels