新的模型结构还没想出来.....
想先试试cifar10上训练的结果和lenet5的差距,baseline主要参考这里。
然而我没有1080Ti...... 我只有1060 6G版,哭
首先写个cifar10的数据读入和预处理的脚本(脚本在之前的repo里:cifar10_loader.py),这里只讲一下大概的函数:
import pickle
import glob
import cv2
import tqdm
import os
import sys
import logging
import random
import numpy as np
import math
class Cifa10_data:
    """In-memory CIFAR-10 dataset with simple mini-batch iteration.

    This is the object the training script is meant to use: it loads and
    augments the data once in ``__init__`` and then hands out fixed-size
    batches of ``(images, one_hot_labels)``.
    """

    def __init__(self, base_dir, batch_size, rotate_ratio, flip_ratio, cropSize, validate_batch_num=3):
        """Load the dataset from *base_dir* and shuffle the training split.

        Args:
            base_dir: directory handed to ``load_cifar10`` and
                ``load_label_map``.
            batch_size: number of samples in every batch.
            rotate_ratio: fraction of the training images that are
                additionally added in rotated form.
            flip_ratio: fraction of the training images that are
                additionally added in horizontally mirrored form.
            cropSize: side length of the centre crop (28 keeps the input
                size consistent with MNIST; the raw images are 32x32).
            validate_batch_num: number of batches returned by
                ``get_validate_datas``.
        """
        (self.train_data_tensor, self.test_data_tensor,
         self.train_label_tensor, self.test_label_tensor) = load_cifar10(
            base_dir, rotate_ratio, flip_ratio, cropSize)
        self.batch_size = batch_size
        # Only full batches are served; a trailing partial batch is dropped.
        self.batchs_for_one_epoch_train = self.train_data_tensor.shape[0] // batch_size
        self.batchs_for_one_epoch_test = self.test_data_tensor.shape[0] // batch_size
        self.train_batch_counter = 0
        self.test_batch_counter = 0
        self.label_map = load_label_map(base_dir)
        self.valid_batches = validate_batch_num
        self.shuffle_train()

    def next_Batch_train(self):
        """Return the next ``(images, labels)`` training batch.

        Every full batch of the epoch is served exactly once; when the epoch
        is exhausted the training data is reshuffled and iteration restarts
        from the first batch.
        """
        if self.train_batch_counter >= self.batchs_for_one_epoch_train:
            # Epoch finished: start over on freshly shuffled data.
            self.train_batch_counter = 0
            self.shuffle_train()
        start_idx = self.train_batch_counter * self.batch_size
        end_idx = start_idx + self.batch_size
        self.train_batch_counter += 1
        return (self.train_data_tensor[start_idx:end_idx],
                self.train_label_tensor[start_idx:end_idx])

    def next_Batch_test(self):
        """Return the next ``(images, labels)`` test batch, or ``None``.

        ``None`` marks the end of one pass over the test set.  The counter is
        rewound at that point so that a later evaluation pass starts again
        from the first batch.
        """
        if self.test_batch_counter >= self.batchs_for_one_epoch_test:
            self.test_batch_counter = 0
            return None
        start_idx = self.test_batch_counter * self.batch_size
        end_idx = start_idx + self.batch_size
        self.test_batch_counter += 1
        return (self.test_data_tensor[start_idx:end_idx],
                self.test_label_tensor[start_idx:end_idx])

    def get_validate_datas(self):
        """Return the first ``valid_batches * batch_size`` test samples."""
        end_idx = self.valid_batches * self.batch_size
        return self.test_data_tensor[:end_idx], self.test_label_tensor[:end_idx]

    def shuffle_train(self):
        """Shuffle training images and labels with one shared permutation."""
        perm = np.random.permutation(self.train_data_tensor.shape[0])
        self.train_data_tensor = self.train_data_tensor[perm]
        self.train_label_tensor = self.train_label_tensor[perm]
def file_loader(file_path):
    """Read one CIFAR-10 python batch file.

    Returns:
        ``(images, labels)`` where ``images`` is a list of 32x32x3 arrays in
        BGR channel order and ``labels`` is the batch's ``b'labels'`` entry.
    """
    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open(file_path, 'rb') as handle:
        batch = pickle.load(handle, encoding='bytes')

    def to_image(flat_row):
        # Each record is a flat vector; rebuild it as a 32x32 colour image.
        # NOTE(review): the Fortran-order reshape followed by a 270 degree
        # rotation is presumably meant to restore the upright picture --
        # confirm the result is not mirrored.
        cube = np.array(flat_row).reshape((32, 32, 3), order="F")
        bgr = cv2.cvtColor(cube, cv2.COLOR_RGB2BGR)
        return rotate_image(bgr, 270, True)

    rows = batch[b'data']
    labels = batch[b'labels']
    images = [to_image(row) for row in rows]
    return images, labels
def load_cifar10(base_dir:str,rotate_ratio=0.1,flip_ratio=0.1,croppedSize=None):
    """Load and preprocess the CIFAR-10 python batches found in *base_dir*.

    Args:
        base_dir: directory containing the ``data_batch_*`` files and the
            ``test_batch`` file.
        rotate_ratio: forwarded to ``preprocess`` (rotation augmentation).
        flip_ratio: forwarded to ``preprocess`` (mirror augmentation).
        croppedSize: forwarded to ``preprocess`` as the centre-crop size.

    Returns:
        ``(train_data, test_data, train_labels, test_labels)`` as produced by
        ``preprocess``.

    Raises:
        FileNotFoundError: if the training or the test batch files are
            missing from *base_dir*.
    """
    # glob returns files in arbitrary order; sort for a reproducible layout.
    train_file_list = sorted(glob.glob(os.path.join(base_dir, "data_batch_*")))
    test_file_list = sorted(glob.glob(os.path.join(base_dir, "test_batch")))
    if not train_file_list or not test_file_list:
        # Fail early with a clear message instead of an obscure error deep
        # inside preprocessing.
        raise FileNotFoundError(
            "CIFAR-10 batch files (data_batch_*, test_batch) not found in %r" % (base_dir,))

    train_image = []
    train_label = []
    test_image = []
    test_label = []

    logging.info("train data file loading....")
    for file_path in tqdm.tqdm(train_file_list):
        images, labels = file_loader(file_path)
        train_image.extend(images)
        train_label.extend(labels)

    logging.info("test file loading....")
    for file_path in tqdm.tqdm(test_file_list):
        images, labels = file_loader(file_path)
        test_image.extend(images)
        test_label.extend(labels)

    logging.info("data preprocessing")
    train_data_tensor, train_label_tensor = preprocess(
        train_image, train_label, True, rotate_ratio, flip_ratio, croppedSize)
    test_data_tensor, test_label_tensor = preprocess(
        test_image, test_label, False, rotate_ratio, flip_ratio, croppedSize)
    return train_data_tensor, test_data_tensor, train_label_tensor, test_label_tensor
def rotate_image(img,rotate,keep_size=False):
    """Rotate *img* by *rotate* degrees around its centre.

    Args:
        img: image array; only its first two dimensions (height, width) are
            inspected here.
        rotate: rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.
        keep_size: when true the output has the input's width and height;
            otherwise the canvas is enlarged to the rotated bounding box.

    Returns:
        The rotated image; uncovered pixels are filled with white.
    """
    height, width = img.shape[:2]
    if keep_size:
        out_h, out_w = height, width
    else:
        theta = math.radians(rotate)
        abs_sin = math.fabs(math.sin(theta))
        abs_cos = math.fabs(math.cos(theta))
        out_h = int(width * abs_sin + height * abs_cos)
        out_w = int(height * abs_sin + width * abs_cos)

    transform = cv2.getRotationMatrix2D((width / 2, height / 2), rotate, 1)
    # Shift the result so the rotated content is centred on the new canvas.
    transform[0, 2] += (out_w - width) / 2
    transform[1, 2] += (out_h - height) / 2
    return cv2.warpAffine(img, transform, (out_w, out_h), borderValue=(255, 255, 255))
def preprocess(images_list,label_list,is_train=True,rotate_ratio=0.1,flip_ratio=0.1,cropSzie=None):
    """Augment (training only), centre-crop and tensorise a list of images.

    The input lists are left untouched; augmentation works on copies.

    Args:
        images_list: non-empty list of H x W x 3 image arrays; the crop is
            derived from the height of the first image.
        label_list: integer class labels, parallel to *images_list*.
        is_train: when true, rotated and mirrored copies of randomly chosen
            images are appended before cropping.
        rotate_ratio: fraction of the images to add again in rotated form
            (angle drawn from 30/60/90 degrees).
        flip_ratio: fraction of the images to add again horizontally flipped.
        cropSzie: requested side length of the centre crop, or ``None`` to
            keep the full image.  When the size difference is odd the
            resulting side is one pixel larger than requested.

    Returns:
        ``(images, labels)``: a float32 array of shape (N, crop, crop, 3) and
        the float32 one-hot labels for 10 classes.
    """
    rotate_angle = [30, 60, 90]
    flip_code = [1]

    full_size = images_list[0].shape[0]
    offset = 0 if cropSzie is None else (full_size - cropSzie) // 2
    crop_end = full_size - offset

    # Work on copies so the caller's lists are not extended as a side effect.
    images = list(images_list)
    labels = list(label_list)

    if is_train:
        total = len(images)
        rotate_idx = random.sample(range(0, total), int(total * rotate_ratio))
        flip_idx = random.sample(range(0, total), int(total * flip_ratio))
        rotated_images = [rotate_image(images[i], np.random.choice(rotate_angle), True)
                          for i in rotate_idx]
        flipped_images = [cv2.flip(images[i], np.random.choice(flip_code))
                          for i in flip_idx]
        rotated_labels = [labels[i] for i in rotate_idx]
        flipped_labels = [labels[i] for i in flip_idx]
        images.extend(rotated_images)
        labels.extend(rotated_labels)
        images.extend(flipped_images)
        labels.extend(flipped_labels)

    cropped = [item[offset:crop_end, offset:crop_end, :] for item in images]
    data_tensor = np.stack(cropped, axis=0).astype(np.float32)
    return data_tensor, build_onehot(labels, 10).astype(np.float32)
def build_onehot(labels,label_num):
    """Return an integer one-hot matrix of shape ``(len(labels), label_num)``.

    Args:
        labels: sequence of integer class indices.
        label_num: number of classes (columns).
    """
    # np.int was only an alias of the builtin int and no longer exists in
    # recent NumPy releases; the builtin gives the same dtype.
    label_tensor = np.zeros((len(labels), label_num), dtype=int)
    rows = np.arange(len(labels))
    cols = np.asarray(labels, dtype=np.intp)
    label_tensor[rows, cols] = 1
    return label_tensor
def load_label_map(base_dir):
    """Return the class names stored in ``<base_dir>/batches.meta`` as str."""
    meta_path = os.path.join(base_dir, "batches.meta")
    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open(meta_path, 'rb') as handle:
        meta = pickle.load(handle, encoding='bytes')
    names = []
    for raw_name in meta[b'label_names']:
        names.append(str(raw_name, encoding="utf-8"))
    return names
if __name__ == "__main__":
    # Smoke test: send DEBUG-level log records to stdout, load the dataset
    # and print the shapes of the resulting tensors.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
    root_logger.addHandler(stdout_handler)

    data_loader = Cifa10_data(
        "C:\\Users\\rebel\\.keras\\datasets\\cifar-10-batches-py",
        128, 0.25, 0.25, 28, 3,
    )
    for tensor in (data_loader.test_data_tensor,
                   data_loader.train_data_tensor,
                   data_loader.get_validate_datas()[0]):
        print(tensor.shape)
注意,这里用的是cifar10 python版本的数据。
然后在model1的基础上进行修改
值得一提的是上一篇文章中模型在对图片标准化的过程中有bug....本来应该除标准差,结果除成方差了.....(已经在repo中修复)
# Standardise the input: subtract the mean and divide by the standard
# deviation, both computed over axes 1 and 2 (presumably height and width --
# x_image is defined outside this snippet, confirm its layout).
# NOTE(review): a zero variance would make the division produce inf/NaN.
mean,var=tf.nn.moments(x_image,[1,2],keep_dims=True)
x_image=tf.subtract(x_image,mean)
x_image=tf.divide(x_image,tf.sqrt(var)) # bug fix: divide by the standard deviation (sqrt of the variance), not by the variance
1.先修改模型的输入,因为这里是彩色图像,所以输入维度要改成 [None,cropSize,cropSize,3]:
# Side length of the (cropped) square input images.
cropSize=28
# x: batch of colour images, shape [batch, cropSize, cropSize, 3].
x=tf.placeholder(shape=[None,cropSize,cropSize,3],dtype=tf.float32)
# y: labels with 10 entries per sample.
y=tf.placeholder(shape=[None,10],dtype=tf.float32)
# keep: scalar float placeholder (presumably the dropout keep probability --
# it is consumed outside this snippet).
keep=tf.placeholder(tf.float32)
#change 1:normalize input
# Mean and variance are taken over the two spatial axes, separately for every
# image and channel; the input is then centred and divided by the standard
# deviation.
mean,var=tf.nn.moments(x,[1,2],keep_dims=True)
x_image__=tf.subtract(x,mean)
x_image1=tf.divide(x_image__,tf.sqrt(var))
2.由于我们的数据读入使用 Cifa10_data 类,所以对训练和测试的数据读入也有小小的修改
然后总steps 设置为10000步
按照之前的方法,训练分两个阶段第一个阶段用adam 第二个阶段用sgd。直接train一把,果不其然,adam这玩意很难伺候,经常train到3000-6000步左右时梯度崩了,loss变成了nan值.....
经过不停的修改学习率,总算train下来了:
acc:74.2%,第一阶段train 的dropout keep 0.6 第二阶段 keep 0.9 flip_ratio 和 rotate ratio 均为0.05, 学习率 adam 4e-5 sgd:4e-6
在训练过程中发现 train acc 和 validation acc 差距比较大,考虑可能有点过拟合,所以我又修改了一下:
第一阶段train 的dropout keep 0.5 第二阶段 keep 1.0
acc:75.4%
第一阶段一共7000步第二阶段3000步,训练时长13min左右
emmmm...超不过
在调整第一阶段训练和第二阶段训练的过程中,突然想到可以加入 warm up 的过程:先取一个比较大的学习率训几步,然后再开始第一阶段、第二阶段,这样会不会比较好呢?
这样就变成了:
第一阶段:adam 学习率:4e-4 2000步 keep=0.3
第二阶段: adam 学习率:4e-5 5000步 keep=0.5
第三阶段:sgd 学习率:4e-6 3000步 keep=1.0
同时把flip ratio 改到了 0.1 引入更多的水平镜像
但是我手滑了一下.... 导致第二阶段和第一阶段连在了一起,也就是说前2000步在训练时train了两次.... 实际一共训了 12000步
最终 acc 达到了 77.48%,超过 baseline 1.2 个百分点,训练时间 16min。
改正手滑后最终 acc:77.23% 训练时间 13min。
最终证明了我的小模型(model1)超过了lenet5!
代码放在:https://github.com/lordrebel/beatLenet5 model1_cifar10