环境
Paddle>=1.2
cuda9 cudnn7
Python3.5
train关键代码段
def train(model):
    """Build the network for *model* and run the single-GPU training loop.

    The function creates the model through ``create_model``, attaches an Adam
    optimizer (learning rate 1e-4) to the loss, runs the startup program on
    CUDA device 0 and optionally restores weights through ``load_model``
    before entering the training loop.

    ``pretrain_model``, ``DataSet``, ``total_step`` and ``path`` are not
    parameters; they are read from a scope outside this function.
    """
    predict, loss, iou = create_model(model=model)
    fluid.optimizer.Adam(learning_rate=1e-4).minimize(loss)

    exe = fluid.Executor(fluid.CUDAPlace(0))
    exe.run(fluid.default_startup_program())

    # The loss and prediction variables are fetched on every step, so they
    # are passed in the skip set of the memory optimizer.
    fluid.memory_optimize(
        fluid.default_main_program(),
        print_log=False,
        skip_opt_set={loss.name, predict.name},
    )

    if not pretrain_model:
        print("load succeed")
    else:
        load_model(exe, fluid.default_main_program(), model=model)
        print("load model succeed")

    def trainLoop():
        """Iterate over the batch generator, logging and checkpointing."""
        iou_sum = 0
        iou_count = 0
        for i, imgs, labels, _names in DataSet.get_batch_generator(1, total_step):
            step_start = time.time()
            fetched = exe.run(
                fluid.default_main_program(),
                feed={'img': imgs, 'label': labels},
                fetch_list=[loss, predict, iou],
            )
            step_end = time.time()
            # fetched follows the fetch_list order: loss, predict, iou.
            loss_out = fetched[0]
            predict_out = fetched[1]
            iou_out = fetched[2]

            # Running mean of the IoU over the current window.
            iou_sum += iou_out
            iou_count += 1
            mean_iou = iou_sum / iou_count
            print(' iou = ', iou_out, 'mean_iou = ', mean_iou)
            if iou_count % 1000 == 0:
                # Start a new averaging window every 1000 steps.
                iou_sum = iou_count = 0

            if i != 0 and i % 1000 == 0:
                print("Model saved")
                save_model(exe, fluid.default_main_program(), model=model)

            if i % 10 == 0:
                # Dump the current prediction and its label as images.
                class_map = np.argmax(predict_out, axis=-1).reshape((1024, 1024))
                saveImage(class_map, path + '/train.png')
                label_map = np.argmax(labels[0], axis=2)
                saveImage(label_map, path + '/trainlabel.png')

            if i % 20 == 0:
                # Histogram of the predicted class indices.
                print(' ', Counter(np.argmax(predict_out, axis=1)))

            if i % 2 == 0:
                print("step {:d},loss {:.6f},step_time: {:.3f}".format(
                    i, loss_out[0], step_end - step_start))

    trainLoop()
if __name__ == "__main__":
    # Command-line entry point: the optional --model flag names the model
    # that is passed to both the data reader and the training routine.
    parse = argparse.ArgumentParser(description='')
    parse.add_argument(
        '--model',
        nargs='?',
        help='model name',
    )
    args = parse.parse_args()
    model = args.model

    # DataSet is bound at module level because train() reads it from there.
    DataSet = create_reader(model)
    train(model)
完整代码见:https://github.com/qixuxiang/Baidu_Lane_Segmentation/blob/master/train.py
上面代码改成多GPU训练版本只需要改三处:
一. 在 `exe.run(fluid.default_startup_program())` 之后加入以下几行代码:
# Multi-GPU setup: wrap the default main program in a data-parallel
# CompiledProgram that is bound to the name `binary`.
exec_strategy = fluid.ExecutionStrategy()
# Use as many executor threads as there are CUDA devices.
exec_strategy.num_threads = fluid.core.get_cuda_device_count()
exec_strategy.num_iteration_per_drop_scope = 100
build_strategy = fluid.BuildStrategy()
compiled_program = fluid.compiler.CompiledProgram(fluid.default_main_program())
binary = compiled_program.with_data_parallel(
    loss_name=loss.name,
    build_strategy=build_strategy,
    exec_strategy=exec_strategy,
)
二. 在 `trainLoop()` 中把 `fluid.default_main_program()` 替换为 `binary`。
三. 最后把 batch size 改为显卡数或显卡数的整数倍即可。