import os
from functools import partial

import numpy as np
import torch
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

# Net, load_data, train_cifar, and test_accuracy are defined in the
# earlier sections of this walkthrough (as in the PyTorch tutorial).

def main(num_samples=10, max_num_epochs=10, gpus_per_trial=1):
    data_dir = os.path.abspath('./data')
    load_data(data_dir)
    # Search space: layer widths are random powers of two, the learning
    # rate is sampled log-uniformly, and the batch size is a discrete choice.
    config = {
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16])
    }
    # ASHA stops poorly performing trials early, judged by validation loss.
    scheduler = ASHAScheduler(
        metric='loss',
        mode='min',
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    # Parameters and metrics to show in the command-line progress table.
    reporter = CLIReporter(
        parameter_columns=['l1', 'l2', 'lr', 'batch_size'],
        metric_columns=['loss', 'accuracy', 'training_iteration'])
    # functools.partial(func, args) binds data_dir, so the trainable
    # passed to tune.run only needs to accept config.
    result = tune.run(
        partial(train_cifar, data_dir=data_dir),
        resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)
    # Retrieve the trial with the lowest final validation loss.
    best_trial = result.get_best_trial('loss', 'min', 'last')
    print('Best trial config: {}'.format(best_trial.config))
    print('Best trial final validation loss: {}'.format(
        best_trial.last_result['loss']))
    print('Best trial final validation accuracy: {}'.format(
        best_trial.last_result['accuracy']))
    # Rebuild the best model and restore its weights from the trial's checkpoint.
    best_trained_model = Net(best_trial.config['l1'], best_trial.config['l2'])
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    best_trained_model.to(device)
    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(
        os.path.join(best_checkpoint_dir, 'checkpoint'))
    best_trained_model.load_state_dict(model_state)
    test_acc = test_accuracy(best_trained_model, device)
    print('Best trial test set accuracy: {}'.format(test_acc))
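For the lookups above to work, the trainable must report matching metric names and save a file literally named 'checkpoint'. In the tutorial's train_cifar, each epoch ends roughly like this (a sketch of the relevant lines; net, optimizer, val_loss, and val_acc are variables inside the trainable):

# Save model and optimizer state into a Ray-managed checkpoint directory.
with tune.checkpoint_dir(epoch) as checkpoint_dir:
    path = os.path.join(checkpoint_dir, 'checkpoint')
    torch.save((net.state_dict(), optimizer.state_dict()), path)
# The keyword names here must match the scheduler/reporter metric columns.
tune.report(loss=val_loss, accuracy=val_acc)

With everything in place, the entry point is simply main(num_samples=10, max_num_epochs=10, gpus_per_trial=1), with the arguments adjusted to the available hardware.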
This follows the official PyTorch hyperparameter tuning tutorial: Hyperparameter tuning with Ray Tune — PyTorch Tutorials 1.12.0+cu102 documentation.
Some points were unclear after reading it, so I also checked the Ray documentation.
If Ray fails with "Trials did not complete" during tuning, the problem is usually in the earlier model definition or its usage, which prevents the trials from running at all.
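In the Ray 1.x API the tutorial uses, "Trials did not complete" is the generic TuneError that tune.run raises when trials fail; the underlying traceback is written to an error.txt inside each failed trial's log directory. A minimal debugging sketch under that assumption, reusing the config and trainable from above:

# Don't raise on failure, so failed trials can be inspected afterwards.
analysis = tune.run(
    partial(train_cifar, data_dir=data_dir),
    config=config,
    raise_on_failed_trial=False)
for trial in analysis.trials:
    if trial.status == 'ERROR':
        # The trial's logdir contains error.txt with the full traceback.
        print(trial.logdir)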