最近因实验室的算力紧张,就尝试使用算力平台来进行实验,这里总结了几个我使用过的平台,以及我目前一直使用的OpenBayes的有关情况,以下都是个人使用感受,非常主观,如有不同,就是我错了 :)。
OpenBayes贝式计算:价格适中,不是最贵的也不是最便宜的,但是用起来的综合性价比是最高的,主要是交互舒服,卡的种类也比较多,不会有各种乱七八糟的bug,并且网速很快,每次下载环境和数据集很快,这个其实也蛮重要的
九天必昇:目前就是V100的16GB和32GB可供选择,虽然可以白嫖,但是有两个很大的问题:一是资源太紧张了,根本抢不到;二是太卡了,每次点开一个python文件就要卡半天
驱动云:驱动云对新用户的活动很友好,但是不太稳定,平台也没有明确的算力划分,不知道到底用的是什么卡,虽然选择很多,但是都特别特别模糊,感觉最快的那部分应该是3090的算力。
用我的专属链接,注册 OpenBayes,新用户可获得 4 小时 RTX 4090 + 5小时 CPU 的免费使用时长,永久有效
https://openbayes.com/console/signup?r=Fywoooo_6Yu9
具体算力价格对比:
OpenBayes:
驱动云:
九天必昇:
下面使用OpenBayes运行视觉提示 Visual Prompt的代码:
OpenBayes已经帮忙准备好了pytorch、tensorflow等和CUDA配套的深度学习包,这里可以直接选择,这里实在太棒了,不用去苦苦配环境了,在这里我们选择2.0的pytorch版本,运行实例
然后在git clone之后下载环境,可以看到这里的下载速度一般都是10MB/S或 20MB/S以上,体验很好,不用一直苦苦等待下环境了
这里我直接使用VP + CLIP的代码,也就是main_clip.py,可以看到下载预训练模型的速度也是相当快的,达到30MB/s 以上
下载CIFAR100数据集的速度也相当快:
训练代码如下:
from __future__ import print_function
import argparse
import os
from tqdm import tqdm
import time
import random
import wandb
import torch
import torch.backends.cudnn as cudnn
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR100
import clip
from models import prompters
from utils import accuracy, AverageMeter, ProgressMeter, save_checkpoint
from utils import cosine_lr, convert_models_to_fp32, refine_classname
def parse_option(argv=None):
    """Parse the command-line options for visual-prompt training on CLIP.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``, so existing callers that
            invoke ``parse_option()`` behave exactly as before.

    Returns:
        argparse.Namespace holding every option declared below. Its
        ``filename`` attribute is set to a run name assembled from the
        method, prompt size, dataset, model, architecture, optimizer,
        learning rate, weight decay, batch size, warmup and trial values.
    """
    parser = argparse.ArgumentParser('Visual Prompting for CLIP')

    parser.add_argument('--print_freq', type=int, default=10,
                        help='print frequency')
    parser.add_argument('--save_freq', type=int, default=50,
                        help='save frequency')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='batch_size')
    parser.add_argument('--num_workers', type=int, default=16,
                        help='num of workers to use')
    parser.add_argument('--epochs', type=int, default=1000,
                        help='number of training epochs')

    # optimization
    parser.add_argument('--optim', type=str, default='sgd',
                        help='optimizer to use')
    parser.add_argument('--learning_rate', type=float, default=40,
                        help='learning rate')
    parser.add_argument("--weight_decay", type=float, default=0,
                        help="weight decay")
    parser.add_argument("--warmup", type=int, default=1000,
                        help="number of steps to warmup for")
    parser.add_argument('--momentum', type=float, default=0.9,
                        help='momentum')
    parser.add_argument('--patience', type=int, default=1000)

    # model
    parser.add_argument('--model', type=str, default='clip')
    parser.add_argument('--arch', type=str, default='vit_b32')
    parser.add_argument('--method', type=str, default='padding',
                        choices=['padding', 'random_patch', 'fixed_patch'],
                        help='choose visual prompting method')
    parser.add_argument('--prompt_size', type=int, default=30,
                        help='size for visual prompts')

    # dataset
    parser.add_argument('--root', type=str, default='./data',
                        help='root directory of the dataset')
    parser.add_argument('--dataset', type=str, default='cifar100',
                        help='dataset')
    parser.add_argument('--image_size', type=int, default=224,
                        help='image size')

    # other
    parser.add_argument('--seed', type=int, default=0,
                        help='seed for initializing training')
    parser.add_argument('--model_dir', type=str, default='./save/models',
                        help='path to save models')
    parser.add_argument('--image_dir', type=str, default='./save/images',
                        help='path to save images')
    parser.add_argument('--filename', type=str, default=None,
                        help='filename to save')
    parser.add_argument('--trial', type=int, default=1,
                        help='number of trials')
    parser.add_argument('--resume', type=str, default=None,
                        help='path to resume from checkpoint')
    parser.add_argument('--evaluate', default=False,
                        action="store_true",
                        help='evaluate model test set')
    parser.add_argument('--gpu', type=int, default=None,
                        help='gpu to use')
    parser.add_argument('--use_wandb', default=False,
                        action="store_true",
                        help='whether to use wandb')

    args = parser.parse_args(argv)

    # NOTE(review): a user-supplied --filename is always overwritten by the
    # generated run name below. Kept as-is so existing save paths do not
    # change; confirm whether the flag is meant to be honored.
    # The numeric defaults above are deliberately left as ints (40, 0): they
    # are formatted into the run name, so changing them would rename outputs.
    args.filename = '{}_{}_{}_{}_{}_{}_lr_{}_decay_{}_bsz_{}_warmup_{}_trial_{}'.format(
        args.method, args.prompt_size, args.dataset, args.model, args.arch,
        args.optim, args.learning_rate, args.weight_decay, args.batch_size,
        args.warmup, args.trial)

    return args
# Best top-1 validation accuracy seen so far; main() rebinds it via `global`.
best_acc1 = 0

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def main():
    """Train, or only evaluate, a visual prompter on CIFAR-100 with CLIP ViT-B/32.

    Steps, in order: parse options, seed the RNGs, load CLIP and build the
    prompter selected by ``args.method``, optionally restore a checkpoint,
    build the CIFAR-100 loaders and the text prompts, create the SGD
    optimizer / loss / grad scaler / LR schedule, then either run a single
    validation pass (``--evaluate``) or the train/validate loop with
    checkpointing and patience-based early stopping.

    Side effects: rebinds the module-level ``best_acc1``, creates the model
    output folder, downloads CIFAR-100 into ``args.root`` if missing, and
    logs to Weights & Biases when ``--use_wandb`` is set.
    """
    global best_acc1, device

    args = parse_option()
    print(args)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True

    # create model
    model, preprocess = clip.load('ViT-B/32', device, jit=False)
    convert_models_to_fp32(model)
    model.eval()

    prompter = prompters.__dict__[args.method](args).to(device)

    # optionally resume from a checkpoint
    start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # NOTE: torch.load unpickles the file; only resume from
            # checkpoints that come from a trusted source.
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            # Continue counting from the stored epoch so that the loop and
            # the step-based LR schedule pick up where the checkpoint left
            # off instead of restarting at epoch 0.
            args.start_epoch = start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            # best_acc1 may be from a checkpoint from a different GPU; it can
            # also be a plain Python number, which has no `.to()`.
            if args.gpu is not None and torch.is_tensor(best_acc1):
                best_acc1 = best_acc1.to(args.gpu)
            prompter.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # create data
    template = 'This is a photo of a {}'
    print(f'template: {template}')

    train_dataset = CIFAR100(args.root, transform=preprocess,
                             download=True, train=True)

    val_dataset = CIFAR100(args.root, transform=preprocess,
                           download=True, train=False)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size, pin_memory=True,
                              num_workers=args.num_workers, shuffle=True)

    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size, pin_memory=True,
                            num_workers=args.num_workers, shuffle=False)

    class_names = train_dataset.classes
    class_names = refine_classname(class_names)
    texts = [template.format(label) for label in class_names]

    # define criterion and optimizer
    optimizer = torch.optim.SGD(prompter.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    criterion = torch.nn.CrossEntropyLoss().to(device)
    scaler = GradScaler()
    total_steps = len(train_loader) * args.epochs
    scheduler = cosine_lr(optimizer, args.learning_rate, args.warmup, total_steps)

    cudnn.benchmark = True

    # make dir
    refined_template = template.lower().replace(' ', '_')
    args.filename = f'{args.filename}_template_{refined_template}'

    args.model_folder = os.path.join(args.model_dir, args.filename)
    os.makedirs(args.model_folder, exist_ok=True)

    # wandb
    if args.use_wandb:
        wandb.init(project='Visual Prompting')
        wandb.config.update(args)
        wandb.run.name = args.filename
        wandb.watch(prompter, criterion, log='all', log_freq=10)

    if args.evaluate:
        validate(val_loader, texts, model, prompter, criterion, args)
        return

    epochs_since_improvement = 0

    for epoch in range(start_epoch, args.epochs):

        # train for one epoch
        train(train_loader, texts, model, prompter, optimizer, scheduler, criterion, scaler, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, texts, model, prompter, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': prompter.state_dict(),
            'best_acc1': best_acc1,
            'optimizer': optimizer.state_dict(),
        }, args, is_best=is_best)

        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print(f"There's no improvement for {epochs_since_improvement} epochs.")

            if epochs_since_improvement >= args.patience:
                print("The training halted by early stopping criterion.")
                break

    # wandb.run only exists after wandb.init(), i.e. when --use_wandb was
    # given; calling finish() unconditionally crashed runs without it.
    if args.use_wandb:
        wandb.run.finish()
def train(train_loader, texts, model, prompter, optimizer, scheduler, criterion, scaler, epoch, args):
    """Run one training epoch of the visual prompter.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        texts: List of text prompts, tokenized with ``clip.tokenize``.
        model: Called as ``model(prompted_images, text_tokens)``; its
            ``logit_scale`` is clamped after every optimizer step.
        prompter: Module applied to the images before they reach ``model``.
        optimizer: Stepped through ``scaler`` after each batch.
        scheduler: Callable invoked with the global step index before each
            batch to adjust the learning rate.
        criterion: Loss applied to ``(output, target)``.
        scaler: Gradient scaler used for the mixed-precision backward pass.
        epoch: Zero-based epoch index, used for the global step and logging.
        args: Options namespace; reads ``print_freq``, ``save_freq`` and
            ``use_wandb``.

    Returns:
        Tuple ``(losses.avg, top1.avg)`` taken from the running meters.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    prompter.train()

    num_batches_per_epoch = len(train_loader)

    # The text prompts are the same for every batch, so tokenize them once
    # here instead of repeating the work on each iteration.
    text_tokens = clip.tokenize(texts).to(device)

    end = time.time()
    for i, (images, target) in enumerate(tqdm(train_loader)):

        # measure data loading time
        data_time.update(time.time() - end)

        # adjust learning rate
        step = num_batches_per_epoch * epoch + i
        scheduler(step)

        optimizer.zero_grad()

        images = images.to(device)
        target = target.to(device)

        # with automatic mixed precision
        with autocast():
            prompted_images = prompter(images)
            output, _ = model(prompted_images, text_tokens)
            loss = criterion(output, target)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
        scaler.update()

        # Note: we clamp to 4.6052 = ln(100), as in the original paper.
        model.logit_scale.data = torch.clamp(model.logit_scale.data, 0, 4.6052)

        # measure accuracy
        acc1 = accuracy(output, target, topk=(1,))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0].item(), images.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

            if args.use_wandb:
                wandb.log({
                    'training_loss': losses.avg,
                    'training_acc': top1.avg
                })

        # Periodic mid-epoch checkpoint; best_acc1 is the module-level value.
        if i % args.save_freq == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': prompter.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, args)

    return losses.avg, top1.avg
def validate(val_loader, texts, model, prompter, criterion, args):
    """Evaluate the model on ``val_loader`` with and without the visual prompt.

    Each batch is scored twice: once on the prompted images and once on the
    untouched images. The loss is computed on the prompted output only.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches.
        texts: List of text prompts, tokenized with ``clip.tokenize``.
        model: Called as ``model(images, text_tokens)``.
        prompter: Module applied to the images to produce prompted images.
        criterion: Loss applied to ``(output_prompt, target)``.
        args: Options namespace; reads ``print_freq`` and ``use_wandb``.

    Returns:
        ``top1_prompt.avg``, the running top-1 accuracy on prompted images.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1_org = AverageMeter('Original Acc@1', ':6.2f')
    top1_prompt = AverageMeter('Prompt Acc@1', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1_org, top1_prompt],
        prefix='Validate: ')

    # switch to evaluation mode
    prompter.eval()

    with torch.no_grad():
        # The text prompts are the same for every batch, so tokenize them
        # once here instead of repeating the work on each iteration.
        text_tokens = clip.tokenize(texts).to(device)

        end = time.time()
        for i, (images, target) in enumerate(tqdm(val_loader)):

            images = images.to(device)
            target = target.to(device)
            prompted_images = prompter(images)

            # compute output
            output_prompt, _ = model(prompted_images, text_tokens)
            output_org, _ = model(images, text_tokens)
            loss = criterion(output_prompt, target)

            # measure accuracy and record loss
            acc1 = accuracy(output_prompt, target, topk=(1,))
            losses.update(loss.item(), images.size(0))
            top1_prompt.update(acc1[0].item(), images.size(0))

            acc1 = accuracy(output_org, target, topk=(1,))
            top1_org.update(acc1[0].item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        print(' * Prompt Acc@1 {top1_prompt.avg:.3f} Original Acc@1 {top1_org.avg:.3f}'
              .format(top1_prompt=top1_prompt, top1_org=top1_org))

        if args.use_wandb:
            wandb.log({
                'val_loss': losses.avg,
                'val_acc_prompt': top1_prompt.avg,
                'val_acc_org': top1_org.avg,
            })

    return top1_prompt.avg
# Script entry point: start the run only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
模型训练中:
可以看到在经过第一个epoch之后,准确率就达到了66.85%,超过zero-shot的63.1%,模型收敛很快