简介
evaluate库是一个机器学习模型评估函数库,可以加载各种任务的评估函数
加载评估函数
# Fetch the "accuracy" evaluation module from the evaluate hub.
accuracy = evaluate.load("accuracy")
# Displaying the module shows its description and expected input format.
accuracy
# Global (one-shot) computation: pass all labels and predictions at once.
accuracy = evaluate.load("accuracy")
# FIX: the keyword is `references` (plural); `reference` raises a TypeError.
results = accuracy.compute(references=[0, 1, 2, 0, 1, 2], predictions=[0, 1, 2, 1, 2, 1])
results
# Incremental computation: feed one (reference, prediction) pair at a time.
accuracy = evaluate.load("accuracy")
for ref, pred in zip([0, 1, 0, 1], [1, 0, 0, 1]):
    # FIX: `add()` takes singular keywords `reference=` / `prediction=`;
    # the original mixed singular `reference` with plural `predictions`.
    accuracy.add(reference=ref, prediction=pred)
accuracy.compute()
# Batch-wise incremental computation: feed a list of examples per step.
accuracy = evaluate.load("accuracy")
for refs, preds in zip([[0, 1], [0, 1]], [[1, 0], [0, 1]]):
    # FIX: lists of examples go through `add_batch` with plural keywords;
    # `add` accepts a single example only.
    accuracy.add_batch(references=refs, predictions=preds)
accuracy.compute()
# Compute several classification metrics at once via a combined module.
clf_metrics = evaluate.combine(["accuracy", "f1", "recall", "precision"])
clf_metrics
# FIX: results come from `.compute(predictions=..., references=...)`;
# the original re-called `.combine` with a nonexistent `precision=` keyword.
clf_metrics.compute(predictions=[0, 1, 0], references=[0, 1, 1])
# Visualize a side-by-side comparison of model evaluation results.
from evaluate.visualization import radar_plot

# FIX: dict literals use `:` between key and value; `"f1"=0.95` is a SyntaxError.
data = [
    {"accuracy": 0.99, "precision": 0.8, "f1": 0.95, "latency_in_seconds": 33.6},
    {"accuracy": 0.98, "precision": 0.87, "f1": 0.91, "latency_in_seconds": 11.2},
    {"accuracy": 0.98, "precision": 0.78, "f1": 0.88, "latency_in_seconds": 87.6},
    {"accuracy": 0.88, "precision": 0.78, "f1": 0.81, "latency_in_seconds": 101.6},
]
model_names = ["Model 1", "Model 2", "Model 3", "Model 4"]
# One radar axis per metric, one polygon per model.
plot = radar_plot(data=data, model_names=model_names)
模型微调
###微调前
def evaluate():
    """Run one pass over the validation set and return the accuracy.

    NOTE(review): relies on module-level `model`, `validloader` and
    `validset` defined elsewhere in the notebook — confirm they exist
    before calling.
    """
    model.eval()
    # FIX: initialize the counter (the original read an undefined name).
    acc_num = 0
    # FIX: `torch.inference)model()` was a typo for `torch.inference_mode()`.
    with torch.inference_mode():
        for batch in validloader:
            # FIX: `is_evailable` -> `is_available`.
            if torch.cuda.is_available():
                batch = {k: v.cuda() for k, v in batch.items()}
            output = model(**batch)
            pred = torch.argmax(output.logits, dim=-1)
            acc_num += (pred.long() == batch["labels"].long()).float().sum()
    return acc_num / len(validset)
def train(epoch=3, log_step=100):
    """Fine-tune `model` for `epoch` epochs.

    Logs the loss every `log_step` optimization steps and prints the
    validation accuracy after each epoch.

    NOTE(review): relies on module-level `model`, `trainloader` and
    `optimizer` defined elsewhere in the notebook.
    """
    # FIX: the original `def` line was missing its trailing colon.
    global_step = 0
    for ep in range(epoch):
        model.train()
        for batch in trainloader:
            # FIX: `torch.cude` -> `torch.cuda`.
            if torch.cuda.is_available():
                batch = {k: v.cuda() for k, v in batch.items()}
            # FIX: `zero_grad(0` was an unbalanced call; no argument is needed.
            optimizer.zero_grad()
            output = model(**batch)
            output.loss.backward()
            optimizer.step()
            if global_step % log_step == 0:
                print(f"ep:{ep},global_step:{global_step},loss:{output.loss.item()}")
            global_step += 1
        acc = evaluate()
        print(f"ep:{ep},acc:{acc}")
### 微调后
import evaluate
# Combine accuracy and F1 into a single metric module used during evaluation.
clf_metrics=evaluate.combine(["accuracy","f1"])
def evaluate():
    """Evaluate `model` on the validation set via the combined metrics.

    Returns the dict produced by `clf_metrics.compute()` (accuracy + f1).

    NOTE(review): this function shadows the imported `evaluate` module;
    consider renaming (e.g. `evaluate_model`). Kept as-is so existing
    callers keep working.
    NOTE(review): relies on module-level `model`, `validloader` and
    `clf_metrics` defined elsewhere in the notebook.
    """
    model.eval()
    # FIX: `torch.inference)model()` was a typo for `torch.inference_mode()`.
    with torch.inference_mode():
        for batch in validloader:
            # FIX: `is_evailable` -> `is_available`.
            if torch.cuda.is_available():
                batch = {k: v.cuda() for k, v in batch.items()}
            output = model(**batch)
            pred = torch.argmax(output.logits, dim=-1)
            # FIX: `add_batch` takes plural keywords `predictions=`/`references=`.
            # NOTE(review): the earlier evaluate() reads batch["labels"] while
            # this one reads batch["label"] — confirm the actual feature name.
            clf_metrics.add_batch(predictions=pred.long(), references=batch["label"].long())
    # FIX: `clf_metrics,compute()` (comma) was a typo for a method call.
    return clf_metrics.compute()
def train(epoch=3, log_step=100):
    """Fine-tune `model` for `epoch` epochs.

    Logs the loss every `log_step` optimization steps and prints the
    combined metric results (accuracy + f1) after each epoch.

    NOTE(review): relies on module-level `model`, `trainloader` and
    `optimizer` defined elsewhere in the notebook.
    """
    # FIX: the original `def` line was missing its trailing colon.
    global_step = 0
    for ep in range(epoch):
        model.train()
        for batch in trainloader:
            # FIX: `torch.cude` -> `torch.cuda`.
            if torch.cuda.is_available():
                batch = {k: v.cuda() for k, v in batch.items()}
            # FIX: `zero_grad(0` was an unbalanced call; no argument is needed.
            optimizer.zero_grad()
            output = model(**batch)
            output.loss.backward()
            optimizer.step()
            if global_step % log_step == 0:
                print(f"ep:{ep},global_step:{global_step},loss:{output.loss.item()}")
            global_step += 1
        clf = evaluate()
        print(f"ep:{ep},{clf}")