问题:测试 xformers 的加速效果时,提示找不到 triton 模块。
解决:到 Hugging Face 下载与本机 Python 版本对应的 triton Windows 构建并安装,问题解决。
https://huggingface.co/madbuda/triton-windows-builds/tree/main

import torch
import xformers.ops
import time
# Micro-benchmark: plain PyTorch scaled dot-product attention versus
# xformers.ops.memory_efficient_attention on identical random inputs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 8
sequence_length = 512
d_model = 64
# Untimed calls made first so one-time costs (CUDA context creation, cuBLAS
# handle setup, kernel selection) are not charged to either implementation.
warmup_iters = 3
# Number of timed calls; the reported figure is the mean time per call.
timed_iters = 20

# 3-D inputs (batch, sequence, embedding), created directly on the target device.
query = torch.randn(batch_size, sequence_length, d_model, device=device)
key = torch.randn(batch_size, sequence_length, d_model, device=device)
value = torch.randn(batch_size, sequence_length, d_model, device=device)


def _sync():
    """Block until queued CUDA work has finished; do nothing on CPU.

    CUDA kernels launch asynchronously, so the clock must only be read after
    a synchronize. The guard avoids the error torch.cuda.synchronize() raises
    when no CUDA device is present.
    """
    if device.type == 'cuda':
        torch.cuda.synchronize()


def _naive_attention(q, k, v):
    """Return softmax(q @ k^T / sqrt(d)) @ v using plain PyTorch ops.

    The 1/sqrt(d) factor matches the default scaling documented for
    xformers.ops.memory_efficient_attention, so both paths compute the same
    function.
    """
    scale = q.shape[-1] ** -0.5
    attn_weights = torch.matmul(q, k.transpose(-2, -1)) * scale
    attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
    return torch.matmul(attn_weights, v)


def _benchmark(fn):
    """Run ``fn`` with warm-up and return (last_output, mean_seconds_per_call)."""
    for _ in range(warmup_iters):
        fn()
    _sync()
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    for _ in range(timed_iters):
        output = fn()
    _sync()
    elapsed = time.perf_counter() - start_time
    return output, elapsed / timed_iters


# without xformers
output_without_xformers, time_without_xformers = _benchmark(
    lambda: _naive_attention(query, key, value)
)

# xformers
# NOTE(review): the xformers kernels are expected to need a CUDA device; on a
# CPU-only machine this call may raise depending on the installed build.
output_with_xformers, time_with_xformers = _benchmark(
    lambda: xformers.ops.memory_efficient_attention(query, key, value)
)

# Reported values are mean seconds per call over ``timed_iters`` runs.
print(f"Time without xformers: {time_without_xformers:.6f} seconds")
print(f"Time with xformers: {time_with_xformers:.6f} seconds")