部署运行
本阶段任务是设计部署流程、编写lora部署代码。
首先需要编写模型导入代码
# Select GPU when available; otherwise fall back to CPU.
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Path to the mPLUG-Owl multilingual checkpoint on the deployment host.
ckpt_dir = '/root/zsk/MAGAer13/mplug-owl-bloomz-7b-multilingual/'
# Load model, tokenizer and combined text/image processor in bfloat16.
self.model, self.tokenizer, self.processor = get_model(pretrained_ckpt=ckpt_dir, use_bf16=True)
self.model = self.model.to(self.device)
print('Model loaded.')
使用初始权重加载模型的 get_model 方法代码如下:
def get_model(pretrained_ckpt, use_bf16=False):
    """Load the MplugOwl model together with its tokenizer and processor.

    Args:
        pretrained_ckpt (str): Path to the pre-trained checkpoint.
        use_bf16 (bool, optional): Load weights in bfloat16 when True,
            otherwise in float16. Defaults to False.

    Returns:
        tuple: ``(model, tokenizer, processor)`` — the
        MplugOwlForConditionalGeneration model, its AutoTokenizer, and a
        MplugOwlProcessor combining image and text preprocessing.
    """
    dtype = torch.bfloat16 if use_bf16 else torch.half
    model = MplugOwlForConditionalGeneration.from_pretrained(
        pretrained_ckpt,
        torch_dtype=dtype,
    )
    image_proc = MplugOwlImageProcessor.from_pretrained(pretrained_ckpt)
    tok = AutoTokenizer.from_pretrained(pretrained_ckpt)
    return model, tok, MplugOwlProcessor(image_proc, tok)
使用 LoRA 权重加载模型的 get_lora_model 方法
由于 LoRA 训练后保存的权重 Key 与原始模型的权重 Key 不对应,因此加载前需要先用 PEFT 包装模型,使权重 Key 完成映射转换后再载入。
代码如下:
def get_lora_model(pretrained_ckpt, use_bf16=False, lora_ckpt=None):
    """Load MplugOwl, wrap it with a LoRA adapter and restore LoRA weights.

    Args:
        pretrained_ckpt (str): Path to the pre-trained base checkpoint.
        use_bf16 (bool, optional): Load weights in bfloat16 when True,
            otherwise in float16. Defaults to False.
        lora_ckpt (str, optional): Path to the LoRA state-dict file. When
            None, falls back to ``pretrained_ckpt`` (the original behavior).

    Returns:
        tuple: ``(model, tokenizer, processor)`` with all parameters frozen,
        ready for inference only.
    """
    print(pretrained_ckpt)
    model = MplugOwlForConditionalGeneration.from_pretrained(
        pretrained_ckpt,
        torch_dtype=torch.bfloat16 if use_bf16 else torch.half,
    )
    # Wrap the language model's query_key_value projections with LoRA so the
    # state-dict keys line up with those produced during LoRA training.
    peft_config = LoraConfig(
        target_modules=r'.*language_model.*\.query_key_value',
        inference_mode=True,
        r=8,
        lora_alpha=32,
        lora_dropout=0,
    )
    model = get_peft_model(model, peft_config)
    print('start load lora model')
    state_path = pretrained_ckpt if lora_ckpt is None else lora_ckpt
    # NOTE(review): `from_pretrained` expects a checkpoint directory while
    # `torch.load` expects a single state-dict file — confirm `state_path`
    # points at the LoRA state-dict file, not the directory.
    # NOTE(review): torch.load deserializes via pickle; only load
    # checkpoints from trusted sources.
    # map_location='cpu' avoids spiking GPU memory during the load;
    # load_state_dict then copies into the already-placed parameters.
    model.load_state_dict(torch.load(state_path, map_location='cpu'))
    print('loaded pretrained lora model')
    # model.print_trainable_parameters()
    # Freeze every parameter: this model is deployed for inference only.
    for param in model.parameters():
        param.requires_grad = False
    image_processor = MplugOwlImageProcessor.from_pretrained(pretrained_ckpt)
    tokenizer = AutoTokenizer.from_pretrained(pretrained_ckpt)
    processor = MplugOwlProcessor(image_processor, tokenizer)
    return model, tokenizer, processor
生成方法 do_generate 的代码如下:
def do_generate(prompts, image_list, model, tokenizer, processor, use_bf16=False, **generate_kwargs):
    """The interface for generation.

    Args:
        prompts (List[str]): The prompt text.
        image_list (List[str]): Paths of images; may be empty/None for
            text-only generation.
        model (MplugOwlForConditionalGeneration): The loaded model.
        tokenizer (AutoTokenizer): The model's tokenizer.
        processor (MplugOwlProcessor): Text/image preprocessor.
        use_bf16 (bool, optional): Whether the model was loaded in bfloat16.
            Defaults to False (float16).

    Returns:
        str: The generated sentence.
    """
    if image_list:
        images = [Image.open(p) for p in image_list]
    else:
        images = None
    inputs = processor(text=prompts, images=images, return_tensors='pt')
    # Bug fix: the original always cast float inputs to bfloat16, ignoring
    # `use_bf16` — a dtype mismatch when the model was loaded in float16.
    # Cast float tensors to the dtype the model was actually loaded with.
    float_dtype = torch.bfloat16 if use_bf16 else torch.half
    inputs = {k: v.to(float_dtype) if v.dtype == torch.float else v
              for k, v in inputs.items()}
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        res = model.generate(**inputs, **generate_kwargs)
    sentence = tokenizer.decode(res.tolist()[0], skip_special_tokens=True)
    return sentence