studio-conda -t lmdeploy -o pytorch-2.1.2
conda activate lmdeploy
pip install lmdeploy[all]==0.3.0
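To confirm the install before moving on, a quick sanity check (a minimal sketch; it only verifies that the package imports and reports the expected version string):

python -c "import lmdeploy; print(lmdeploy.__version__)"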
ls /root/share/new_models/Shanghai_AI_Laboratory/
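The script below loads the model from /root/internlm2-chat-1_8b. Assuming the internlm2-chat-1_8b weights appear in the shared directory listed above, a symlink makes them available at that path without copying (the exact subdirectory name is an assumption based on the path used in the script):

ln -s /root/share/new_models/Shanghai_AI_Laboratory/internlm2-chat-1_8b /root/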
touch /root/pipeline_transformer.py
Copy and paste the following content into pipeline_transformer.py:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("/root/internlm2-chat-1_8b", trust_remote_code=True)

# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and cause OOM Error.
model = AutoModelForCausalLM.from_pretrained("/root/internlm2-chat-1_8b", torch_dtype=torch.float16, trust_remote_code=True).cuda()
model = model.eval()

inp = "hello"
print("[INPUT]", inp)
response, history = model.chat(tokenizer, inp, history=[])
print("[OUTPUT]", response)

inp = "please provide three suggestions about time management"
print("[INPUT]", inp)
response, history = model.chat(tokenizer, inp, history=history)
print("[OUTPUT]", response)
python /root/pipeline_transformer.py
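For comparison, the same prompts can be sent through LMDeploy's own pipeline API. This is a minimal sketch assuming the lmdeploy 0.3.x pipeline entry point; unlike the Transformers script above, a plain batched call treats each prompt independently rather than threading explicit chat history:

from lmdeploy import pipeline

# Build an inference pipeline on the same local model weights.
pipe = pipeline("/root/internlm2-chat-1_8b")

# Prompts in one batch are handled as independent requests.
responses = pipe(["hello", "please provide three suggestions about time management"])
for r in responses:
    print(r.text)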