1. Original test code
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# Define the prompt and the chat messages
prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]

# Apply the chat template
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

# Build the model inputs and move them to the model's device
model_inputs = tokenizer([text], return_tensors="pt", padding=True, truncation=True).to(model.device)
print("Tokenized input:", model_inputs)

# Get the attention mask
attention_mask = model_inputs["attention_mask"]

# Generate text
generated_ids = model.generate(
    input_ids=model_inputs["input_ids"],
    attention_mask=attention_mask,  # pass the attention mask explicitly
    max_new_tokens=512
)

# Strip the prompt tokens, keeping only the newly generated part
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs["input_ids"], generated_ids)
]

# Decode the generated text
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Print the result
print(response)
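With add_generation_prompt=True, Qwen2's chat template renders the messages in ChatML style. The text handed to the tokenizer should look roughly like the string below (illustrative only; the authoritative format comes from the tokenizer's bundled chat template, so print(text) to confirm):

# Illustrative rendering of the messages above; compare with print(text)
rendered_example = (
    "<|im_start|>system\n"
    "You are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\n"
    "Give me a short introduction to large language model.<|im_end|>\n"
    "<|im_start|>assistant\n"
)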
2. API interface
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = FastAPI()

# Load the model and tokenizer once at startup
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# Pick the device to run on
device = "cuda" if torch.cuda.is_available() else "cpu"

class PromptRequest(BaseModel):
    prompt: str = "Give me a short introduction to large language model."

@app.post("/generate")
async def generate(prompt_request: PromptRequest):
    prompt = prompt_request.prompt
    # Define the chat messages
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    # Apply the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Build the model inputs on the target device
    model_inputs = tokenizer([text], return_tensors="pt", padding=True, truncation=True).to(device)
    # Get the attention mask
    attention_mask = model_inputs["attention_mask"]
    # Generate text
    generated_ids = model.generate(
        input_ids=model_inputs["input_ids"],
        attention_mask=attention_mask,
        max_new_tokens=512
    )
    # Strip the prompt tokens, keeping only the newly generated part
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs["input_ids"], generated_ids)
    ]
    # Decode the generated text
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return {"response": response}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
Start the server from the terminal with: uvicorn app:app --reload (the `app` before the colon is the .py file name, the `app` after it is the FastAPI instance).
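Once the server is up, the /generate endpoint can be exercised with a small client script; the sketch below assumes the server started above is listening on localhost:8000.

import requests

# Minimal client sketch for the /generate endpoint
# (assumes the FastAPI server above is running on localhost:8000).
resp = requests.post(
    "http://127.0.0.1:8000/generate",
    json={"prompt": "Give me a short introduction to large language model."},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])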
3. Multi-turn dialogue
from modelscope import AutoTokenizer, AutoModelForCausalLM

# Local cache directory for the model files
local_model = "../Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct", cache_dir=local_model)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", torch_dtype="auto", device_map="auto", cache_dir=local_model)

# Initialize the dialogue history
dialog_history = []

while True:
    prompt = input("Enter a message (q to quit): ")
    if prompt == "q":
        break
    # Update the dialogue history
    dialog_history.append({"role": "user", "content": prompt})
    # Build the model input from the system message plus the dialogue history
    messages = [{"role": "system", "content": "You are a helpful assistant."}] + dialog_history
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Generate a response
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Append the response to the dialogue history
    dialog_history.append({"role": "assistant", "content": response})
    print(f"Answer: {response}")
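Because the entire dialog_history is re-encoded on every turn, a long conversation will eventually exceed the model's context window. A minimal sketch of one possible safeguard (the trim_history helper and the MAX_TURNS value are illustrative, not part of the original code) is to keep only the most recent exchanges before building messages:

MAX_TURNS = 10  # illustrative limit on retained exchanges

def trim_history(history, max_turns=MAX_TURNS):
    # Each exchange adds two entries (user + assistant),
    # so keep only the last 2 * max_turns messages.
    return history[-2 * max_turns:]

# Call this before constructing `messages` inside the loop:
# dialog_history = trim_history(dialog_history)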
4. Building a persona
from flask import Flask, request, jsonify
from modelscope import AutoTokenizer, AutoModelForCausalLM

app = Flask(__name__)

# Load the model and tokenizer
local_model = "../Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct", cache_dir=local_model)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", torch_dtype="auto", device_map="auto", cache_dir=local_model)

# Define the role name and personality traits
role_name = "name"
personality_traits = "knowledgeable, helpful, and humorous"
system_message = f"You are playing the role of {role_name}, a {personality_traits} assistant."

# Initialize the dialogue history
dialog_history = []

@app.route('/talk', methods=['POST'])
def talk():
    global dialog_history
    data = request.get_json()
    prompt = data.get('prompt')
    if prompt == "q":
        return jsonify({"response": "Goodbye!", "role": role_name}), 200
    # Update the dialogue history
    dialog_history.append({"role": "user", "content": prompt})
    # Build the model input from the system message plus the dialogue history
    messages = [{"role": "system", "content": system_message}] + dialog_history
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Generate a response
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Append the response to the dialogue history
    dialog_history.append({"role": "assistant", "content": response})
    return jsonify({"response": response, "role": role_name}), 200

if __name__ == '__main__':
    app.run(debug=True)
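With the Flask app running (the development server listens on http://127.0.0.1:5000 by default), the /talk route can be tested with a short client; a minimal sketch, assuming that default address:

import requests

# Minimal client sketch for the Flask /talk route
# (assumes the default development server at http://127.0.0.1:5000).
resp = requests.post(
    "http://127.0.0.1:5000/talk",
    json={"prompt": "Introduce yourself."},
    timeout=120,
)
print(resp.json())  # e.g. {"response": "...", "role": "name"}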
5. Printing model parameters
from transformers import AutoModelForCausalLM, AutoTokenizer

model_directory = "../Qwen2-0.5B-Instruct"  # replace with the path to your model directory
model = AutoModelForCausalLM.from_pretrained(model_directory)
tokenizer = AutoTokenizer.from_pretrained(model_directory)

# Print the name, type, shape, and trainability of every parameter
for name, param in model.named_parameters():
    print(f"Name: {name}")
    print(f"Type: {type(param.data)}")
    print(f"Shape: {param.shape}")
    print(f"Trainable: {param.requires_grad}\n")

def calculate_total_params(model):
    return sum(param.numel() for param in model.parameters())

total_params = calculate_total_params(model)
print(f"Total number of parameters: {total_params}")

trainable_params = sum(param.numel() for param in model.parameters() if param.requires_grad)
non_trainable_params = sum(param.numel() for param in model.parameters() if not param.requires_grad)
print(f"Trainable parameters: {trainable_params}")
print(f"Non-trainable parameters: {non_trainable_params}")
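As a rough sanity check, the parameter count can also be turned into an approximate weight memory footprint. The sketch below continues from total_params above and only accounts for the weights themselves (not activations or the KV cache); the bytes-per-parameter values are the usual 2 for fp16/bf16 and 4 for fp32.

def estimate_weight_memory_gib(num_params, bytes_per_param=2):
    # Weights only: 2 bytes per parameter for fp16/bf16, 4 for fp32.
    return num_params * bytes_per_param / (1024 ** 3)

print(f"~{total_params / 1e6:.1f}M parameters, "
      f"~{estimate_weight_memory_gib(total_params):.2f} GiB of weights in fp16/bf16")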