Qwen2-0.5B

1. Original Test Code

from transformers import AutoModelForCausalLM, AutoTokenizer

# No manual device pinning is needed: device_map="auto" below decides where the
# model lives, and the inputs are moved to model.device after tokenization.

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# Define the prompt and the chat messages
prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]

# Apply the chat template
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

# Tokenize and move the inputs onto the same device as the model
model_inputs = tokenizer([text], return_tensors="pt", padding=True, truncation=True).to(model.device)
print("Tokenized input:", model_inputs)
# Extract the attention mask
attention_mask = model_inputs['attention_mask']

# Generate text
generated_ids = model.generate(
    input_ids=model_inputs['input_ids'],
    attention_mask=attention_mask,  # pass the attention mask explicitly
    max_new_tokens=512
)

# Strip the prompt tokens from each generated sequence
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs['input_ids'], generated_ids)
]

# Decode the generated tokens
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Print the result
print(response)
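
For reference, apply_chat_template renders the message list into Qwen2's ChatML format, so the text passed to the tokenizer looks roughly like this:

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Give me a short introduction to large language model.<|im_end|>
<|im_start|>assistant

The trailing assistant header comes from add_generation_prompt=True and tells the model to continue with the assistant's reply.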

2. API Endpoint

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = FastAPI()

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# Pick the device: fall back to CPU when no GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

class PromptRequest(BaseModel):
    prompt: str = "Give me a short introduction to large language model."

@app.post("/generate")
async def generate(prompt_request: PromptRequest):
    prompt = prompt_request.prompt
    
    # Build the chat messages
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    
    # Apply the chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    
    # Tokenize and move the inputs to the target device
    model_inputs = tokenizer([text], return_tensors="pt", padding=True, truncation=True).to(device)
    
    # Extract the attention mask
    attention_mask = model_inputs['attention_mask']
    
    # Generate text
    generated_ids = model.generate(
        input_ids=model_inputs['input_ids'],
        attention_mask=attention_mask,
        max_new_tokens=512
    )
    
    # Strip the prompt tokens from each generated sequence
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs['input_ids'], generated_ids)
    ]
    
    # Decode the generated tokens
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    return {"response": response}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

Start the server from a terminal with uvicorn app:app --reload, where the first app is the Python file name (app.py) and the second is the FastAPI instance.
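
For a quick smoke test, the endpoint can be called with Python's requests library (assuming the server is listening on localhost:8000 as configured above):

import requests

# Send a prompt to the /generate endpoint defined above
resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "Give me a short introduction to large language model."},
)
print(resp.json()["response"])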

3. Multi-turn Dialogue

from modelscope import AutoTokenizer, AutoModelForCausalLM

local_model = "../Qwen2-0.5B-Instruct"
# torch_dtype and device_map are model arguments, not tokenizer arguments
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct", cache_dir=local_model)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", torch_dtype="auto", device_map="auto", cache_dir=local_model)

# Initialize the dialogue history
dialog_history = []

while True:
    prompt = input("User (q to quit): ")
    if prompt == "q":
        break

    # Append the user turn to the history
    dialog_history.append({"role": "user", "content": prompt})
    
    # Build the model input: system message plus the full dialogue history
    messages = [{"role": "system", "content": "You are a helpful assistant."}] + dialog_history
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    
    # Generate a response
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    # Append the assistant turn to the history
    dialog_history.append({"role": "assistant", "content": response})
    
    print(f"回答:{response}")

4. Building a Persona

from flask import Flask, request, jsonify
from modelscope import AutoTokenizer, AutoModelForCausalLM

app = Flask(__name__)

# Load the model and tokenizer
local_model = "../Qwen2-0.5B-Instruct"
# torch_dtype and device_map are model arguments, not tokenizer arguments
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct", cache_dir=local_model)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", torch_dtype="auto", device_map="auto", cache_dir=local_model)

# Define the role name and personality traits
role_name = "name"
personality_traits = "knowledgeable, helpful, and humorous"
system_message = f"You are playing the role of {role_name}, a {personality_traits} assistant."

# Initialize the dialogue history
dialog_history = []

@app.route('/talk', methods=['POST'])
def talk():
    global dialog_history
    data = request.get_json()
    prompt = data.get('prompt')
    if prompt == "q":
        return jsonify({"response": "Goodbye!", "role": role_name}), 200

    # Append the user turn to the history
    dialog_history.append({"role": "user", "content": prompt})
    
    # Build the model input: system message plus the full dialogue history
    messages = [{"role": "system", "content": system_message}] + dialog_history
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    
    # Generate a response
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    # Append the assistant turn to the history
    dialog_history.append({"role": "assistant", "content": response})
    
    return jsonify({"response": response, "role": role_name}), 200

if __name__ == '__main__':
    app.run(debug=True)
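
Assuming Flask's default port 5000, the persona endpoint can be exercised the same way:

import requests

# Send one turn to the /talk endpoint defined above
resp = requests.post(
    "http://localhost:5000/talk",
    json={"prompt": "Introduce yourself in one sentence."},
)
data = resp.json()
print(f"{data['role']}: {data['response']}")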

5. Printing Parameters

from transformers import AutoModelForCausalLM, AutoTokenizer


model_directory = "../Qwen2-0.5B-Instruct"  # replace with your model directory path
model = AutoModelForCausalLM.from_pretrained(model_directory)
tokenizer = AutoTokenizer.from_pretrained(model_directory)

for name, param in model.named_parameters():
    print(f"Name: {name}")
    print(f"Type: {type(param.data)}")
    print(f"Shape: {param.shape}")
    print(f"Trainable: {param.requires_grad}\n")
    
def calculate_total_params(model):
    return sum(param.numel() for param in model.parameters())

total_params = calculate_total_params(model)
print(f"Total number of parameters: {total_params}")

trainable_params = sum(param.numel() for param in model.parameters() if param.requires_grad)
non_trainable_params = sum(param.numel() for param in model.parameters() if not param.requires_grad)

print(f"Trainable parameters: {trainable_params}")
print(f"Non-trainable parameters: {non_trainable_params}")
