在Ubuntu22.04系统电脑端部署Qwen2-Audio（无尿点版）-CSDN博客

本文链接：https://blog.csdn.net/brain1234/article/details/141826025

1.基本环境

ubuntu22.04

cuda 12.4

显卡：RTX2080Ti

anaconda

2 Qwen2-Audio安装

2.1 仓库克隆

# 国内github镜像克隆

git clone https://mirror.ghproxy.com/https://github.com/QwenLM/Qwen2-Audio.git

# 进入目录

cd Qwen2-Audio/

2.2 安装依赖

2.2.1 创建进入虚拟环境

# 1.创建虚拟环境

conda create -n qwen2 python=3.10

# 2.激活环境

conda activate qwen2

2.2.2安装依赖

（1）设置清华源、更新pip

pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

python -m pip install --upgrade pip

（2）安装支持 CUDA 12.4 的 PyTorch（cu124 版本）

下载地址：pytorch官网

pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

检验：

python
import torch
torch.__version__
torch.cuda.is_available()

在这里插入图片描述

如果能打印出 torch 的版本号，并且 torch.cuda.is_available() 返回 True，说明 pytorch 安装成功

（3）安装依赖文件

cd demo
pip install -r requirements_web_demo.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

（4）安装webui界面及其他未安装依赖

pip install librosa
pip install --upgrade "accelerate>=0.21.0"
pip install django
pip install git+https://mirror.ghproxy.com/https://github.com/huggingface/transformers

（5）安装魔搭库准备下载模型文件：

pip install modelscope

（6）下载相关模型

cd ..
modelscope download --model qwen/qwen2-audio-7b-instruct --local_dir './Qwen/Qwen2-Audio-7B-Instruct'

（7）执行代码，初步启动webui界面验证

改端口:

sudo vim demo/web_demo_audio.py

在这里插入图片描述
运行：

python demo/web_demo_audio.py

输入

http://localhost:15110

成功进入
在这里插入图片描述

（8）加入声音驱动，实现真正的语音聊天（电脑需要配备有麦克风）

cd demo

pip install tenacity

创建一个新的webui界面的文件

touch test_audio.py

vim test_audio.py

输入以下内容

import gradio as gr
import modelscope_studio as mgr
import librosa
from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration
from argparse import ArgumentParser
import requests
import os
from django.http import HttpResponse
from tenacity import retry, wait_fixed, stop_after_attempt, RetryError
# 默认的模型检查点路径
DEFAULT_CKPT_PATH = 'Qwen/Qwen2-Audio-7B-Instruct'

# Retry policy: wait 2 seconds between attempts, give up after 5 attempts.
@retry(wait=wait_fixed(2), stop=stop_after_attempt(5))
def post_request_with_retry(url, data, timeout=None):
    """POST ``data`` as JSON to ``url``, retrying when the attempt raises.

    Args:
        url: Target URL of the HTTP service.
        data: JSON-serialisable payload sent as the request body.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            The default ``None`` keeps the previous behaviour (no timeout),
            which means a hung server blocks the attempt indefinitely.

    Returns:
        The ``requests.Response`` of the first successful attempt.

    Raises:
        tenacity.RetryError: When every attempt raised (connection error,
            timeout, or an HTTP error status).
    """
    response = requests.post(url, json=data, timeout=timeout)
    # raise_for_status() raises requests.HTTPError for 4xx/5xx responses,
    # which in turn triggers the next retry attempt.
    response.raise_for_status()
    return response

def text_to_speech(text2):
    """Ask the TTS HTTP service to synthesise ``text2`` and save the audio.

    Args:
        text2: Text to convert to speech; sent with ``text_language`` "zh".

    Returns:
        The path of the written audio file, or ``None`` when the request
        failed (after all retries) and nothing was written.

    NOTE(review): the URL below uses port 15110, which is also the default
    ``--server-port`` of this demo — confirm the address of the TTS service.
    NOTE(review): the output path is hard-coded; adjust it to your project
    directory.
    """
    data = {
        "text": text2,
        "text_language": "zh",
    }
    try:
        # The helper retries on failure and raises RetryError once the
        # maximum number of attempts is exhausted.
        response = post_request_with_retry('http://127.0.0.1:15110', data)
    except RetryError:
        print("请求失败，已达到最大重试次数")
        return None
    except requests.RequestException as e:
        print(f"请求失败：{e}")
        return None

    audio_file_path = "/home/t/Qwen2-Audio/demo/output.mp3"
    with open(audio_file_path, "wb") as f:
        f.write(response.content)
    return audio_file_path

def _get_args():
    """Parse the command-line arguments that configure the demo.

    Returns:
        argparse.Namespace: Namespace with ``checkpoint_path``, ``cpu_only``,
        ``inbrowser``, ``server_port`` and ``server_name``.
    """
    parser = ArgumentParser()
    # Model checkpoint name or local path.
    parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH,
                        help="Checkpoint name or path, default to %(default)r")
    # Run on CPU only.
    parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only")
    # Open the interface automatically in the default browser.
    parser.add_argument("--inbrowser", action="store_true", default=False,
                        help="Automatically launch the interface in a new tab on the default browser.")
    # Port the web server listens on.
    parser.add_argument("--server-port", type=int, default=15110,
                        help="Demo server port.")
    # Host name / address the web server binds to.
    parser.add_argument("--server-name", type=str, default="0.0.0.0",
                        help="Demo server name.")

    args = parser.parse_args()
    return args

def add_text(chatbot, task_history, input):
    """Append the user's submission to the history and to the chat display.

    Args:
        chatbot (list): Chat display entries; a ``[user_message, None]`` pair
            is appended.
        task_history (list): Conversation history; a ``{"role": "user", ...}``
            entry is appended.
        input: Submitted value exposing ``.text`` and ``.files`` (each file
            exposing ``.path``).

    Returns:
        tuple: ``(chatbot, task_history, None)``; the trailing ``None`` is the
        value returned for the input component.
    """
    text_content = input.text
    content = []
    if len(input.files) > 0:  # audio attachments come first
        for i in input.files:
            content.append({'type': 'audio', 'audio_url': i.path})
    if text_content:  # typed text, if any
        content.append({'type': 'text', 'text': text_content})
    task_history.append({"role": "user", "content": content})

    # Mirror the submission in the chat display; the reply slot stays empty.
    chatbot.append([{
        "text": input.text,
        "files": input.files,
    }, None])
    return chatbot, task_history, None
'''
def add_file(chatbot, task_history, audio_file_path):
    """
    将音频文件添加到聊天记录中。

参数:
    chatbot (gr.components.Chatbot): 聊天机器人组件。
    task_history (list): 任务历史记录。
    audio_file_path (str): 音频文件的路径。

返回:
    tuple: 更新后的聊天机器人界面和任务历史记录。
"""
# 确保任务历史记录中的音频条目是正确的格式
task_history.append({"role": "user", "content": [{"type": "audio", "audio_url": audio_file_path}]})

# 更新聊天记录，直接使用 audio_file_path 而不是 gr.Audio 组件
chatbot.append((None, {"type": "audio", "audio_url": audio_file_path}))

return chatbot, task_history
'''
import os

def add_file(chatbot, task_history, audio_path):
    """Append an audio file to the history and to the chat display.

    Args:
        chatbot (list): Chat display entries.
        task_history (list): Conversation history.
        audio_path (str): Path of the audio file to add.

    Returns:
        tuple: ``(chatbot, task_history)``; both are returned unchanged when
        ``audio_path`` is not an existing file.

    NOTE(review): the entry is recorded with role "user", so audio added here
    is fed back to the model as user input on the next turn — confirm this is
    intended when it is used for synthesised replies.
    """
    if not os.path.isfile(audio_path):
        print(f"Error: The file {audio_path} does not exist.")
        return chatbot, task_history

    # Record the audio in the conversation history.
    task_history.append({
        "role": "user",
        "content": [{"type": "audio", "audio_url": audio_path}]
    })

    # Assumes the chatbot component accepts dict-style messages; the plain
    # file path is used instead of a gr.File object.
    chatbot_state = [{
        "text": f"[Audio file: {os.path.basename(audio_path)}]",
        "files": [audio_path]
    }, None]
    chatbot.append(chatbot_state)

    return chatbot, task_history

def reset_user_input():
    """Return an update that clears the user's text input.

    Uses ``gr.update`` rather than the per-component ``gr.Textbox.update``
    class method, which is not available in Gradio 4.

    Returns:
        dict: Gradio update setting the component value to an empty string.
    """
    return gr.update(value='')

def reset_state(task_history=None):
    """Clear the chat display and the conversation history.

    Args:
        task_history (list, optional): Current history; ignored. It is
            optional because the clear button is wired without inputs, so the
            callback may be invoked with no arguments.

    Returns:
        tuple: Two new empty lists — the chat display and the history.
    """
    return [], []

def regenerate(chatbot, task_history):
    """Drop the last assistant reply (if any) and generate a new one.

    Args:
        chatbot (list): Chat display entries.
        task_history (list): Conversation history.

    Returns:
        tuple: Updated ``(chatbot, task_history)``.

    NOTE(review): ``predict`` in this file appends a "user"-role audio entry
    (via ``add_file``) after the assistant reply, so the last history entry is
    often not "assistant" and nothing is removed here — confirm the intended
    regenerate behaviour.
    """
    # Remove the previous answer only when the history ends with one.
    if task_history and task_history[-1]['role'] == 'assistant':
        task_history.pop()
        if chatbot:  # avoid IndexError when the display is already empty
            chatbot.pop()
    # Re-run generation when there is still something to answer.
    if task_history:
        chatbot, task_history = predict(chatbot, task_history)
    return chatbot, task_history

def predict(chatbot, task_history):
    """Generate a reply for the current history and attach it as speech.

    The text reply is appended to both ``task_history`` and ``chatbot``; the
    reply is then sent to ``text_to_speech`` and, when an audio file comes
    back, it is added through ``add_file``.

    Args:
        chatbot (list): Chat display entries.
        task_history (list): Conversation history.

    Returns:
        tuple: Updated ``(chatbot, task_history)``.
    """
    print(f"{task_history=}")
    print(f"{chatbot=}")

    # Render the history into the prompt text expected by the model.
    text = processor.apply_chat_template(task_history, add_generation_prompt=True, tokenize=False)
    audios = []

    # Load every audio clip referenced in the history at the sampling rate
    # of the processor's feature extractor.
    for message in task_history:
        if isinstance(message["content"], list):
            for ele in message["content"]:
                if ele["type"] == "audio":
                    audios.append(
                        librosa.load(ele['audio_url'], sr=processor.feature_extractor.sampling_rate)[0]
                    )

    if len(audios) == 0:  # the processor receives None when there is no audio
        audios = None
    print(f"{text=}")
    print(f"{audios=}")

    # Build the model inputs.
    inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True)
    if not _get_args().cpu_only:
        # Only input_ids are moved explicitly; the other tensors stay where
        # the processor created them.
        inputs["input_ids"] = inputs.input_ids.to("cuda")

    # Generate, then strip the prompt tokens from the output.
    generate_ids = model.generate(**inputs, max_length=256)
    generate_ids = generate_ids[:, inputs.input_ids.size(1):]

    # Decode the reply text.
    response = processor.batch_decode(generate_ids, skip_special_tokens=True)[0]
    task_history.append({'role': 'assistant', 'content': response})
    chatbot.append((None, response))  # show the text reply

    # Convert the reply to speech and attach the audio when available.
    audio_file_path = text_to_speech(response)
    if audio_file_path:
        chatbot, task_history = add_file(chatbot, task_history, audio_file_path)

    return chatbot, task_history

def _launch_demo(args):
    """Build the Gradio web UI for the Qwen2-Audio-Instruct chat and launch it.

    Args:
        args (argparse.Namespace): Parsed command-line arguments; reads
            ``inbrowser``, ``server_port`` and ``server_name``.

    NOTE(review): the SSL certificate and key paths passed to ``launch`` are
    hard-coded; replace them with the location of your own cert.pem/key.pem.
    """
    with gr.Blocks() as demo:
        # Page title and description.
        gr.Markdown(
            """<p align="center"><img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/assets/blog/qwenaudio/qwen2audio_logo.png" style="height: 80px"/><p>""")
        gr.Markdown("""<center><font size=8>Qwen2-Audio-Instruct Bot</center>""")
        gr.Markdown(
            """\
<center><font size=3>This WebUI is based on Qwen2-Audio-Instruct, developed by Alibaba Cloud. \
(本WebUI基于Qwen2-Audio-Instruct打造，实现聊天机器人功能。)</center>""")
        gr.Markdown("""\
<center><font size=4>Qwen2-Audio <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B">🤖 </a> 
| <a href="https://huggingface.co/Qwen/Qwen2-Audio-7B">🤗</a>&nbsp ｜ 
Qwen2-Audio-Instruct <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B-Instruct">🤖 </a> | 
<a href="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct">🤗</a>&nbsp ｜ 
&nbsp<a href="https://github.com/QwenLM/Qwen2-Audio">Github</a></center>""")

        # Chat display component.
        chatbot = mgr.Chatbot(label='Qwen2-Audio-7B-Instruct', elem_classes="control-height", height=750)

        # User input component: text plus microphone recording and file upload.
        user_input = mgr.MultimodalInput(
            interactive=True,
            sources=['microphone', 'upload'],
            submit_button_props=dict(value="🚀 Submit (发送)"),
            upload_button_props=dict(value="📁 Upload (上传文件)", show_progress=True),
        )
        task_history = gr.State([])  # conversation history state

        with gr.Row():  # clear-history and regenerate buttons
            empty_bin = gr.Button("🧹 Clear History (清除历史)")
            regen_btn = gr.Button("🤔️ Regenerate (重试)")

        # On submit: record the input with add_text, then generate with predict.
        user_input.submit(fn=add_text,
                          inputs=[chatbot, task_history, user_input],
                          outputs=[chatbot, task_history, user_input]).then(
            predict, [chatbot, task_history], [chatbot, task_history], show_progress=True
        )
        # Clear button: reset the chat display and the history.
        empty_bin.click(reset_state, outputs=[chatbot, task_history], show_progress=True)
        # Regenerate button: redo the last reply.
        regen_btn.click(regenerate, [chatbot, task_history], [chatbot, task_history], show_progress=True)

    # Start the Gradio server.
    demo.queue().launch(
        share=False,  # no public share link
        inbrowser=args.inbrowser,  # open a browser tab automatically
        server_port=args.server_port,  # listening port
        server_name=args.server_name,  # bind address
        ssl_certfile="/root/project/cert.pem", 
        ssl_keyfile="/root/project/key.pem", 
        ssl_verify=False
    )

if __name__ == "__main__":
    args = _get_args()  # parse command-line arguments
    if args.cpu_only:
        device_map = "cpu"  # force CPU when --cpu-only is given
    else:
        device_map = "auto"  # otherwise let the loader place the model

    # Load the model. This must stay inside the __main__ guard: it depends on
    # `args` and `device_map` defined just above.
    model = Qwen2AudioForConditionalGeneration.from_pretrained(
        args.checkpoint_path,
        torch_dtype="auto",  # pick the dtype automatically
        device_map=device_map,
        resume_download=True,  # resume interrupted downloads
    ).eval()
    model.generation_config.max_new_tokens = 2048  # allow long replies
    print("generation_config", model.generation_config)
    # Load the matching processor.
    processor = AutoProcessor.from_pretrained(args.checkpoint_path, resume_download=True)
    _launch_demo(args)  # start the web UI

修改该文件中demo.queue().launch（140行）

加入ssl参数，注意替换路径

demo.queue().launch(
        share=False,  # 不共享URL
        inbrowser=args.inbrowser,  # 是否自动在浏览器中打开
        server_port=args.server_port,  # 指定服务器端口
        server_name=args.server_name,  # 指定服务器名称
        ssl_certfile="/home/t/Qwen2-Audio/demo/cert.pem",   # /home/t/Qwen2-Audio这是我的项目地址，需要替换为你的项目地址
        ssl_keyfile="/home/t/Qwen2-Audio/demo/key.pem", 
        ssl_verify=False
    )

在这里插入图片描述

每次开始时都需要执行这些操作

1.创建ssl

# 解决浏览器找不到麦克风的问题（浏览器只允许在 HTTPS 页面中访问麦克风，因此需要生成自签名证书）

openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 365 -nodes

一直按enter键默认选择即可

可能报错1：
Could not create share link. Missing file: /home/t/anaconda3/envs/qwen2/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.2.

wget https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64

mv frpc_linux_amd64 frpc_linux_amd64_v0.2

mv frpc_linux_amd64_v0.2 /home/t/anaconda3/envs/qwen2/lib/python3.10/site-packages/gradio/

python demo/web_demo_audio.py

可能报错2：端口被占用