Recommended order of consumption:
1. Deploy it first by following the tutorial as-is.
2. Read the code comments; you will find it is all just library calls, nothing difficult.
3. Pick up a few concepts along the way, such as quantization (see the sketch right after this list).
4. Move on to the model itself, e.g. the ptuning tutorial. That is where the real work is; nobody hires a pure library-caller these days.
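On point 3, here is a minimal sketch of what quantization buys you, assuming the .quantize() helper that the ChatGLM model family ships in its remote code (the VRAM figures are rough rules of thumb, not measurements):

from transformers import AutoModel

# fp16 stores each of the ~6B weights in 2 bytes: roughly 12 GB of VRAM.
# int8 halves that and int4 quarters it, trading a little quality for memory.
model = AutoModel.from_pretrained("../../pretrain", trust_remote_code=True)
model = model.quantize(4).cuda()  # use quantize(8) for int8; omit for fp16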
My background:
Zero LLM experience; first-year master's student; a little ML and big-data-analysis experience, plus a tiny bit of front-end/back-end experience.
Course review:
First, it is completely free. Second, the tutorial is essentially hand-holding, but it teaches only the steps, not the reasons; in other words, you have to work out what the code is doing on your own.
web_demo.py
# web_demo.py
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import mdtex2html
from utils import load_model_on_gpus
# Load the pretrained tokenizer; trust_remote_code=True lets the repo's custom model code run
tokenizer = AutoTokenizer.from_pretrained("../../pretrain", trust_remote_code=True)
# Load the pretrained model onto the CUDA device, fp16 by default. If your GPU lacks the
# memory or compute, switch to a quantized variant such as int8 or int4 instead, e.g.:
# model = AutoModel.from_pretrained("../../pretrain", trust_remote_code=True).quantize(8).cuda()  # int8
model = AutoModel.from_pretrained("../../pretrain", trust_remote_code=True).cuda()
# Multi-GPU support: use the two lines below instead of the line above,
# changing num_gpus to the number of GPUs you actually have
# from utils import load_model_on_gpus
# model = load_model_on_gpus("THUDM/chatglm3-6b", num_gpus=2)
# Switch the model to evaluation (inference) mode
model = model.eval()
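# Side note (mine, not from the tutorial): .eval() switches layers such as
# dropout to inference behavior; it does not by itself disable gradient
# tracking (ChatGLM's chat helpers run under inference mode internally,
# as far as I can tell from the remote code).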
"""Override Chatbot.postprocess"""
# 自定义Chatbot.postprocess方法,处理模型的输出结果
def postprocess(self, y):
if y is None:
return []
for i, (message, response) in enumerate(y):
# 将message和response使用mdtex2html库转换格式
y[i] = (
None if message is None else mdtex2html.convert((message)),
None if response is None else mdtex2html.convert(response),
)
return y
# 将自定义的postprocess方法应用到Chatbot类
gr.Chatbot.postprocess = postprocess
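# Illustration (mine, not part of the demo): mdtex2html.convert turns Markdown
# mixed with TeX into HTML, e.g. mdtex2html.convert("**bold**, $x^2$") yields
# an HTML string with <strong> tags and the formula converted for browser
# rendering, which is why LaTeX in replies displays properly in the chat UI.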
# Parse text-format input into HTML
def parse_text(text):
    """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                lines[i] = f'<br></code></pre>'
        else:
            if i > 0:
                if count % 2 == 1:
                    # Inside a code block: escape characters that HTML/Markdown would otherwise interpret
                    line = line.replace("`", "\\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>" + line
    text = "".join(lines)
    return text
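# Illustration (mine): parse_text turns fenced code blocks into <pre><code>
# HTML and escapes their contents, e.g.
#   parse_text("hi\n```python\nprint(1)\n```")
#   -> 'hi<pre><code class="language-python"><br>print&#40;1&#41;<br></code></pre>'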
# Prediction: stream the model's reply token by token
def predict(input, chatbot, max_length, top_p, temperature, history, past_key_values):
    chatbot.append((parse_text(input), ""))  # Append the user input to the chatbot
    for response, history, past_key_values in model.stream_chat(tokenizer, input, history,
                                                                past_key_values=past_key_values,
                                                                return_past_key_values=True,
                                                                max_length=max_length, top_p=top_p,
                                                                temperature=temperature):
        # Overwrite the last chatbot turn with the partial response so far
        chatbot[-1] = (parse_text(input), parse_text(response))
        # Yield chatbot, history and past_key_values so Gradio can update the UI live
        yield chatbot, history, past_key_values
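# A minimal standalone sketch (mine; the signature mirrors the call above) of
# driving stream_chat without Gradio. Each iteration yields the reply so far,
# so re-printing it gives the typewriter effect the UI shows:
# history, past_key_values = [], None
# for response, history, past_key_values in model.stream_chat(
#         tokenizer, "你好", history, past_key_values=past_key_values,
#         return_past_key_values=True):
#     print(response)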
# Clear the user input box
def reset_user_input():
    return gr.update(value='')
# Reset conversation state: chatbot display, history and past_key_values
def reset_state():
    return [], [], None
# Build the interactive UI
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">ChatGLM3-6B</h1>""")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            with gr.Column(scale=12):
                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
                    container=False)
            with gr.Column(min_width=32, scale=1):
                submitBtn = gr.Button("Submit", variant="primary")
        with gr.Column(scale=1):
            emptyBtn = gr.Button("Clear History")
            max_length = gr.Slider(0, 8192, value=8192, step=1.0, label="Maximum length", interactive=True)
            top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
    # State that needs to persist across turns
    history = gr.State([])
    past_key_values = gr.State(None)
    # Wire up the button click events
    submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history, past_key_values],
                    [chatbot, history, past_key_values], show_progress=True)
    submitBtn.click(reset_user_input, [], [user_input])
    emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)
# Launch the interactive UI
demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7000)
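To try it, start the script (e.g. python web_demo.py, assuming the dependencies above are installed) and open http://<server-ip>:7000 in a browser; the port comes from server_port above, and share=True additionally requests a temporary public gradio.live link.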
web_demo2.py
# Import the required modules
from transformers import AutoModel, AutoTokenizer  # AutoModel and AutoTokenizer from the transformers library
import streamlit as st  # streamlit, used to build the interactive UI
# Page configuration
st.set_page_config(
    page_title="ChatGLM3-6b 演示",
    page_icon=":robot:",
    layout='wide'
)
# Load the model once and cache it across reruns
@st.cache_resource
def get_model():
    tokenizer = AutoTokenizer.from_pretrained("../../pretrain", trust_remote_code=True)
    model = AutoModel.from_pretrained("../../pretrain", trust_remote_code=True).cuda()
    # Multi-GPU support: use the two lines below instead of the line above,
    # changing num_gpus to the number of GPUs you actually have.
    # from utils import load_model_on_gpus
    # model = load_model_on_gpus("THUDM/chatglm3-6b", num_gpus=2)
    model = model.eval()
    return tokenizer, model
tokenizer, model = get_model()
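# Why @st.cache_resource matters (my note, not original code): Streamlit
# re-runs this entire script on every interaction, so without the decorator
# the 6B model would be reloaded on every click; with it, get_model() runs
# once per server process and later reruns get the cached pair back.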
# Page title
st.title("ChatGLM3-6B")
# Sidebar sampling controls
max_length = st.sidebar.slider(
    'max_length', 0, 8192, 8192, step=1
)
top_p = st.sidebar.slider(
    'top_p', 0.0, 1.0, 0.8, step=0.01
)
temperature = st.sidebar.slider(
    'temperature', 0.0, 1.0, 0.8, step=0.01
)
# Initialize the conversation history in session state if absent
if 'history' not in st.session_state:
    st.session_state.history = []
# Initialize the cached past_key_values (the KV cache) if absent
if 'past_key_values' not in st.session_state:
    st.session_state.past_key_values = None
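# Note (mine): st.session_state is a per-browser-session store that survives
# script reruns, which is what lets the chat history and the KV cache persist
# between clicks; it supports both attribute access (st.session_state.history)
# and key access (st.session_state["history"]).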
# Render the past conversation
for i, message in enumerate(st.session_state.history):
    if message["role"] == "user":
        with st.chat_message(name="user", avatar="user"):
            st.markdown(message["content"])
    else:
        with st.chat_message(name="assistant", avatar="assistant"):
            st.markdown(message["content"])
# Placeholder for the user's new message
with st.chat_message(name="user", avatar="user"):
    input_placeholder = st.empty()
# Placeholder for the assistant's streaming reply
with st.chat_message(name="assistant", avatar="assistant"):
    message_placeholder = st.empty()
# Text area for user input
prompt_text = st.text_area(label="用户命令输入",
                           height=100,
                           placeholder="请在这儿输入您的命令")
# Send button
button = st.button("发送", key="predict")
# When the send button is clicked
if button:
    input_placeholder.markdown(prompt_text)
    history, past_key_values = st.session_state.history, st.session_state.past_key_values
    # Stream the model's reply, updating the placeholder as tokens arrive
    for response, history, past_key_values in model.stream_chat(tokenizer, prompt_text, history,
                                                                past_key_values=past_key_values,
                                                                max_length=max_length, top_p=top_p,
                                                                temperature=temperature,
                                                                return_past_key_values=True):
        message_placeholder.markdown(response)
    # Persist the updated history and KV cache back into session state
    st.session_state.history = history
    st.session_state.past_key_values = past_key_values
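Unlike web_demo.py, this version must be started through Streamlit's own launcher rather than plain python, e.g. streamlit run web_demo2.py (add --server.port 7001 or similar to change the port; the default is 8501).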