Vicuna-13B,7B 与PDF的对话
(1).环境部署
1.安装依赖
# Clone the FastChat project from GitHub
git clone https://github.com/lm-sys/FastChat.git
cd FastChat
# Install FastChat in editable mode
pip3 install --upgrade pip # enable PEP 660 support
pip3 install -e .
2.Vicuna模型权重转换
Vicuna是基于 LLaMA 微调得来的大规模语言对话模型,部署时需要先下载 LLaMA 模型的权重文件。LLaMA 模型权重文件需要向官方申请;也有人上传了 LLaMA 的权重文件,可以从下面的地址下载。
LLaMA 模型权重下载
# Download the base LLaMA weights (already converted to Hugging Face format).
# NOTE(review): the decapoda-research repos have since been taken down on the
# Hub — the yahma mirrors below are the likely working alternative; verify.
git lfs clone https://huggingface.co/decapoda-research/llama-7b-hf
# or
git lfs clone https://huggingface.co/yahma/llama-7b-hf
git lfs clone https://huggingface.co/decapoda-research/llama-13b-hf
# or
git lfs clone https://huggingface.co/yahma/llama-13b-hf
Vicuna模型权重下载
# Download the Vicuna-7B and Vicuna-13B *delta* weights
# (deltas must be applied on top of the base LLaMA weights — see step 3)
git lfs clone https://huggingface.co/lmsys/vicuna-7b-delta-v1.1
git lfs clone https://huggingface.co/lmsys/vicuna-13b-delta-v1.1
3.模型权重合并
# Merge the base LLaMA weights (--base) with the Vicuna delta (--delta)
# to produce the full, runnable Vicuna-13B checkpoint at --target.
python3 -m fastchat.model.apply_delta \
--base /model/llama-13b-hf \
--delta /model/vicuna-13b-delta-v1.1 \
--target /model/vicuna-13b-all-v1.1
(2) 与PDF交互
核心代码
1.加载模型
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
from fastchat.model import load_model, get_conversation_template, add_model_args
import openai
import os

# Path to the merged Vicuna checkpoint produced by fastchat.model.apply_delta.
model_path = "/home/xs/project/MiniGPT-4/vincuna-13b-all-v1.1"

# OpenAI embeddings vectorize the PDF chunks — fill in a real API key here.
os.environ['OPENAI_API_KEY'] = ""
embedding_model = OpenAIEmbeddings()

# Route OpenAI traffic through a local proxy. A local forward proxy speaks
# plain HTTP on both keys; the original used an "https://" scheme for the
# 'https' entry, which the HTTP client would treat as a TLS proxy and fail
# to connect to.
proxies = {
    'http': "http://127.0.0.1:7890",
    'https': "http://127.0.0.1:7890",
}
openai.proxy = proxies

# Load Vicuna in 8-bit on a single GPU.
# BUG FIX: the original passed debug="store_true" — an argparse leftover.
# Any non-empty string is truthy, so debug output was silently enabled.
model, tokenizer_ = load_model(
    model_path,
    "cuda",           # device
    1,                # num_gpus
    load_8bit=True,   # 8-bit quantization so the 13B model fits on one GPU
    debug=False,
)
2.加载文档与矢量数据库的制作
from loader import UnstructuredPaddlePDFLoader
from textsplitter import ChineseTextSplitter
from langchain.text_splitter import CharacterTextSplitter
import loader
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Parse the PDF with the PaddleOCR-backed loader, then split it into
# Chinese-aware chunks of at most 800 characters each.
# BUG FIX: the original rebound the name `loader`, shadowing the `loader`
# module imported above; use a distinct variable name instead.
pdf_loader = UnstructuredPaddlePDFLoader('../langchain-pdf/test_pdf.pdf')
textsplitter = ChineseTextSplitter(pdf=True, sentence_size=800)
docs = pdf_loader.load_and_split(textsplitter)

# Embed the chunks (OpenAI embeddings from step 1) and index them in FAISS.
vector_store = FAISS.from_documents(docs, embedding=embedding_model)
3.检索答案与制作prompt
# 定义模板
from typing import List
from fastchat.conversation import get_conv_template

# Prompt template for RAG-style answering.
# BUG FIX: the original template contained no "{question}"/"{context}"
# placeholders, so generate_prompt's replace() calls were no-ops and the
# retrieved context never reached the model.
PROMPT_TEMPLATE = '''你是一个AI Assistant,请根据USER给出的描述,简洁和专业的来回答用户的问题. 如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”。回答时应保持语句完整。
已知信息:
{context}
问题:
{question}
'''

conv = get_conv_template("vicuna_v1.1")
query = "2022年我国航天发射情况"


def generate_prompt(related_docs: List[str], query: str,
                    prompt_template=PROMPT_TEMPLATE) -> str:
    """Fill the prompt template with retrieved document chunks and the query.

    ``related_docs`` accepts either plain Document objects or the
    ``(document, score)`` tuples returned by FAISS
    ``similarity_search_with_score`` — the original crashed on tuples
    because tuples have no ``page_content`` attribute.
    """
    docs = [d[0] if isinstance(d, tuple) else d for d in related_docs]
    context = "\n".join(doc.page_content for doc in docs)
    # str.replace (not str.format) so literal braces elsewhere in the
    # template cannot raise KeyError.
    return prompt_template.replace("{question}", query).replace("{context}", context)


# BUG FIX: the original referenced `related_docs_with_score` without ever
# defining it — the retrieval step was missing entirely.
related_docs_with_score = vector_store.similarity_search_with_score(query, k=4)
prompt = generate_prompt(related_docs=related_docs_with_score, query=query)
prompt
4.模型输入与回答
# Push the RAG prompt through the Vicuna conversation template and generate.
conv.append_message(conv.roles[0], prompt)  # USER turn
conv.append_message(conv.roles[1], "")      # empty ASSISTANT turn to complete
prompt_ = conv.get_prompt()

input_ids = tokenizer_([prompt_]).input_ids
generated = model.generate(
    torch.as_tensor(input_ids).cuda(),
    do_sample=True,
    temperature=0.8,
    max_new_tokens=512,
)

# Decoder-only models echo the prompt tokens, so strip them off;
# encoder-decoder models return only the newly generated tokens.
if model.config.is_encoder_decoder:
    new_tokens = generated[0]
else:
    new_tokens = generated[0][len(input_ids[0]):]

outputs = tokenizer_.decode(
    new_tokens, skip_special_tokens=True, spaces_between_special_tokens=False
)
print(f"ASSISTANT: {outputs}")