转载自:如何基于Huggingface + Langchain 快速实现RAG
https://www.bilibili.com/video/BV1ia4y1y7gF/
源码: https://github.com/blackinkkkxi/RAG_langchain/blob/main/langchain_hf.ipynb
安装环境
# Install LangChain plus the Hugging Face client/model libraries used by the cells below.
# NOTE(review): `modelscope` is imported later for the local-model example but is not
# installed by either command here — confirm it is already available in the environment.
! pip install langchain huggingface_hub transformers sentence_transformers accelerate bitsandbytes
# pypdf backs the PDF loader and faiss-cpu backs the FAISS vector store used in the retrieval section.
! pip install pypdf faiss-cpu
使用API token 调用LLM
from getpass import getpass
# Ask for the Hugging Face API token interactively (input is not echoed)
# instead of hard-coding the secret in the notebook.
HUGGINGFACEHUB_API_TOKEN = getpass()
在 ipynb 中运行 getpass() 后,页面上方会弹出输入框,在其中输入 Hugging Face 的 access token 并回车确认。
import os
# Export the token through the environment. Presumably this is where the
# HuggingFaceHub wrapper created below picks it up, since no token is passed
# to it explicitly — verify against the LangChain documentation.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
from langchain_community.llms import HuggingFaceHub
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
# Build the prompt template: the question is substituted into a
# step-by-step ("chain of thought") answer scaffold.
question = "Where is the capital of China? "
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])

# Hub repository of the hosted model. Other candidates can be browsed at
# https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads
repo_id = "google/flan-t5-base"
llm = HuggingFaceHub(
    repo_id=repo_id,
)

# llm_kwargs are forwarded to the LLM on every call of the chain.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    llm_kwargs={"temperature": 0, "max_length": 512},
)

# `Chain.run` is deprecated since LangChain 0.1.0 and removed in 0.2.0.
# `invoke` takes the inputs as a dict and returns a dict; LLMChain stores the
# generated answer under its default output key "text".
print(llm_chain.invoke({"question": question})["text"])
c:\Users\xx\.conda\envs\llangchainhf\lib\site-packages\huggingface_hub\utils\_deprecation.py:131: FutureWarning: 'InferenceApi' (from 'huggingface_hub.inference_api') is deprecated and will be removed from version '1.0'. `InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. Check out this guide to learn how to convert your script to use it: https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client.
warnings.warn(warning_message, FutureWarning)
c:\Users\blackink\.conda\envs\llangchainhf\lib\site-packages\langchain_core\_api\deprecation.py:117: LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.
warn_deprecated(
China is located in the north of the world. The capital of China is Beijing. The answer: Beijing.
构建RAG检索
# Import the loader from langchain_community, like the other integrations in
# this file: the `langchain.document_loaders` path is a deprecated alias.
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF file; per the PyPDFLoader documentation `load()` returns one
# Document per page.
loader = PyPDFLoader("data//baichuan.pdf")
pages = loader.load()
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the text into chunks of size 300 with an overlap of 50 between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)

# Only the first four loaded pages are split and later indexed.
docs = text_splitter.split_documents(pages[:4])
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import FAISS

# Embeddings are computed remotely through the Hugging Face Inference API,
# authenticated with the token entered earlier.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

# Embed every chunk and store the vectors in a FAISS index.
db = FAISS.from_documents(docs, embeddings)

# Retrieve the chunks most similar to the query; k=3 keeps the top 3 hits.
query = "How large is the baichuan2 vocabulary size?"
result_simi = db.similarity_search(query, k=3)
拼接检索结果,构造增强提示词(augmented prompt)
# Concatenate the text of the retrieved chunks into a single context string.
source_knowledge = "\n".join(x.page_content for x in result_simi)

# Prompt template that places the retrieved context in front of the query.
augmented_prompt = """Using the contexts below, answer the query.
contexts:
{source_knowledge}
query: {query}"""
prompt = PromptTemplate(
    template=augmented_prompt,
    input_variables=["source_knowledge", "query"],
)

# Rebind the chain to the RAG prompt, reusing the LLM created earlier.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    llm_kwargs={"temperature": 0, "max_length": 1024},
)

# `Chain.run` is deprecated since LangChain 0.1.0 and removed in 0.2.0.
# `invoke` returns a dict whose "text" entry (LLMChain's default output key)
# holds the generated answer.
print(llm_chain.invoke({"source_knowledge": source_knowledge, "query": query})["text"])
125,696
# Render the same prompt layout directly (without PromptTemplate) so the exact
# text built from the retrieved context and the query can be inspected.
augmented_prompt_2 = "\n".join(
    [
        "Using the contexts below, answer the query.",
        "contexts:",
        f"{source_knowledge}",
        f"query: {query}",
    ]
)
print(augmented_prompt_2)
Using the contexts below, answer the query.
contexts:
have taken both these aspects into account. We
have expanded the vocabulary size from 64,000
in Baichuan 1 to 125,696, aiming to strike a
balance between computational efficiency and
model performance.
Tokenizer V ocab Size Compression Rate ↓
LLaMA 2 32,000 1.037
Bloom 250,680 0.501
improve after training on more than 2.6 trillion
tokens. By sharing these intermediary results,
we hope to provide the community with greater
insight into the training dynamics of Baichuan 2.
Understanding these dynamics is key to unraveling
the inner working mechanism of large language
Baichuan 2: Open Large-scale Language Models
Aiyuan Yang, Bin Xiao, Bingning Wang, Borong Zhang, Chao Yin, Chenxu Lv, Da Pan
Dian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Wang, Feng Liu, Guangwei Ai
Guosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji
query: How large is the baichuan2 vocabulary size?
本地加载 LLM(以 Baichuan 模型为例)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig
import torch
from modelscope import snapshot_download, Model
# Fetch the Baichuan2-7B-Chat snapshot through ModelScope; the returned path is
# passed to the loader below.
model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", revision="master")

# Load the model in float16 with automatic device placement.
# trust_remote_code=True is passed through to the loader; presumably the
# checkpoint ships custom model code — confirm before running untrusted repos.
model = Model.from_pretrained(
    model_dir,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

# Single-turn chat: one user message, then call the model on the message list.
messages = [{"role": "user", "content": "讲解一下“温故而知新”"}]
response = model(messages)
print(response)
# Hard-coded copy of the augmented prompt (retrieved contexts followed by the
# query), sent to the locally loaded model.
content = '''Using the contexts below, answer the query.
contexts:
have taken both these aspects into account. We
have expanded the vocabulary size from 64,000
in Baichuan 1 to 125,696, aiming to strike a
balance between computational efficiency and
model performance.
Tokenizer V ocab Size Compression Rate ↓
LLaMA 2 32,000 1.037
Bloom 250,680 0.501
improve after training on more than 2.6 trillion
tokens. By sharing these intermediary results,
we hope to provide the community with greater
insight into the training dynamics of Baichuan 2.
Understanding these dynamics is key to unraveling
the inner working mechanism of large language
Baichuan 2: Open Large-scale Language Models
Aiyuan Yang, Bin Xiao, Bingning Wang, Borong Zhang, Chao Yin, Chenxu Lv, Da Pan
Dian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Wang, Feng Liu, Guangwei Ai
Guosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji
query: How large is the baichuan2 vocabulary size?
'''

# Start a fresh single-turn conversation containing only the RAG prompt.
messages = [{"role": "user", "content": content}]
response = model(messages)
print(response)