简介
这里不对 LangChain 和 ChatGPT 本身进行介绍,仅对实现过程进行整理。
环境
- Python >= 3.8
- Flask 2.2.3
- Jinja2 3.1.2
- langchain 0.0.143
- openai 0.27.4
实现 总结功能
使用 langchain 和 openai 接口实现总结功能
实现逻辑:通过 text_splitter 将 PDF 分块,送入 langchain 的 summarize_chain 中进行处理。
同样也可以使用 OpenAIEmbeddings 来实现,文档地址:langchain 官方文档。
创建文件:summarize.py
from langchain import PromptTemplate
from langchain.callbacks import get_openai_callback
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
def summarize_docs(docs, doc_url, llm):
    """Summarize a list of documents with a map-reduce summarize chain.

    Splits the input documents into ~1000-character chunks, runs langchain's
    map_reduce summarize chain over them, and prints OpenAI token usage.

    Args:
        docs: loaded langchain ``Document`` objects to summarize.
        doc_url: source URL/path of the documents (used only for logging).
        llm: language model instance passed to ``load_summarize_chain``.

    Returns:
        The chain's output dict (includes intermediate per-chunk summaries,
        since ``return_intermediate_steps=True``).
    """
    print(f'You have {len(docs)} document(s) in your {doc_url} data')
    # NOTE(review): assumes docs is non-empty — docs[0] raises IndexError otherwise.
    print(f'There are {len(docs[0].page_content)} characters in your document')

    # Chunk the text so each piece fits comfortably in the model context.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    split_docs = text_splitter.split_documents(docs)
    print(f'You have {len(split_docs)} split document(s)')

    # Runtime prompt text — kept exactly as in the original listing.
    prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY IN CHINESE:"""
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

    # map_reduce: summarize each chunk ("map"), then combine the partial
    # summaries ("reduce"); the same prompt is used for both stages.
    chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=False,
                                 return_intermediate_steps=True,
                                 map_prompt=PROMPT, combine_prompt=PROMPT)

    response = ""
    # The OpenAI callback tracks token usage/cost for everything run inside it.
    with get_openai_callback() as cb:
        response = chain({"input_documents": split_docs}, return_only_outputs=True)
        print(f"Total Tokens: {cb.total_tokens}")
        print(f"Prompt Tokens: {cb.prompt_tokens}")
        print(f"Completion Tokens: {cb.completion_tokens}")
        print(f"Successful Requests: {cb.successful_requests}")
    # NOTE(review): the source listing was truncated here; returning the chain
    # output is the evident intent of the `response` assignment above.
    return response