# 需求:两个 PDF 分别是两家电商公司的财报。想分析这两家公司的盈利情况,
# 利用 LlamaIndex 的 RAG 检索"盈利"相关的部分进行对比。
# 大模型使用 Tongyi(需显式指定;若不指定,LlamaIndex 默认使用 OpenAI)。
# Load the two e-commerce companies' quarterly-report PDFs into documents.
from llama_index.core import SimpleDirectoryReader

A_docs, B_docs = (
    SimpleDirectoryReader(input_files=[report_pdf]).load_data()
    for report_pdf in (
        "电商A-Third Quarter 2023 Results.pdf",
        "电商B-Third Quarter 2023 Results.pdf",
    )
)
# Use a local HuggingFace embedding model for all subsequent indexing
# (path points at a bge-small-zh-v1.5 checkpoint on disk).
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

Settings.embed_model = HuggingFaceEmbedding(
    model_name="../../bge-small-zh-v1.5",
)
# Configure the LLM: Tongyi (DashScope) via LangChain — otherwise
# llama_index would default to OpenAI.
import os

from dotenv import load_dotenv
from langchain_community.llms import Tongyi

load_dotenv('../../key.env')  # load DASHSCOPE_API_KEY from the env file
# Fetch the key exactly once. The original read the same variable twice
# (os.getenv then os.environ); os.environ[...] raises a clear KeyError
# if the env file is missing or the key is unset, instead of silently
# carrying a None value forward.
DASHSCOPE_API_KEY = os.environ["DASHSCOPE_API_KEY"]
key = DASHSCOPE_API_KEY  # kept for backward compatibility with the old name
# NOTE(review): Tongyi reads DASHSCOPE_API_KEY from the environment itself;
# assigning a LangChain LLM to Settings.llm relies on llama_index's
# LangChain adapter being installed — TODO confirm.
Settings.llm = Tongyi(temperature=1)
# Build one vector index per company from its documents, then persist each
# to disk so later runs can reload instead of re-embedding.
from llama_index.core import StorageContext, VectorStoreIndex

A_index = VectorStoreIndex.from_documents(A_docs)
A_index.storage_context.persist(persist_dir="./storage/A")

B_index = VectorStoreIndex.from_documents(B_docs)
B_index.storage_context.persist(persist_dir="./storage/B")
# Reload the persisted indexes from local storage.
from llama_index.core import load_index_from_storage

try:
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/A"
    )
    A_index = load_index_from_storage(storage_context)
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/B"
    )
    B_index = load_index_from_storage(storage_context)
    index_loaded = True
except Exception:
    # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate; any load failure just flags the indexes as absent
    # (the freshly built in-memory indexes above remain usable).
    index_loaded = False
# One query engine per index; each query retrieves the 3 most similar chunks.
A_engine, B_engine = (
    index.as_query_engine(similarity_top_k=3)
    for index in (A_index, B_index)
)
# Configure the per-company query tools the agent can route questions to.
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import ToolMetadata


def _finance_tool(engine, name, description):
    """Wrap a query engine as an agent tool with the given metadata."""
    return QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(name=name, description=description),
    )


query_engine_tools = [
    _finance_tool(A_engine, "A_Finance", "用于提供A公司的财务信息 "),
    # BUG FIX: this description previously said "A公司" (company A), which
    # would mislead the ReAct agent into routing B-company questions to
    # the wrong query engine.
    _finance_tool(B_engine, "B_Finance", "用于提供B公司的财务信息 "),
]
# Build a ReAct agent over both finance tools and run the comparison query.
from llama_index.core.agent import ReActAgent

agent = ReActAgent.from_tools(
    query_engine_tools,
    verbose=True,  # print the agent's reasoning trace step by step
)
agent.chat("比较一下电商A,电商B个公司的销售额,请使用中文回答")