A text Q&A example using LangChain 0.1 with Azure OpenAI
# Azure OpenAI Service - Chat on private data using LangChain
## First, create a file called `.env` in this folder, and add the following content, replacing the placeholders with your own values:
"""
AZURE_OPENAI_KEY=xxxxxx
AZURE_OPENAI_ENDPOINT=https://xxxxxxx.openai.azure.com/
AZURE_API_VERSION=2023-12-01-preview
"""
import os
import openai
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
# Load environment variables (set AZURE_OPENAI_KEY, AZURE_OPENAI_ENDPOINT and AZURE_API_VERSION in .env)
load_dotenv()
# Configure Azure OpenAI Service API
openai.api_type = "azure"
openai.api_version = os.getenv('AZURE_API_VERSION')
openai.azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_key = os.getenv("AZURE_OPENAI_KEY")
# Init LLM and embeddings model
llm = AzureChatOpenAI(
    model="gpt-35-turbo",
    deployment_name="gpt-35-turbo",
    api_key=openai.api_key,
    azure_endpoint=openai.azure_endpoint,
    api_version=openai.api_version,
)
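## Optionally, you can verify the connection with a one-off call before building anything on top of it:
print(llm.invoke("Say hello in one short sentence.").content)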
# You need to deploy your own embedding model as well as your own chat completion model
embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-ada-002",
    chunk_size=1,  # number of texts sent per embeddings request
    api_key=openai.api_key,
    azure_endpoint=openai.azure_endpoint,
    api_version=openai.api_version,
)
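## As a quick sanity check, you can embed a test string and inspect the result; text-embedding-ada-002 returns 1536-dimensional vectors:
vector = embeddings.embed_query("Hello, Azure OpenAI!")
print(len(vector))  # 1536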
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import TokenTextSplitter
## First, we load up our documents from the `data` directory:
loader = DirectoryLoader('../data/qna/', glob="*.txt", loader_cls=TextLoader)
documents = loader.load()
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
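## Before embedding, it is worth checking how many chunks the splitter produced:
print(f"Loaded {len(documents)} documents, split into {len(docs)} chunks")
print(docs[0].page_content[:200])  # preview of the first chunk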
## Next, let's ingest them into FAISS so we can efficiently query our embeddings:
from langchain_community.vectorstores import FAISS
db = FAISS.from_documents(documents=docs, embedding=embeddings)
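## You can sanity-check the index with a plain similarity search, and optionally persist it so the documents don't have to be re-embedded on every run (the "faiss_index" directory name below is just an example):
for doc in db.similarity_search("what is azure openai service?", k=2):
    print(doc.metadata["source"])
db.save_local("faiss_index")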
## Now let's create a chain that can do the whole chat on our embedding database:
from langchain.chains import ConversationalRetrievalChain
from langchain_core.prompts import PromptTemplate
# Adapt if needed
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:""")
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(),
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    return_source_documents=True,
    verbose=False,
)
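## If you want the chain to retrieve more or fewer chunks per question, pass search_kwargs when creating the retriever (k defaults to 4; the value 2 below is only an example) and hand it to the chain via the retriever argument above:
retriever = db.as_retriever(search_kwargs={"k": 2})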
## Now let's ask a question:
chat_history = []
query = "what is azure openai service?"
result = qa.invoke({"question": query, "chat_history": chat_history})
print(result["answer"])
## We can use this to easily implement multi-turn chat conversations:
chat_history = []
query = "what is Azure OpenAI Service?"
result = qa.invoke({"question": query, "chat_history": chat_history})
print("Question:", query)
print("Answer:", result["answer"])
chat_history = [(query, result["answer"])]
query = "Which regions does the service support?"
result = qa.invoke({"question": query, "chat_history": chat_history})
print("Question:", query)
print("Answer:", result["answer"])
## All required packages are listed in the workshop's requirements file: https://github.com/microsoft/azure-openai-in-a-day-workshop/blob/main/requirements.txt