LangChain and ChatGLM-6B

The examples below build a minimal retrieval-augmented QA pipeline: LangChain loads and splits a local document, FAISS indexes the chunks as embedding vectors, and the chunks most similar to the question are stuffed into a prompt for a locally hosted ChatGLM-6B to answer.

UnstructuredFileLoader (PDF)
# encoding=utf-8

from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import PyPDFLoader # for loading the pdf
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Define the file path
filepath = "LangchainHelloWorld_glyy2.pdf"
# filepath = "test.pdf"
# Load the file
loader = UnstructuredFileLoader(filepath)
docs = loader.load()
# Split the text into overlapping chunks
# text_splitter = CharacterTextSplitter(chunk_size=508, chunk_overlap=200)
# print(text_splitter)
# docs = text_splitter.split_documents(docs)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=508, chunk_overlap=200)
docs = text_splitter.split_documents(docs)

# Build the vector store
# embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings()
vector_store = FAISS.from_documents(docs, embeddings)
# Retrieve the context that matches the question
query = "全院平均诊次成本计算公式?"  # "What is the formula for the hospital-wide average cost per visit?"
docs = vector_store.similarity_search(query)
context = "\n".join(doc.page_content for doc in docs)
# Construct the prompt ("Known information: ... Answer the question based on the known information: ...")
prompt = f"已知信息:\n{context}\n根据已知信息回答问题:\n{query}"
# Generate the answer with the LLM (model loaded below)

# Load the model
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True)
# model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).quantize(8).half().cuda()  # INT8, for smaller GPUs
model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).half().cuda()  # FP16 on GPU
# model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).float()  # FP32 on CPU
# response, history = model.chat(tokenizer, "你好", history=[])
# print(response)
chatglm = model.eval()
response, his = chatglm.chat(tokenizer, prompt, history=[])
print(response)
print(his)
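
Re-embedding the source document on every run is wasteful. A minimal sketch of persisting the FAISS index to disk and reloading it later, assuming the same LangChain version as above (the "faiss_index" folder name is an arbitrary choice):

# Save the index once after building it
vector_store.save_local("faiss_index")

# On later runs, reload it instead of re-loading and re-embedding the document
vector_store = FAISS.load_local("faiss_index", embeddings)
docs = vector_store.similarity_search(query)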


UnstructuredFileLoader (DOCX)
# encoding=utf-8

from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import PyPDFLoader # for loading the pdf
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Define the file path
filepath = "test.docx"
# filepath = "test.pdf"
# Load the file
loader = UnstructuredFileLoader(filepath)
docs = loader.load()
# Split the text into overlapping chunks
# text_splitter = CharacterTextSplitter(chunk_size=508, chunk_overlap=200)
# print(text_splitter)
# docs = text_splitter.split_documents(docs)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=508, chunk_overlap=200)
docs = text_splitter.split_documents(docs)

# Build the vector store
# embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings()
vector_store = FAISS.from_documents(docs, embeddings)
# Retrieve the context that matches the question
query = "基于DRG的公立医院多层级成本报告体系构建研究的作者是哪几位?"  # "Who are the authors of the study on building a DRG-based multi-level cost reporting system for public hospitals?"
docs = vector_store.similarity_search(query)
context = "\n".join(doc.page_content for doc in docs)
# Construct the prompt
prompt = f"已知信息:\n{context}\n根据已知信息回答问题:\n{query}"
# Generate the answer with the LLM (model loaded below)

# Load the model
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True)
# model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).quantize(8).half().cuda()  # INT8, for smaller GPUs
model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).half().cuda()  # FP16 on GPU
# model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).float()  # FP32 on CPU
# response, history = model.chat(tokenizer, "你好", history=[])
# print(response)
chatglm = model.eval()
response, his = chatglm.chat(tokenizer, prompt, history=[])
print(response)
print(his)
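
When an answer looks wrong, the first thing to check is what was actually retrieved. A short sketch using LangChain's similarity_search_with_score (for FAISS the score is an L2 distance, so lower means closer; k=4 is the default, written out here for clarity):

# Inspect the top matches and their distances before prompting the model
for doc, score in vector_store.similarity_search_with_score(query, k=4):
    print(f"score={score:.3f}  {doc.page_content[:80]!r}")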
UnstructuredFileLoader (TXT)
# Load the model (FP32 on CPU)
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True)
# model = AutoModel.from_pretrained("E:\\projects\\chatglm\\chatglm6bhub", trust_remote_code=True).quantize(4).half().cuda()
model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).float()
# response, history = model.chat(tokenizer, "你好", history=[])
# print(response)

chatglm = model.eval()

from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Define the file path
filepath = "test.txt"
# Load the file
loader = UnstructuredFileLoader(filepath)
docs = loader.load()
# Split the text (split_documents, not split_text: loader.load() returns Document objects, not a string)
text_splitter = CharacterTextSplitter(chunk_size=508, chunk_overlap=200)
docs = text_splitter.split_documents(docs)
# Build the vector store (OpenAIEmbeddings requires the OPENAI_API_KEY environment variable)
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(docs, embeddings)
# Retrieve the context that matches the question
query = "Langchain能够接入哪些数据类型?"  # "What data types can LangChain ingest?"
docs = vector_store.similarity_search(query)
context = "\n".join(doc.page_content for doc in docs)
# Construct the prompt
prompt = f"已知信息:\n{context}\n根据已知信息回答问题:\n{query}"
# Generate the answer with the LLM
response, his = chatglm.chat(tokenizer, prompt, history=[])
print(response)
print(his)
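
HuggingFaceEmbeddings with no arguments defaults to an English sentence-transformers model, which is a poor fit for the Chinese documents used here. A sketch of swapping in a Chinese embedding model; the specific checkpoint shibing624/text2vec-base-chinese is my suggestion, not something the original code uses:

# Use a Chinese sentence-embedding model for better retrieval on Chinese text
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="shibing624/text2vec-base-chinese")  # assumed checkpoint
vector_store = FAISS.from_documents(docs, embeddings)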
The same TXT example, revised to use RecursiveCharacterTextSplitter, HuggingFaceEmbeddings, and the FP16 GPU model:

from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Define the file path
filepath = "test.txt"
# Load the file
loader = UnstructuredFileLoader(filepath)
docs = loader.load()
# Split the text into overlapping chunks
# text_splitter = CharacterTextSplitter(chunk_size=508, chunk_overlap=200)
# print(text_splitter)
# docs = text_splitter.split_documents(docs)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=508, chunk_overlap=200)
docs = text_splitter.split_documents(docs)

# Build the vector store
# embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings()
vector_store = FAISS.from_documents(docs, embeddings)
# Retrieve the context that matches the question
query = "Langchain能够接入哪些数据类型?"  # "What data types can LangChain ingest?"
docs = vector_store.similarity_search(query)
context = "\n".join(doc.page_content for doc in docs)
# Construct the prompt
prompt = f"已知信息:\n{context}\n根据已知信息回答问题:\n{query}"
# Generate the answer with the LLM (model loaded below)

# Load the model
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True)
# model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).quantize(8).half().cuda()  # INT8, for smaller GPUs
model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).half().cuda()  # FP16 on GPU
# model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).float()  # FP32 on CPU
# response, history = model.chat(tokenizer, "你好", history=[])
# print(response)
chatglm = model.eval()
response, his = chatglm.chat(tokenizer, prompt, history=[])
print(response)
print(his)
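
For long answers it can help to stream tokens as they are generated. The chatglm-6b modeling code loaded via trust_remote_code exposes stream_chat, which yields progressively longer responses; a minimal sketch:

# Print the answer incrementally instead of waiting for the full response
printed = ""
for response, history in chatglm.stream_chat(tokenizer, prompt, history=[]):
    print(response[len(printed):], end="", flush=True)
    printed = response
print()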
DirectoryLoader

from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Define the glob pattern for the files to load
# filepath = "test.txt"
filepath = "**/*.XLS"
# Load every matching file under the current directory
# loader = UnstructuredFileLoader(filepath)
loader = DirectoryLoader('./', glob=filepath, loader_cls=UnstructuredFileLoader, show_progress=True)
docs = loader.load()
# Split the text into overlapping chunks
# text_splitter = CharacterTextSplitter(chunk_size=508, chunk_overlap=200)
# print(text_splitter)
# docs = text_splitter.split_documents(docs)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=508, chunk_overlap=200)
docs = text_splitter.split_documents(docs)

# Build the vector store
# embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings()
vector_store = FAISS.from_documents(docs, embeddings)
# Retrieve the context that matches the question
query = "2022年的Y02_1表年初预算结转和结余的合计是多少?"  # "In the 2022 Y02_1 table, what is the total of beginning-of-year budget carry-forwards and balances?"
docs = vector_store.similarity_search(query)
context = "\n".join(doc.page_content for doc in docs)
# Construct the prompt
prompt = f"已知信息:\n{context}\n根据已知信息回答问题:\n{query}"
# Generate the answer with the LLM (model loaded below)

# Load the model (INT8-quantized to fit smaller GPUs)
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).quantize(8).half().cuda()
# model = AutoModel.from_pretrained("/mnt/workspace/ChatGLM-6B/model/chatglm-6b", trust_remote_code=True).float()
# response, history = model.chat(tokenizer, "你好", history=[])
# print(response)
chatglm = model.eval()
response, his = chatglm.chat(tokenizer, prompt, history=[])
print(response)
print(his)
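
Every example above repeats the same retrieve-then-prompt steps, so it is natural to fold them into one helper. A minimal sketch; the ask name and the k parameter are my own additions, not part of the original post:

def ask(question, k=4):
    """Retrieve the k most similar chunks and have ChatGLM-6B answer from them."""
    hits = vector_store.similarity_search(question, k=k)
    context = "\n".join(doc.page_content for doc in hits)
    prompt = f"已知信息:\n{context}\n根据已知信息回答问题:\n{question}"
    response, _ = chatglm.chat(tokenizer, prompt, history=[])
    return response

print(ask("2022年的Y02_1表年初预算结转和结余的合计是多少?"))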
