本次需要下载两个模型:bert-base-chinese向量化模型和Qwen2-0.5B-Instruct对话模型,实现共分为三大块。
1.数据进行读取、切分,向量化后保存在本地(本示例实际用NumPy的np.save保存;大规模数据可改用FAISS建立索引)。
2.对话模型进行推理,调用对话功能
3.收到用户问题,进行向量检索,把检索内容和问题一起送到大模型
下面需要注意的是数据只能是TXT文本,如果需要其他格式的要你自己改
按行读取,切分,效果不是很好(本次演示没有使用langchain,如需更好的效果,可以自己写切分或者用langchain切分也可以)
第一块的实现代码如下:
"""Block 1: embed a line-per-document corpus with bert-base-chinese.

Reads train1.txt (one document per line), encodes every line and a sample
query with the last-layer [CLS] hidden state of bert-base-chinese, saves
the corpus matrix with NumPy, then demonstrates cosine-similarity retrieval.
"""
from transformers import BertTokenizer, BertModel
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity

# Load the pretrained BERT model and tokenizer (local dir or hub id).
tokenizer = BertTokenizer.from_pretrained(r'bert-base-chinese')
model = BertModel.from_pretrained(r'bert-base-chinese')
model.eval()  # inference only: disable dropout for deterministic embeddings

data = '笔记本和台式机有什么区别'  # sample user query

# One document per line.  NOTE(review): empty lines survive as '' — consider
# filtering them before embedding; confirm against the data file.
with open('train1.txt', 'r', encoding='utf-8') as file:
    texts = [line.strip() for line in file]
print('texts:', texts)

# Tokenize corpus and query into model-ready tensors (pad/truncate to 512).
inputs = tokenizer(texts, return_tensors="pt", padding=True,
                   truncation=True, max_length=512)
data_inputs = tokenizer(data, return_tensors="pt", padding=True,
                        truncation=True, max_length=512)

with torch.no_grad():
    outputs = model(**inputs)
    data_outputs = model(**data_inputs)

# Use the last-layer [CLS] token hidden state as the sentence embedding.
sentence_embeddings = outputs.last_hidden_state[:, 0, :].numpy()
data_sentence_embeddings = data_outputs.last_hidden_state[:, 0, :].numpy()

# Persist the corpus vectors so block 3 can load them without re-encoding.
np.save('bert_embeddings.npy', sentence_embeddings)
loaded_embeddings = np.load('bert_embeddings.npy')

# Cosine-similarity retrieval: first corpus text vs corpus, and query vs corpus.
query_vec_reshaped = sentence_embeddings[0][np.newaxis, :]
data_query_vec_reshaped = data_sentence_embeddings[0][np.newaxis, :]

similarities = cosine_similarity(query_vec_reshaped, loaded_embeddings)
data_similarities = cosine_similarity(data_query_vec_reshaped, loaded_embeddings)

for i, similarity in enumerate(similarities[0]):
    print(f"文本 {i + 1} 与查询文本的相似度: {similarity:.4f}")

# Index of the best match.  (The original also computed
# np.argmax(similarities[0][1]) — argmax of a scalar, always 0 — removed.)
max_similarity_idx = np.argmax(similarities[0])
data_max_similarity_idx = np.argmax(data_similarities[0])
print(f"与查询文本相似度最高的文本是: {texts[max_similarity_idx]}")
print(f'\n\n', texts[data_max_similarity_idx])
# 注意:FAISS对于大规模数据集和高效的相似度搜索特别有用。 # 对于小数据集,你可以直接使用NumPy或SciPy的函数来计算相似度。
下面需要注意的是,第二块代码要单独放到一个py文件内,建议命名为chatglm_model_input.py;第三块代码会导入并调用其中的类。
第二块代码实现:
from modelscope import AutoModel, snapshot_download
from modelscope import AutoModelForCausalLM, AutoTokenizer
import torch


class GLMmodel():
    """Thin wrapper that loads ONE of ChatGLM3 / Qwen2 / GLM4 and exposes
    simple text-in / text-out inference helpers.

    Exactly one of the boolean flags should be True; ``model_name_or`` is
    the ModelScope/HF model id or a local path.  All models are placed on
    CUDA — a GPU is required.
    """

    def __init__(self, Qwen: bool = None, ChatGLM3: bool = None, GLM4: bool = None,
                 model_name_or: str = "model_name_or"):
        super(GLMmodel, self).__init__()
        # Multi-turn history for ChatGLM3.  chat() returns a list, so this
        # must start as [] — the original '' string made the first
        # `historya += history` raise TypeError (str += list).
        self.historya = []
        self.model_name_or = model_name_or
        self.ChatGLM3 = ChatGLM3
        self.Qwen = Qwen
        self.GLM4 = GLM4  # stored for symmetry with the other two flags
        if self.ChatGLM3:
            chatglm3_model_dir = snapshot_download(self.model_name_or, revision="v1.0.0")
            self.chatglm3_tokenizer = AutoTokenizer.from_pretrained(
                chatglm3_model_dir, trust_remote_code=True)
            chatglm3_model = AutoModel.from_pretrained(
                chatglm3_model_dir, trust_remote_code=True).half().cuda()
            self.chatglm3_model = chatglm3_model.eval()
        elif self.Qwen:
            self.qwen2_model = AutoModelForCausalLM.from_pretrained(
                self.model_name_or,
                torch_dtype="auto",
                device_map="auto"
            )
            self.qwen2_tokenizer = AutoTokenizer.from_pretrained(self.model_name_or)
        elif self.GLM4:
            device = "cuda"
            self.glm4_tokenizer = AutoTokenizer.from_pretrained(
                self.model_name_or, trust_remote_code=True)
            self.glm4_model = AutoModelForCausalLM.from_pretrained(
                self.model_name_or,
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                trust_remote_code=True
            ).to(device).eval()

    def Chatglm3_6b_output(self, data: str) -> str:
        """Multi-turn ChatGLM3 chat: uses and updates the stored history."""
        response, history = self.chatglm3_model.chat(
            self.chatglm3_tokenizer, data, history=self.historya)
        # chat() returns the FULL updated history, so replace rather than
        # append (the original `self.historya += history` was a type bug).
        self.historya = history
        return response

    def Chatglm3_6b_output_history(self, data: str) -> str:
        """Single-turn ChatGLM3 chat: no history is read or kept."""
        response, history = self.chatglm3_model.chat(
            self.chatglm3_tokenizer, data, history=[])
        return response

    def Qwen2_output(self, data: str) -> str:
        """Generate with Qwen2 and return only the newly generated text."""
        device = "cuda"
        model_inputs = self.qwen2_tokenizer([data], return_tensors="pt").to(device)
        generated_ids = self.qwen2_model.generate(
            model_inputs.input_ids,
            max_new_tokens=400,
            repetition_penalty=1.15
        )
        # Strip the prompt tokens so only the completion is decoded.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        return self.qwen2_tokenizer.batch_decode(
            generated_ids, skip_special_tokens=True)[0]

    def GLM4_output(self, data: str) -> str:
        """Chat-template generation with GLM4; prints and returns the reply."""
        device = "cuda"
        inputs = self.glm4_tokenizer.apply_chat_template(
            [{"role": "user", "content": data}],
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True
        )
        inputs = inputs.to(device)
        gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
        with torch.no_grad():
            outputs = self.glm4_model.generate(**inputs, **gen_kwargs)
        # Keep only tokens generated after the prompt.
        outputs = outputs[:, inputs['input_ids'].shape[1]:]
        text = self.glm4_tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(text)
        return text
第三块代码:
"""Block 3: interactive RAG loop — retrieve the best-matching corpus line
for each user question, then feed match + question to the Qwen2 LLM.
"""
from transformers import BertTokenizer, BertModel
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from chatglm_model_input import GLMmodel
import warnings

warnings.filterwarnings('ignore')

# Load everything ONCE, before the loop.  The original reloaded the
# embeddings file, re-read the corpus and re-instantiated the entire LLM
# on every single question — a massive per-iteration cost.
tokenizer = BertTokenizer.from_pretrained(r'bert-base-chinese')
model = BertModel.from_pretrained(r'bert-base-chinese')
loaded_embeddings = np.load(r'bert_embeddings.npy')
with open(r'train1.txt', 'r', encoding='utf-8') as file:
    texts = [line.strip() for line in file]
llm = GLMmodel(Qwen=True, model_name_or=r'Qwen2-0.5B-Instruct')

while True:
    data = input('问题:')
    data_inputs = tokenizer(data, return_tensors="pt", padding=True,
                            truncation=True, max_length=512)
    with torch.no_grad():
        data_outputs = model(**data_inputs)
    # Last-layer [CLS] hidden state as the query embedding — must match
    # how block 1 embedded the corpus.
    data_sentence_embeddings = data_outputs.last_hidden_state[:, 0, :].numpy()
    data_query_vec_reshaped = data_sentence_embeddings[0][np.newaxis, :]
    data_similarities = cosine_similarity(data_query_vec_reshaped, loaded_embeddings)
    for i, similarity in enumerate(data_similarities[0]):
        print(f"文本 {i + 1} 与查询文本的相似度: {similarity:.4f}")
    max_similarity_idx = np.argmax(data_similarities[0])
    print(f"与查询文本相似度最高的文本是: {texts[max_similarity_idx]}")
    out = llm.Qwen2_output(f'请根据匹配的结果{texts[max_similarity_idx]}和用户问题{data}进行回答')
    print('大模型回答:', out)