"""
简单智能客服系统,主要功能包括:
- 加载和管理知识库。
- 预处理用户问题并将其转换为向量。
- 通过余弦相似度匹配最相似的问题并返回答案。
- 调用外部API获取AI回复。
- 记录用户偏好和反馈以优化知识库。
"""
import os
import json
import re
from datetime import datetime
import requests
import paddle
from paddlenlp.transformers import BertTokenizer, BertModel
from paddle.nn.functional import cosine_similarity
import numpy as np
# Point the cache-directory environment variable at a fixed local folder.
# NOTE(review): this runs after paddlenlp has already been imported above;
# confirm the library reads this variable lazily, otherwise it has no effect.
os.environ.update({'TRANSFORMERS_CACHE': 'D:\\paddlenlp_cache'})
# Print the names of the pretrained configurations the tokenizer class lists.
print(BertTokenizer.pretrained_init_configuration.keys())
# Module-level tokenizer/model pair loaded from bert-base-chinese
# (tokenizer first, then model, as before).
tokenizer, model = (
    BertTokenizer.from_pretrained('bert-base-chinese'),
    BertModel.from_pretrained('bert-base-chinese'),
)
class DwIntelligentCustomerService:
    """Customer-service helper combining a local knowledge base with DeepSeek.

    Stored questions are embedded with ``bert-base-chinese`` (the [CLS]
    vector of the first model output). An incoming question is answered from
    the knowledge base when its cosine similarity to a stored question
    exceeds ``SIMILARITY_THRESHOLD``; otherwise the DeepSeek chat API is
    called and a successful reply is appended to the knowledge base.

    Instance state:
        knowledge_base: ``{"questions": [...], "answers": [...]}`` with
            parallel lists.
        knowledge_vectors: NumPy array with one row per stored question
            (an empty array when there are no questions).
        knowledge_version / last_updated: metadata persisted with the data.
        user_preferences: per-user dict holding the last interaction time,
            last question/response and the feedback given.
        conversation_context: optional dict; a ``"messages"`` list placed in
            it is used as the chat history sent to DeepSeek.
    """

    # Cosine similarity a stored question must exceed to count as a match.
    SIMILARITY_THRESHOLD = 0.7
    # Seconds to wait for the DeepSeek API before giving up.
    REQUEST_TIMEOUT = 60
    # Reply returned to the caller when the API call fails.
    FALLBACK_REPLY = "无法获取回复,请稍后再试。"

    def __init__(self, knowledge_base_path="knowledge_base.json", deepseek_api_key="your_deepseek_api_key"):
        """Load the tokenizer, the model and the knowledge base.

        Args:
            knowledge_base_path: JSON file the knowledge base is read from
                and written to.
            deepseek_api_key: bearer token used for the DeepSeek API.
        """
        self.knowledge_base_path = knowledge_base_path
        self.deepseek_api_key = deepseek_api_key
        self.conversation_context = {}  # conversation context storage
        self.user_preferences = {}  # per-user preference/feedback records
        # Use one consistent tokenizer/model pair for every embedding.
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
        self.model = BertModel.from_pretrained('bert-base-chinese')
        self.device = paddle.get_device()
        self.model.to(self.device)
        # Inference only: switch off dropout so that the same text always
        # produces the same embedding.
        self.model.eval()
        self.load_knowledge_base()

    def preprocess_text(self, text):
        """Normalise a question: lower-case it and strip punctuation.

        Word characters (including CJK characters) and whitespace are kept.
        """
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)
        return text

    def load_knowledge_base(self):
        """Read the knowledge base from disk and pre-compute its vectors.

        When the file does not exist an empty knowledge base with version
        ``"1.0.0"`` is created in memory. Malformed JSON is not swallowed;
        the ``json`` error propagates to the caller.
        """
        if os.path.exists(self.knowledge_base_path):
            with open(self.knowledge_base_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self.knowledge_base = data.get("knowledge", {"questions": [], "answers": []})
            # Tolerate a stored object that lacks one of the two lists.
            self.knowledge_base.setdefault("questions", [])
            self.knowledge_base.setdefault("answers", [])
            self.knowledge_version = data.get("version", "1.0.0")
            self.last_updated = data.get("last_updated", str(datetime.now()))
        else:
            self.knowledge_base = {"questions": [], "answers": []}
            self.knowledge_version = "1.0.0"
            self.last_updated = str(datetime.now())
        # Pre-compute the vectors of every stored question.
        self.knowledge_vectors = self._encode_questions(self.knowledge_base["questions"])
        print(f"Loaded knowledge base with {len(self.knowledge_base['questions'])} questions.")
        print(f"Knowledge vectors shape: {self.knowledge_vectors.shape}")

    def _encode_questions(self, questions):
        """Embed a list of questions and return one vector per question.

        Every question is normalised with ``preprocess_text`` first, so
        stored questions and user questions are embedded from the same
        textual form. Returns an empty NumPy array for an empty list.
        """
        if not questions:
            print("No questions to encode.")
            return np.array([])
        normalized = [self.preprocess_text(question) for question in questions]
        inputs = self.tokenizer(normalized, return_tensors='pd', padding=True, truncation=True, max_length=512)
        with paddle.no_grad():
            outputs = self.model(**inputs)
        return outputs[0][:, 0, :].numpy()  # vector at the [CLS] position

    def save_knowledge_base(self):
        """Write the knowledge base, its version and a fresh timestamp to disk."""
        # Keep the in-memory timestamp in step with what is persisted.
        self.last_updated = str(datetime.now())
        data = {
            "knowledge": self.knowledge_base,
            "version": self.knowledge_version,
            "last_updated": self.last_updated,
        }
        with open(self.knowledge_base_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)

    def find_similar_question(self, user_question):
        """Return the stored answer whose question best matches, or ``None``.

        ``None`` is returned when the knowledge base is empty or when the
        best cosine similarity does not exceed ``SIMILARITY_THRESHOLD``.
        """
        # Nothing to compare against: skip the (expensive) encoding step.
        if self.knowledge_vectors.size == 0:
            print("Knowledge vectors are empty.")
            return None

        user_vector = self._encode_questions([user_question])[0]
        print(f"User vector shape: {user_vector.shape}")
        print(f"Knowledge vectors shape: {self.knowledge_vectors.shape}")

        user_vector_tensor = paddle.to_tensor(user_vector).unsqueeze(0)
        knowledge_vectors_tensor = paddle.to_tensor(self.knowledge_vectors)
        print(f"User vector tensor shape: {user_vector_tensor.shape}")
        print(f"Knowledge vectors tensor shape: {knowledge_vectors_tensor.shape}")

        similarities = cosine_similarity(user_vector_tensor, knowledge_vectors_tensor)
        print(f"Similarities tensor: {similarities}")
        print(f"Similarities shape: {similarities.shape}")

        # Work on a flat NumPy copy so the index and score are plain Python
        # numbers (a tensor is not a reliable list index).
        scores = similarities.numpy().reshape(-1)
        if scores.size == 0:
            print("Similarities are empty.")
            return None

        best_idx = int(np.argmax(scores))
        answers = self.knowledge_base["answers"]
        if float(scores[best_idx]) > self.SIMILARITY_THRESHOLD and best_idx < len(answers):
            return answers[best_idx]
        return None

    def get_ai_response(self, user_question):
        """Ask the DeepSeek chat API and return ``(reply, success)``.

        On any failure (HTTP error, network error, timeout, unexpected
        response body) the reply is ``FALLBACK_REPLY`` and ``success`` is
        ``False``.

        NOTE(review): the history is read from
        ``conversation_context["messages"]`` when a caller has put a list
        there (the question is appended to that list); otherwise a fresh
        system+user pair is built and not stored, so no context carries over
        between calls. Left unchanged on purpose: the context dict is not
        keyed by user, so persisting it here would mix users' conversations.
        """
        messages = self.conversation_context.get("messages", [
            {"role": "system", "content": "You are a helpful assistant."}
        ])
        messages.append({"role": "user", "content": user_question})
        print("调用 DeepSeek API")
        url = "https://api.deepseek.com/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.deepseek_api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": "deepseek-chat",
            "messages": messages
        }
        try:
            print(f"Sending request to {url} with payload: {json.dumps(payload, ensure_ascii=False, indent=4)}")
            # Without a timeout a stalled connection would block forever.
            response = requests.post(url, headers=headers, json=payload, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            print(f"Response: {response.text}")
            response_data = response.json()
            ai_response = response_data['choices'][0]['message']['content']
            if not isinstance(ai_response, str):
                raise TypeError("message content is not a string")
            return ai_response, True
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
            failed_response = http_err.response
            if failed_response is not None:
                print(f"Response content: {failed_response.text}")
        except requests.exceptions.RequestException as e:
            print(f"Error calling DeepSeek API: {e}")
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # The body was not JSON or did not have the expected structure.
            print(f"Unexpected DeepSeek API response format: {e!r}")
        return self.FALLBACK_REPLY, False

    def respond_to_user(self, user_question, user_id=None):
        """Answer a question from the knowledge base or, failing that, the API.

        A successful API reply is stored in the knowledge base. When
        ``user_id`` is given, the interaction time, question and reply are
        recorded for that user and the feedback slot is reset.
        """
        prefs = None
        if user_id is not None:
            prefs = self.user_preferences.setdefault(user_id, {})
            prefs["last_interaction"] = str(datetime.now())

        answer = self.find_similar_question(user_question)
        if answer:
            reply = answer
        else:
            reply, success = self.get_ai_response(user_question)
            if success:
                self.add_question_answer(user_question, reply)

        # Record the exchange on both paths so that later feedback refers to
        # this question and not to a stale earlier one.
        if prefs is not None:
            prefs["last_question"] = user_question
            prefs["last_response"] = reply
            prefs["feedback"] = None
        return reply

    def add_question_answer(self, question, answer):
        """Store a question/answer pair and persist the knowledge base.

        A question that is already stored gets its answer replaced instead
        of being appended a second time.
        """
        questions = self.knowledge_base["questions"]
        answers = self.knowledge_base["answers"]
        if question in questions:
            answers[questions.index(question)] = answer
        else:
            questions.append(question)
            answers.append(answer)
            # Embed only the new question and extend the vector matrix.
            new_vector = self._encode_questions([question])
            if self.knowledge_vectors.size == 0:
                self.knowledge_vectors = new_vector
            else:
                self.knowledge_vectors = np.vstack((self.knowledge_vectors, new_vector))
        self.save_knowledge_base()

    def add_feedback(self, user_id, feedback):
        """Record feedback; on ``"negative"`` regenerate the stored answer.

        Unknown users are ignored. The stored answer is replaced only when
        the user's last question is in the knowledge base and the API call
        succeeds, so the fallback error text is never saved as an answer.
        """
        prefs = self.user_preferences.get(user_id)
        if prefs is None:
            return
        prefs["feedback"] = feedback
        if feedback != "negative":
            return

        last_question = prefs.get("last_question")
        questions = self.knowledge_base["questions"]
        if last_question not in questions:
            return
        new_answer, success = self.get_ai_response(last_question)
        if not success:
            return
        self.knowledge_base["answers"][questions.index(last_question)] = new_answer
        self.save_knowledge_base()

    def update_knowledge_base(self):
        """Bump the minor component of the version and persist.

        ``"1.0"`` becomes ``"1.1"`` and ``"1.0.0"`` becomes ``"1.1.0"``
        (components after the minor one are reset to 0).

        Raises:
            ValueError: if the version is not a dotted sequence of integers.
        """
        raw_version = str(self.knowledge_version).strip()
        try:
            numbers = [int(part) for part in raw_version.split(".")]
        except ValueError:
            raise ValueError(f"Unsupported knowledge base version: {raw_version!r}") from None
        if len(numbers) == 1:
            numbers.append(0)
        numbers[1] += 1
        numbers[2:] = [0] * len(numbers[2:])
        self.knowledge_version = ".".join(str(number) for number in numbers)
        self.save_knowledge_base()
# Example usage. Guarded so that merely importing this module does not build
# the service, write the knowledge-base file or call the remote API.
if __name__ == "__main__":
    service = DwIntelligentCustomerService(deepseek_api_key="token")
    service.add_question_answer("如何重置密码?", "请访问重置密码页面并按照指示操作。")
    response = service.respond_to_user("登录遇到问题")
    print(response)