FastGPT 是一个基于 LLM 大语言模型的知识库问答系统,提供开箱即用的数据处理、模型调用等能力。同时可以通过 Flow 可视化进行工作流编排,从而实现复杂的问答场景!
## 安装 Docker
[root@node1 ~]# curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun
systemctl enable --now docker
## 安装 docker-compose
[root@node1 ~]# wget https://github.com/docker/compose/releases/download/v2.24.5/docker-compose-linux-x86_64
[root@node1 ~]# cp -rf docker-compose-linux-x86_64 /usr/local/bin/docker-compose
[root@node1 ~]# chmod +x /usr/local/bin/docker-compose
2.1 模型部署
安装glm-4-9b-chat
GLM-4-9B 是智谱 AI 推出的最新一代预训练模型 GLM-4 系列中的开源版本。 在语义、数学、推理、代码和知识等多方面的数据集测评中,GLM-4-9B 及其人类偏好对齐的版本 GLM-4-9B-Chat 均表现出较高的性能。 除了能进行多轮对话,GLM-4-9B-Chat 还具备网页浏览、代码执行、自定义工具调用(Function Call)和长文本推理(支持最大 128K 上下文)等高级功能
## glm4 API 及 demo 下载
[root@node1 ~]# git clone https://github.com/THUDM/GLM-4
#安装git lfs大文件存储插件
[root@node1 ~]# curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
[root@node1 ~]# apt install git-lfs
# 激活插件
[root@node1 ~]# git lfs install
# 下载模型文件
[root@node1 ~]# git clone https://www.modelscope.cn/ZhipuAI/glm-4-9b-chat.git
编辑trans_cli_demo.py,将MODEL_PATH指向模型所在路径
启动命令行调试
启动API
2.2 启动m3e模型
非常棒的中文开源embedding项目,收集和整理了较多的中文高质量数据集,uniem也是一个不错的框架
[root@node1 data]# docker run -d -p 6008:6008 --name=m3e-large-api registry.cn-hangzhou.aliyuncs.com/fastgpt_docker/m3e-large-api:latest
m3e接口地址:ip:6008
默认api_key: sk-aaabbbcccdddeeefffggghhhiiijjjkkk
2.3 配置one-api
通过标准的 OpenAI API 格式聚合各种大模型,开箱即用 。
基于docker运行one-api
docker run --name one-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/one-api:/data justsong/one-api
访问ip:3000登录系统,初始账号密码root/123456
在 one-api 中添加 chatglm4 渠道
-
类型:自定义渠道。
-
名称:chatglm4-9b-chat
-
模型名称可以自定义:chatglm4-9b-chat
-
密钥随便填:sk-aaabbbcccdddeeefffggghhhiiijjjkkk
-
代理地址填入 ChatGLM4-9B-chat 的 API 地址,此处我填的本地地址(注意:本地调试不能填127.0.0.1,需要填写ipv4地址,可以通过 ip a 命令查看)
点击渠道,返回测试成功,说明配置成功
在one-api中添加m3e渠道
-
类型:自定义渠道。
-
名称:m3e
-
模型名称可以自定义:m3e
-
密钥随便填:sk-aaabbbcccdddeeefffggghhhiiijjjkkk (默认)
-
代理地址填入m3e的 API 地址,此处我填的本地地址(注意:本地调试不能填127.0.0.1,需要填写ipv4地址,可以通过 ip a 命令查看)
点击测试,返回以下结果说明配置成功
-
点击令牌,添加新令牌名称:fastGPT
-
过期时间:永不过期
-
取消无限额度:设置成取消无限额度
配置完成之后提交,点击复制ChatGPT Next Web
2.4 FastGPT
拉取配置文件
[root@node1 data]# mkdir fastgpt
[root@node1 data]# cd fastgpt
[root@node1 data]# curl -O https://raw.githubusercontent.com/labring/FastGPT/main/files/deploy/fastgpt/docker-compose.yml
[root@node1 data]# curl -O https://raw.githubusercontent.com/labring/FastGPT/main/projects/app/data/config.json
修改 docker-compose.yml 配置文件
因为one-api占用了本机的3000端口,所以修改映射到本地的端口为3020,默认映射端口为3000
对应上篇文章的操作保存的令牌,修改docker-compose.yml中的DEFAULT_ROOT_PSW(fastGPT默认密码)、OPENAI_BASE_URL(API 接口的地址,需要加/v1)和CHAT_API_KEY(API 接口的凭证)参数。localhost换成ipv4地址,key换成生成的令牌
修改config.json配置文件
{
"feConfigs": {
"lafEnv": "https://laf.dev"
},
"systemEnv": {
"openapiPrefix": "fastgpt",
"vectorMaxProcess": 15,
"qaMaxProcess": 15,
"pgHNSWEfSearch": 100
},
"llmModels": [
{
"model": "llama3-8b-chinese-chat-ollama-q8",
"name": "Ollama",
"maxContext": 8000,
"avatar": "/imgs/model/openai.svg",
"maxResponse": 8000,
"quoteMaxToken": 8000,
"maxTemperature": 1.2,
"charsPointsPrice": 0,
"censor": false,
"vision": false,
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInToolCall": true,
"usedInQueryExtension": true,
"toolChoice": true,
"functionCall": true,
"customCQPrompt": "",
"customExtractPrompt": "",
"defaultSystemChatPrompt": "",
"defaultConfig": {}
},
{
"model": "llama3:8b",
"name": "Ollama",
"maxContext": 8000,
"avatar": "/imgs/model/openai.svg",
"maxResponse": 8000,
"quoteMaxToken": 8000,
"maxTemperature": 1.2,
"charsPointsPrice": 0,
"censor": false,
"vision": false,
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInToolCall": true,
"usedInQueryExtension": true,
"toolChoice": true,
"functionCall": true,
"customCQPrompt": "",
"customExtractPrompt": "",
"defaultSystemChatPrompt": "",
"defaultConfig": {}
},
{
"model": "moonshot-v1-8k",
"name": "kimi",
"maxContext": 8000,
"avatar": "/imgs/model/openai.svg",
"maxResponse": 8000,
"quoteMaxToken": 8000,
"maxTemperature": 1.2,
"charsPointsPrice": 0,
"censor": false,
"vision": false,
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInToolCall": true,
"usedInQueryExtension": true,
"toolChoice": true,
"functionCall": true,
"customCQPrompt": "",
"customExtractPrompt": "",
"defaultSystemChatPrompt": "",
"defaultConfig": {}
},
{
"model": "qwen:14b",
"name": "qwen-14b",
"maxContext": 8000,
"avatar": "/imgs/model/openai.svg",
"maxResponse": 8000,
"quoteMaxToken": 8000,
"maxTemperature": 1.2,
"charsPointsPrice": 0,
"censor": false,
"vision": false,
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInToolCall": true,
"usedInQueryExtension": true,
"toolChoice": true,
"functionCall": true,
"customCQPrompt": "",
"customExtractPrompt": "",
"defaultSystemChatPrompt": "",
"defaultConfig": {}
},
{
"model": "gpt-3.5-turbo",
"name": "gpt-3.5-turbo",
"maxContext": 16000,
"avatar": "/imgs/model/openai.svg",
"maxResponse": 4000,
"quoteMaxToken": 13000,
"maxTemperature": 1.2,
"charsPointsPrice": 0,
"censor": false,
"vision": false,
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInToolCall": true,
"usedInQueryExtension": true,
"toolChoice": true,
"functionCall": true,
"customCQPrompt": "",
"customExtractPrompt": "",
"defaultSystemChatPrompt": "",
"defaultConfig": {}
},
{
"model": "gpt-4-0125-preview",
"name": "gpt-4-turbo",
"avatar": "/imgs/model/openai.svg",
"maxContext": 125000,
"maxResponse": 4000,
"quoteMaxToken": 100000,
"maxTemperature": 1.2,
"charsPointsPrice": 0,
"censor": false,
"vision": false,
"datasetProcess": false,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInToolCall": true,
"usedInQueryExtension": true,
"toolChoice": true,
"functionCall": false,
"customCQPrompt": "",
"customExtractPrompt": "",
"defaultSystemChatPrompt": "",
"defaultConfig": {}
},
{
"model": "glm4-9b-chat",
"name": "glm4-9b-chat",
"avatar": "/imgs/model/openai.svg",
"maxContext": 128000,
"maxResponse": 4000,
"quoteMaxToken": 100000,
"maxTemperature": 1.2,
"charsPointsPrice": 0,
"censor": false,
"vision": true,
"datasetProcess": false,
"usedInClassify": false,
"usedInExtractFields": false,
"usedInToolCall": false,
"usedInQueryExtension": false,
"toolChoice": true,
"functionCall": false,
"customCQPrompt": "",
"customExtractPrompt": "",
"defaultSystemChatPrompt": "",
"defaultConfig": {}
}
],
"vectorModels": [
{
"model": "llama3-8b-chinese-chat-ollama-q8",
"name": "llama3-8b-chinese-chat-ollama-q8",
"avatar": "/imgs/model/openai.svg",
"charsPointsPrice": 0,
"defaultToken": 512,
"maxToken": 3000,
"weight": 100,
"dbConfig": {},
"queryConfig": {}
},
{
"model": "llama3:8b",
"name": "llama3-8b",
"avatar": "/imgs/model/openai.svg",
"charsPointsPrice": 0,
"defaultToken": 512,
"maxToken": 3000,
"weight": 100,
"dbConfig": {},
"queryConfig": {}
},
{
"model": "m3e",
"name": "m3e",
"avatar": "/imgs/model/openai.svg",
"charsPointsPrice": 0,
"defaultToken": 512,
"maxToken": 3000,
"weight": 100,
"dbConfig": {},
"queryConfig": {}
},
{
"model": "qwen-14b",
"name": "qwen:14b",
"avatar": "/imgs/model/openai.svg",
"charsPointsPrice": 0,
"defaultToken": 512,
"maxToken": 3000,
"weight": 100,
"dbConfig": {},
"queryConfig": {}
}
],
"reRankModels": [],
"audioSpeechModels": [
{
"model": "tts-1",
"name": "OpenAI TTS1",
"charsPointsPrice": 0,
"voices": [
{
"label": "Alloy",
"value": "alloy",
"bufferId": "openai-Alloy"
},
{
"label": "Echo",
"value": "echo",
"bufferId": "openai-Echo"
},
{
"label": "Fable",
"value": "fable",
"bufferId": "openai-Fable"
},
{
"label": "Onyx",
"value": "onyx",
"bufferId": "openai-Onyx"
},
{
"label": "Nova",
"value": "nova",
"bufferId": "openai-Nova"
},
{
"label": "Shimmer",
"value": "shimmer",
"bufferId": "openai-Shimmer"
}
]
}
],
"whisperModel": {
"model": "whisper-1",
"name": "Whisper1",
"charsPointsPrice": 0
},
"ChatModels": [
{
"model": "llama3:8b",
"name": "Ollama",
"contextMaxToken": 8000,
"quoteMaxToken": 4000,
"maxTemperature": 1.2,
"price": 0,
"defaultSystem": ""
},
{
"model": "moonshot-v1-8k",
"name": "kimi",
"contextMaxToken": 8000,
"quoteMaxToken": 8000,
"maxTemperature": 1.2,
"price": 0,
"defaultSystem": ""
},
{
"model": "qwen:14b",
"name": "qwen-14b",
"contextMaxToken": 8000,
"quoteMaxToken": 4000,
"maxTemperature": 1.2,
"price": 0,
"defaultSystem": ""
}
],
"QAModel": {
"model": "llama3:8b",
"name": "Ollama",
"maxToken": 16000,
"price": 0
},
"ExtractModel": {
"model": "llama3:8b",
"name": "Ollama",
"functionCall": false,
"maxToken": 16000,
"price": 0,
"prompt": ""
},
"CQModel": {
"model": "llama3:8b",
"name": "Ollama",
"functionCall": false,
"maxToken": 16000,
"price": 0,
"prompt": ""
}
}
在chatModels、qaModels、cqModels、extractModels模块中添加 glm4-9b-chat 模型配置(模型名称是在one-api中创建的,以上配置仅供参考),在qaModels、cqModels、vectorModels模块中添加m3e模型配置
2.4.1 启动FastGPT
# 创建 mongo 密钥
[root@node1 data]# openssl rand -base64 756 > ./mongodb.key
[root@node1 data]# chmod 600 ./mongodb.key
[root@node1 data]# chown 999:root ./mongodb.key
# 启动容器
[root@node1 data]# docker-compose pull
[root@node1 data]# docker-compose up -d
启动之后访问ip:3020登录,账号密码为root和在配置文件中密码登录。
2.4.2 工作流编排
通过“应用”---“高级编排”功能,进行自定义Rag工作流
2.5 知识库创建
设置知识库Embedding模型和LLM模型