ragflow之初体验

        随着国内大语言模型(LLM)的应用日益广泛,构建面向企业私有数据的模型已成为一个重要研究方向。基于检索增强生成(RAG)的知识库与大语言模型的混合编排,为这一过程提供了强大的加速器,使得私有化部署更加高效和便捷。

        ragflow 是RAG领域中较为理想的开源解决方案,在企业私有数据集成管理和知识库构建方面展现出显著优势,其灵活的架构设计和高效的知识库构建能力,使得数据检索和信息生成更加精准和高效。

        本文将分享ragflow的部署方法和使用心得,欢迎各位批评指正。

1 环境准备

类型

版本

GPU

Tesla V100S / 32G x 8

CPU

Intel(R) Xeon(R) Gold 5218 / 64c

内存

376G

硬盘

SATA 4T

OS

CentOS7

体系结构

x64

CUDA

11.4

公网访问

代理

        由于ragflow依赖的docker版本至少为24.0.9,而现有的GPU主机上还运行了其他低版本的docker容器,为了不影响这些存量应用容器,我们考虑二进制堆叠搭建一个新的docker运行环境,目标运行环境各组件的版本如下:

组件名称

版本

runc

1.1.12

docker-proxy

24.0.9

docker-init

0.19.0

dockerd

24.0.9

docker

24.0.9

ctr

1.7.13

containerd-shim-runc-v2

1.7.13

containerd

1.7.13

docker-compose

2.26.1

        【注意】docker运行环境、docker-compose的二进制版本从此处下载:https://download.docker.com/linux/static/stable/x86_64/、https://github.com/docker/compose/releases?expanded=true&page=2&q=

1.1 containerd环境构建

1.1.1 containerd配置文件

        将docker二进制文件解压至/data/labs/runtimes/docker-24/目录下,生成一个默认配置文件,并在此基础上修改。

./containerd config default > containerd-config.toml

【注意】文件完整内容如下,所有路径应考虑显式设置,尽量避免使用默认值,以防与已有docker运行环境产生冲突!

>>> containerd-config.toml <<<

disabled_plugins = []

imports = []

oom_score = 0

plugin_dir = "/data/labs/runtimes/docker-24/containerd-plugin"

required_plugins = []

root = "/data/labs/runtimes/docker-24/containerd-root"

state = "/data/labs/runtimes/docker-24/containerd-state"

temp = "/data/labs/runtimes/docker-24/containerd-temp"

version = 2

[cgroup]

  path = "/data/labs/runtimes/docker-24/sys-fs-cgroup/containerd"

[debug]

  address = "/data/labs/runtimes/docker-24/containerd-debug.sock"

  format = ""

  gid = 0

  level = ""

  uid = 0

[grpc]

  address = "/data/labs/runtimes/docker-24/containerd-grpc.sock"

  gid = 0

  max_recv_message_size = 16777216

  max_send_message_size = 16777216

  tcp_address = "127.0.0.1:2375"

  tcp_tls_ca = "/data/labs/runtimes/docker-24/ca.crt"

  tcp_tls_cert = "/data/labs/runtimes/docker-24/tls.crt"

  tcp_tls_key = "/data/labs/runtimes/docker-24/tls.key"

  uid = 0

[metrics]

  address = "127.0.0.1:1338"

  grpc_histogram = false

[plugins]

  [plugins."io.containerd.gc.v1.scheduler"]

    deletion_threshold = 0

    mutation_threshold = 100

    pause_threshold = 0.02

    schedule_delay = "0s"

    startup_delay = "100ms"

  [plugins."io.containerd.grpc.v1.cri"]

    cdi_spec_dirs = ["/data/labs/runtimes/docker-24/cdi-cfgs", "/data/labs/runtimes/docker-24/cdi-run"]

    device_ownership_from_security_context = false

    disable_apparmor = false

    disable_cgroup = false

    disable_hugetlb_controller = true

    disable_proc_mount = false

    disable_tcp_service = true

    drain_exec_sync_io_timeout = "0s"

    enable_cdi = false

    enable_selinux = false

    enable_tls_streaming = false

    enable_unprivileged_icmp = false

    enable_unprivileged_ports = false

    ignore_image_defined_volumes = false

    image_pull_progress_timeout = "5m0s"

    max_concurrent_downloads = 3

    max_container_log_line_size = 16384

    netns_mounts_under_state_dir = false

    restrict_oom_score_adj = false

    sandbox_image = "registry.k8s.io/pause:3.8"

    selinux_category_range = 1024

    stats_collect_period = 10

    stream_idle_timeout = "4h0m0s"

    stream_server_address = "127.0.0.1"

    stream_server_port = "0"

    systemd_cgroup = false

    tolerate_missing_hugetlb_controller = true

    unset_seccomp_profile = ""

    disable_rdt = true

    [plugins."io.containerd.grpc.v1.cri".cni]

      bin_dir = "/data/labs/runtimes/docker-24/cni/bin"

      conf_dir = "/data/labs/runtimes/docker-24/cni/net.d"

      conf_template = ""

      ip_pref = ""

      max_conf_num = 1

      setup_serially = false

    [plugins."io.containerd.grpc.v1.cri".containerd]

      default_runtime_name = "runc"

      disable_snapshot_annotations = true

      discard_unpacked_layers = false

      ignore_blockio_not_enabled_errors = false

      ignore_rdt_not_enabled_errors = false

      no_pivot = false

      snapshotter = "overlayfs"

      address = "/data/labs/runtimes/docker-24/containerd.sock"

      [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]

        base_runtime_spec = ""

        cni_conf_dir = "/data/labs/runtimes/docker-24/cni/net.d"

        cni_max_conf_num = 0

        container_annotations = []

        pod_annotations = []

        privileged_without_host_devices = false

        privileged_without_host_devices_all_devices_allowed = false

        runtime_engine = ""

        runtime_path = "/data/labs/runtimes/docker-24/runc"

        runtime_root = "/data/labs/runtimes/docker-24/runc-root"

        runtime_type = ""

        sandbox_mode = ""

        snapshotter = ""

        [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]

      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]

          base_runtime_spec = ""

          cni_conf_dir = "/data/labs/runtimes/docker-24/cni/net.d"

          cni_max_conf_num = 0

          container_annotations = []

          pod_annotations = []

          privileged_without_host_devices = false

          privileged_without_host_devices_all_devices_allowed = false

          runtime_engine = ""

          runtime_path = "/data/labs/runtimes/docker-24/runc"

          runtime_root = ""

          runtime_type = "io.containerd.runc.v2"

          sandbox_mode = "podsandbox"

          snapshotter = ""

          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]

            BinaryName = ""

            CriuImagePath = "/data/labs/runtimes/docker-24/criu-image"

            CriuPath = ""

            CriuWorkPath = "/data/labs/runtimes/docker-24/criu-work"

            IoGid = 0

            IoUid = 0

            NoNewKeyring = false

            NoPivotRoot = false

            Root = ""

            ShimCgroup = ""

            SystemdCgroup = false

      [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]

        base_runtime_spec = ""

        cni_conf_dir = "/data/labs/runtimes/docker-24/cni/net.d"

        cni_max_conf_num = 0

        container_annotations = []

        pod_annotations = []

        privileged_without_host_devices = false

        privileged_without_host_devices_all_devices_allowed = false

        runtime_engine = ""

        runtime_path = "/data/labs/runtimes/docker-24/runc"

        runtime_root = "/data/labs/runtimes/docker-24/runc-root"

        runtime_type = ""

        sandbox_mode = ""

        snapshotter = ""

        [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]

    [plugins."io.containerd.grpc.v1.cri".image_decryption]

      key_model = "node"

    [plugins."io.containerd.grpc.v1.cri".registry]

      config_path = "/data/labs/runtimes/docker-24/cri-registry.json"

      [plugins."io.containerd.grpc.v1.cri".registry.auths]

      [plugins."io.containerd.grpc.v1.cri".registry.configs]

      [plugins."io.containerd.grpc.v1.cri".registry.headers]

      [plugins."io.containerd.grpc.v1.cri".registry.mirrors]

    [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]

      tls_cert_file = "/data/labs/runtimes/docker-24/tls.crt"

      tls_key_file = "/data/labs/runtimes/docker-24/tls.key"

  [plugins."io.containerd.internal.v1.opt"]

    path = "/data/labs/runtimes/docker-24/containerd-options"

  [plugins."io.containerd.internal.v1.restart"]

    interval = "10s"

  [plugins."io.containerd.internal.v1.tracing"]

    sampling_ratio = 1.0

    service_name = "containerd"

  [plugins."io.containerd.metadata.v1.bolt"]

    content_sharing_policy = "shared"

  [plugins."io.containerd.monitor.v1.cgroups"]

    no_prometheus = false

  [plugins."io.containerd.nri.v1.nri"]

    disable = true

    disable_connections = false

    plugin_config_path = "/data/labs/runtimes/docker-24/nri/conf.d"

    plugin_path = "/data/labs/runtimes/docker-24/nri/plugins"

    plugin_registration_timeout = "5s"

    plugin_request_timeout = "2s"

    socket_path = "/data/labs/runtimes/docker-24/nri/nri.sock"

  # runc的版本应与当前docker版本相匹配

  [plugins."io.containerd.runtime.v1.linux"]

    no_shim = false

    runtime = "/data/labs/runtimes/docker-24/runc"

    runtime_root = "/data/labs/runtimes/docker-24/runc-runtime-root"

    shim = "/data/labs/runtimes/docker-24/containerd-shim-runc-v2"

    shim_debug = false

  [plugins."io.containerd.runtime.v2.task"]

    platforms = ["linux/amd64"]

    sched_core = false

  [plugins."io.containerd.service.v1.diff-service"]

    default = ["walking"]

  [plugins."io.containerd.service.v1.tasks-service"]

    blockio_config_file = "/data/labs/runtimes/docker-24/blockio.toml"

    rdt_config_file = "/data/labs/runtimes/docker-24/rdt.toml"

  #[plugins."io.containerd.snapshotter.v1.aufs"]

  #  root_path = "/data/labs/runtimes/docker-24/aufs"

  [plugins."io.containerd.snapshotter.v1.blockfile"]

    fs_type = ""

    mount_options = []

    root_path = ""

    scratch_file = ""

  #[plugins."io.containerd.snapshotter.v1.devmapper"]

  #  disabled = true

  #  async_remove = false

  #  base_image_size = ""

  #  discard_blocks = false

  #  fs_options = ""

  #  fs_type = ""

  #  pool_name = ""

  #  root_path = ""

  [plugins."io.containerd.snapshotter.v1.native"]

    root_path = "/data/labs/runtimes/docker-24/native-root"

  [plugins."io.containerd.snapshotter.v1.overlayfs"]

    mount_options = []

    root_path = "/data/labs/runtimes/docker-24/overlayfs-root"

    sync_remove = false

    upperdir_label = false

  [plugins."io.containerd.snapshotter.v1.zfs"]

    root_path = "/data/labs/runtimes/docker-24/zfs-root"

  [plugins."io.containerd.tracing.processor.v1.otlp"]

    endpoint = ""

    insecure = false

    protocol = ""

  [plugins."io.containerd.transfer.v1.local"]

    config_path = "/data/labs/runtimes/docker-24/transfer.json"

    max_concurrent_downloads = 3

    max_concurrent_uploaded_layers = 3

    [[plugins."io.containerd.transfer.v1.local".unpack_config]]

      differ = ""

      platform = "linux/amd64"

      snapshotter = "overlayfs"

[proxy_plugins]

[stream_processors]

  [stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]

    accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]

    args = ["--decryption-keys-path", "/data/labs/runtimes/docker-24/containerd/ocicrypt/keys"]

    env = ["OCICRYPT_KEYPROVIDER_CONFIG=/data/labs/runtimes/docker-24/containerd/ocicrypt/ocicrypt_keyprovider.conf"]

    path = "ctd-decoder"

    returns = "application/vnd.oci.image.layer.v1.tar"

  [stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]

    accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]

    args = ["--decryption-keys-path", "/data/labs/runtimes/docker-24/containerd/ocicrypt/keys"]

    env = ["OCICRYPT_KEYPROVIDER_CONFIG=/data/labs/runtimes/docker-24/containerd/ocicrypt/ocicrypt_keyprovider.conf"]

    path = "ctd-decoder"

    returns = "application/vnd.oci.image.layer.v1.tar+gzip"

[timeouts]

  "io.containerd.timeout.bolt.open" = "0s"

  "io.containerd.timeout.metrics.shimstats" = "2s"

  "io.containerd.timeout.shim.cleanup" = "5s"

  "io.containerd.timeout.shim.load" = "5s"

  "io.containerd.timeout.shim.shutdown" = "3s"

  "io.containerd.timeout.task.state" = "2s"

[ttrpc]

  address = "/data/labs/runtimes/docker-24/containerd-ttrpc.sock"

  gid = 0

  uid = 0

1.1.2 containerd的启动

【启动脚本】

>>> start-containerd.sh <<<

#!/bin/bash

nohup ./containerd --config $(pwd)/containerd-config.toml >>nohup-containerd.log 2>&1 &

echo $! > containerd.pid

【停止脚本】

>>> stop-containerd.sh <<<

#!/bin/bash

pid=$(cat containerd.pid)

if [ ! -z "$pid" ]; then

   kill -9 $pid 2>/dev/null

fi

1.2 dockerd环境构建

1.2.1 为dockerd创建新的网桥(隔离的必要措施)

brctl show

brctl addbr docker1

ip link set dev docker1 up

ip addr add 172.24.0.1/24 dev docker1

ip addr show docker1

1.2.2 dockerd配置文件

【注意】在dockerd配置中,containerd项应指向containerd的GRPC协议socket地址,以进行通信和协作;其他路径应与原有dockerd环境相互隔离,避免相互影响。

>>> daemon.json <<<

{

  "bridge": "docker1",

  "containerd": "/data/labs/runtimes/docker-24/containerd-grpc.sock",

  "data-root": "/data/labs/runtimes/docker-24/docker-data",

  "debug": false,

  "default-runtime": "runc-1.1.12",

  "runtimes": {

    "runc-1.1.12": {

      "path": "/data/labs/runtimes/docker-24/runc",

      "runtimeArgs": []

    }

  },

  "exec-opts": ["native.cgroupdriver=cgroupfs", "proxy.path=/data/labs/runtimes/docker-24/docker-proxy"],

  "exec-root": "/data/labs/runtimes/docker-24/docker-exec",

  "hosts": [

    "unix:///data/labs/runtimes/docker-24/docker.sock"

  ],

  "init-path": "/data/labs/runtimes/docker-24/docker-init",

  "pidfile": "/data/labs/runtimes/docker-24/docker.pid",

  "proxies": {

    "http-proxy": "http://192.168.0.100:7890",

    "https-proxy": "http://192.168.0.100:7890",

    "no-proxy": "localhost,127.0.0.1"

  },

  "log-driver": "json-file",

  "log-level": "error",

  "log-opts": {

    "max-size": "10m",

    "max-file": "3"

  },

  "max-concurrent-downloads": 10,

  "storage-driver": "overlay2",

  "userland-proxy": false

}

1.2.3 dockerd的启动

【启动脚本】

>>> start-dockerd.sh <<<

#!/bin/bash

nohup ./dockerd --config-file $(pwd)/daemon.json >>$(pwd)/dockerd.log 2>&1 &

echo $! > dockerd.pid

【停止脚本】

>>> stop-dockerd.sh <<<

#!/bin/bash

pid=$(cat dockerd.pid)

if [ ! -z "$pid" ]; then

   kill -9 $pid 2>/dev/null

fi

1.3 ragflow搭建

一、下载ragflow源码:git clone https://github.com/infiniflow/ragflow.git

【注意】当前ragflow版本为v0.12.0

二、修改docker-compose配置清单,在ragflow/docker目录下

【注意】ragflow开箱自带了通义千问的云服务,但无法通过代理访问,因此无论在docker-compose.yml还是.env中设置代理均无效。由于GPU资源充足,后续将本地部署qwen2.5:72b模型,用于知识库检索增强。

三、修改数据库初始化脚本init.sql

>>> init.sql <<<

CREATE DATABASE IF NOT EXISTS rag_flow;

USE rag_flow;

-- 添加以下内容:设置 root 用户允许从任何 IP 地址连接,因为ragflow-server通过容器私有地址访问ragflow-mysql,这个容器的IP是不确定的

ALTER USER 'root'@'localhost' IDENTIFIED BY 'infini_rag_flow';

CREATE USER 'root'@'%' IDENTIFIED BY 'infini_rag_flow';

GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION;

FLUSH PRIVILEGES;

【注意】若mysql初始化失败,再次尝试应首先删除mysql数据目录,位置在:${docker-data}/volumes/docker_mysql_data/_data下。

四、(可选步骤)为了避免与服务器其他容器产生冲突,我们将所有容器到宿主机的映射端口关闭,仅保留必要的ragflow-server的前端开放端口

五、调整mysql的卷挂载路径,修改容器内路径为/init.sql

mysql:

  volumes:

    #- ./init.sql:/data/application/init.sql:ro

    - ./init.sql:/init.sql:ro

【注意】本人在测试时发现,可能因mysql容器默认账户为非root,导致/data/application/init.sql路径挂载失败,调整为/init.sql运行正常

六、使用新的docker运行环境启动ragflow容器

DOCKER_HOST=unix:///data/labs/runtimes/docker-24/docker.sock MACOS= /data/labs/runtimes/docker-24/docker-compose -f docker-compose.yml up -d

1.4 本地LLM部署

这里使用ollama部署qwen2.5:72b模型,大约需要57G显存。

一、为使得ollama可以通过代理更新模型包,可以在/etc/systemd/system/ollama.service中增加以下配置:

[Service]

Environment="OLLAMA_HOST=<GPU主机IP>:8088"

Environment="OLLAMA_MODELS=/data/labs/ollama"

Environment="HTTP_PROXY=http://192.168.0.100:7890"

Environment="HTTPS_PROXY=http://192.168.0.100:7890"

二、下载和运行

OLLAMA_HOST=192.168.0.101:8088 ollama pull qwen2.5:72b

OLLAMA_HOST=192.168.0.101:8088 ollama run qwen2.5:72b

三、ollama后台运行LLM,目前没有找到解决方案,而采用了tmux终端复用方案,具体命令为:

【创建会话】 tmux new -s ollama-session

【退出会话】 Ctrl+b 再按d键

【查询会话】 tmux list-sessions

【重进会话】 tmux attach -t ollama-session

2 ragflow体验

        注册与登录完成后即可开始体验。

2.1 配置知识库

        默认项也许是最优项,这里的选项基本可以不用动了。需要注意的是,在 RAG系统中,嵌入模型(Embedding Model)是用于将文本转化为向量表示的模型。这些向量表示(也称为嵌入)能够捕捉文本的语义信息,使得相似的文本在向量空间中距离较近,从而便于进行相似性计算和检索。这里嵌入模型选择bge-large-zh-v1.5的理由是,其在生成高质量的文本嵌入方面具有优势。

        文本嵌入(Text Embedding)是将文本数据转化为固定维度向量表示的过程,这些向量可以捕捉文本的语义信息,使相似的文本在向量空间中距离较近,从而便于进行相似度计算、分类和聚类任务。举例说明:1)我们有个新闻文章数据库,每篇文章都有一个标题和内容;2)生成嵌入:即使用预训练的嵌入模型将每篇文章的内容转化为向量表示;3)查询嵌入:用户输入一个查询,我们将查询文本转化为向量表示;4)计算相似度:计算查询向量与每篇文章向量的相似度,找到最相似的文章。

2.2 上传一个Excel知识库文档并解析

        这里以“系统漏洞列表”Excel作为示例演示,约66000行数据,用了大约3个小时解析完成。

2.3 在模型提供商中添加ollama语言模型

2.4 创建聊天助理

2.5 使用不同嵌入模型的效果比较

        知识库原始数据部分条目如下:(敏感信息已隐藏)

安全管理

系统安全管理

季度

系统满足各项安全管理要求,不符合标准,扣2分/次;

人员安全管理

季度

人员管理使用满足各项安全管理要求,包括人员出入、账号管理等,不符合标准,扣2分/次;

安全事故

季度

由于****团队运维人员人为原因造成1次安全事故,导致系统故障、被***或***通报,每次扣5分;

        分别配置两个使用不同嵌入模型的知识库,一个为bge-large-zh-v1.5、一个为商汤的piccolo-large-zh-v2,聊天选项均保持一致,得到的回复如下:

bge嵌入式模型:

piccolo嵌入式模型:

        【回复内容评价】bge嵌入模型的回复显得有些啰嗦,而piccolo回复内容更加确信和简洁。

3 总结

        本文主要阐述了ragflow的构建过程及其基本使用体验。尽管部署过程稍显繁琐,但其界面设计友好,操作流畅,用户体验极佳。各类语言聊天模型、嵌入模型可以灵活插装或进行参数调整,以获得不同风格的回复内容。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值