Elasticsearch in Practice (continuously updated)

Deployment

ELK version: 7.17.10
Docker version: 20.10.21
docker-compose version: 1.21.1
A docker-compose.yml is used to bring up the Elasticsearch, Kibana, and Logstash containers, with SSL authentication enabled.
Compared with a traditional docker run deployment, docker-compose is a bit more convenient and closer to how Kubernetes works: in both cases containers are brought up from YAML configuration (personal opinion).
More components are planned to be added later.

Main docker-compose.yml

Elasticsearch and Kibana share a single docker-compose.yml: a three-node ES cluster plus the Kibana UI, with HTTPS authentication enabled.
It is based on the official docker-compose example; see the official docs:
https://github.com/elastic/elasticsearch/blob/8.12/docs/reference/setup/install/docker/docker-compose.yml

version: "2.2"
services:
# certificate generation logic
  create_certs:
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
    user: "0"
    container_name: es_create_certs
    command: >
      bash -c '
        if [ x${ELASTIC_PASSWORD} == x ]; then
          echo "Set the ELASTIC_PASSWORD environment variable in the .env file";
          exit 1;
        elif [ x${KIBANA_PASSWORD} == x ]; then
          echo "Set the KIBANA_PASSWORD environment variable in the .env file";
          exit 1;
        fi;
        if [[ ! -f config/certs/ca.zip ]]; then
          bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
          unzip config/certs/ca.zip -d config/certs;

          bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
          unzip config/certs/certs.zip -d config/certs;
        fi;
        echo "Setting file permissions"
        chown -R 1000:0 config/certs
        echo "Waiting for Elasticsearch availability";
        until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
        echo "Setting kibana_system password";
        until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done;
        echo "All done!";
      '
    healthcheck:
      test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"]
      interval: 1s
      timeout: 5s
      retries: 120
# es01 node configuration
  es01:
    depends_on:
      create_certs:
        condition: service_healthy
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    restart: always     
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
      - /opt/module/docker-compose/elk-prod/es/plugins:/usr/share/elasticsearch/plugins
      - /opt/module/docker-compose/elk-prod/es/data/es01:/usr/share/elasticsearch/data
    ports:
      - ${ES_PORT}:9200
    environment:
      - node.name=es01
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es02,es03
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"          
      - bootstrap.memory_lock=true
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es01/es01.key
      - xpack.security.http.ssl.certificate=certs/es01/es01.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es01/es01.key
      - xpack.security.transport.ssl.certificate=certs/es01/es01.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120
# es02 node configuration
  es02:
    depends_on:
      - es01
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    restart: always             
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
      - /opt/module/docker-compose/elk-prod/es/plugins:/usr/share/elasticsearch/plugins
      - /opt/module/docker-compose/elk-prod/es/data/es02:/usr/share/elasticsearch/data
    environment:
      - node.name=es02
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es01,es03
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es02/es02.key
      - xpack.security.http.ssl.certificate=certs/es02/es02.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es02/es02.key
      - xpack.security.transport.ssl.certificate=certs/es02/es02.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120
# es03 node configuration
  es03:
    depends_on:
      - es02
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    restart: always             
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
      - /opt/module/docker-compose/elk-prod/es/plugins:/usr/share/elasticsearch/plugins
      - /opt/module/docker-compose/elk-prod/es/data/es03:/usr/share/elasticsearch/data
    environment:
      - node.name=es03
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es01,es02
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es03/es03.key
      - xpack.security.http.ssl.certificate=certs/es03/es03.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es03/es03.key
      - xpack.security.transport.ssl.certificate=certs/es03/es03.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120
# kibana node configuration
  kibana:
    depends_on:
      es01:
        condition: service_healthy
      es02:
        condition: service_healthy
      es03:
        condition: service_healthy
    restart: always    
    image: docker.elastic.co/kibana/kibana:${STACK_VERSION}  
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/kibana/config/certs
      - /opt/module/docker-compose/elk-prod/kibana/plugins:/usr/share/kibana/plugins
      - /opt/module/docker-compose/elk-prod/kibana/data:/usr/share/kibana/data
    ports:
      - ${KIBANA_PORT}:5601
    environment:
      - SERVERNAME=kibana
      - ELASTICSEARCH_HOSTS=https://es01:9200
      - ELASTICSEARCH_USERNAME=kibana_system
      - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD}
      - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s -I http://localhost:5601 | grep -q 'HTTP/1.1 302 Found'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120
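
The compose file above pulls its variables from a .env file in the same directory. A minimal sketch (all values here are placeholders of my own choosing; pick your own cluster name and passwords):

STACK_VERSION=7.17.10
CLUSTER_NAME=es-cluster
ELASTIC_PASSWORD=change_me_elastic
KIBANA_PASSWORD=change_me_kibana
ES_PORT=9200
KIBANA_PORT=5601
# used by the separate Logstash compose file further below
LOGSTASH_PORT=4560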


Notes on the parameters

healthcheck: runs a command to check whether the container is actually up and healthy.
depends_on: starts a container based on the state of the container it depends on, which makes troubleshooting easier; if all three containers start at once it is hard to tell which one went wrong.
ES_JAVA_OPTS: pay close attention to how much memory you allocate to Elasticsearch.

create_certs

This step generates the certificates.
The instances.yml file:

instances:
  - name: es01
    dns:
        - es01
        - localhost
    ip:
        - 127.0.0.1
  - name: es02
    dns:
        - es02
        - localhost
    ip:
        - 127.0.0.1
  - name: es03
    dns:
        - es03
        - localhost
    ip:
        - 127.0.0.1
  - name: kibana
    dns:
        - kibana
        - localhost
    ip:
        - 127.0.0.1
  - name: logstash
    dns:
        - logstash
        - localhost
    ip:
        - 127.0.0.1

After generation, the mounted directory will contain all the certificate sub-directories, laid out according to your instances.yml configuration. I generated a basic Logstash certificate up front; if you have no such plan you can skip it and simply regenerate it on the ES cluster later, when an external Logstash needs to be integrated.
The Logstash certificate will still need to be reconfigured later.
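
Once the cluster is up, you can verify the TLS setup from the host with the generated CA (a sketch; the path follows the volume mounts above, and ELASTIC_PASSWORD is the value from your .env):

curl --cacert /opt/module/docker-compose/elk-prod/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" "https://localhost:9200/_cluster/health?pretty"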

logstash

Here I use a separate docker-compose file just for Logstash; you could also merge everything into a single file.

version: "2.2"
services:
# logstash configuration
  logstash:
    image: logstash:${STACK_VERSION}
    restart: always      
    volumes:
      - /opt/module/docker-compose/elk-prod/certs/logstash:/etc/logstash/config/certs
      - /opt/module/docker-compose/elk-prod/logstash/config/logstash.yml:/usr/share/logstash/config/logstash.yml
      - /opt/module/docker-compose/elk-prod/logstash/config/pipeline:/usr/share/logstash/pipeline
      - /opt/module/docker-compose/elk-prod/logstash/data:/usr/share/logstash/data
      - /opt/module/docker-compose/elk-prod/logstash/plugins:/usr/share/logstash/plugins
    ports:
      - ${LOGSTASH_PORT}:4560

The logstash.yml file; the Logstash user referenced here is managed through Kibana:

node.name: logstash
xpack.monitoring.enabled: true
xpack.monitoring.elasticsearch.username: logstash_system
xpack.monitoring.elasticsearch.password: 'password'
xpack.monitoring.elasticsearch.hosts: [ 'https://es01:9200' ]
xpack.monitoring.elasticsearch.ssl.certificate_authority: /etc/logstash/config/certs/ca.crt
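
logstash_system is a built-in user; its password can be set in Kibana's UI or via the security API in Dev Tools, for example (a sketch, the password is a placeholder):

POST _security/user/logstash_system/_password
{
  "password": "password"
}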

Example logstash.conf

input {
 beats {
    port => 5044
    ssl => true
    ssl_key => '/etc/logstash/config/certs/logstash.pkcs8.key'
    ssl_certificate => '/etc/logstash/config/certs/logstash.crt'
  }
  file {
    path => "/usr/share/logstash/data/movies.csv"
    start_position => "beginning"
    sincedb_path => "/dev/null"
  }
}
filter {
  csv {
    separator => ","
    columns => ["id","content","genre"]
  }

  mutate {
    split => { "genre" => "|" }
    remove_field => ["path", "host","@timestamp","message"]
  }

  mutate {

    split => ["content", "("]
    add_field => { "title" => "%{[content][0]}"}
    add_field => { "year" => "%{[content][1]}"}
  }

  mutate {
    convert => {
      "year" => "integer"
    }
    strip => ["title"]
    remove_field => ["path", "host","@timestamp","message","content"]
  }

}
output {
   elasticsearch {
    hosts => ["https://es01:9200"]
    index => "movies-%{+YYYY.MM.dd}"
    document_id => "%{id}"
    cacert => '/etc/logstash/config/certs/ca.crt'
    user => 'logstash_writer'
    password => 'xxx'
   }
  stdout {}
}

Note that the key used here is logstash.pkcs8.key; for details see the official guide:
https://www.elastic.co/cn/blog/configuring-ssl-tls-and-https-to-secure-elasticsearch-kibana-beats-and-logstash#prepare-logstash
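
The Beats input expects a PKCS#8 private key; assuming the certificate tool above produced logstash.key, it can be converted with openssl along these lines (a sketch, run in the certs/logstash directory):

openssl pkcs8 -in logstash.key -topk8 -nocrypt -out logstash.pkcs8.key
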
When deploying with Docker, make sure you mount the configuration directories. The image ships with a default configuration in the logstash directory that has no certificate settings, which can leave your Logstash instance endlessly reporting permission errors.
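
The output section of the pipeline above writes as logstash_writer, which is not a built-in user. A hedged sketch of creating a matching role and user in Kibana Dev Tools (the role name, privileges, and the movies-* index pattern are my assumptions, following the pattern in the Elastic guide):

POST /_security/role/logstash_writer
{
  "cluster": ["monitor", "manage_index_templates"],
  "indices": [
    {
      "names": ["movies-*"],
      "privileges": ["create_index", "create", "write", "index"]
    }
  ]
}

POST /_security/user/logstash_writer
{
  "password": "xxx",
  "roles": ["logstash_writer"]
}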

Data types

Here I will use an index template I created as an illustration.
It is an article index and relies on the ik analyzer being installed (how to install the analyzer is covered below).

{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 2,
    "analysis": {
      "analyzer": {
        "ik_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word"
        },
        "ik_smart": {
          "type": "custom",
          "tokenizer": "ik_smart"
        }
      }
    }
  },
  "aliases": {
    "knowledge_detail": {}
  },
  "mappings": {
    "properties": {
      "system_from": {
        "type": "keyword"
      },
      "author_code": {
        "type": "keyword"
      },
      "article_author": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "tag": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "article_title": {
        "type": "text",
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "article_info": {
        "type": "text",
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "navigation_search": {
        "type": "text",
        "analyzer": "ik_analyzer"
      },
      "is_private": {
        "type": "integer",
        "null_value": 0
      },
      "hit_count": {
        "type": "integer",
        "null_value": 0
      },
      "collect_count": {
        "type": "integer",
        "null_value": 0
      },
      "created_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      },
      "updated_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      }
    }
  }
}

The field definitions of this index live under mappings.properties.
Elasticsearch is a non-relational store, so there are no rows and columns as in a relational database; every piece of data is a JSON document.
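
As an illustration of what a document in this index looks like, here is a hedged example of indexing one and searching it through the knowledge_detail alias defined above (assuming the alias points at a single index; all field values are made up):

POST knowledge_detail/_doc/1
{
  "system_from": "blog",
  "author_code": "u001",
  "article_author": "张三",
  "tag": "elasticsearch",
  "article_title": "Elasticsearch 实践",
  "article_info": "使用 docker-compose 部署 ELK 并开启 SSL 认证",
  "is_private": 0,
  "hit_count": 0,
  "collect_count": 0,
  "created_time": "2024-01-01 10:00:00",
  "updated_time": "2024-01-01 10:00:00"
}

GET knowledge_detail/_search
{
  "query": {
    "match": { "navigation_search": "部署" }
  }
}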

text

text literally means text; see my article_author field configuration here:

"article_author": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      }

First of all, type declares the field's data type.
A text field is analyzed (tokenized) by Elasticsearch at index time, which is what makes full-text search on it possible.
It is paired here with a keyword sub-field (which is also what dynamic mapping gives strings by default); that sub-field keeps the unanalyzed value and is meant for exact matches, sorting, and aggregations on this field, as shown in the example below.
About analyzers:
Elasticsearch's default analyzer is standard, a fairly simple analyzer whose Chinese support is only so-so; the fields above use the ik analyzer instead (installation instructions follow).
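
A hedged example of the difference (index and field names taken from the mapping above): the full-text query hits the analyzed text field, while the aggregation uses the unanalyzed .keyword sub-field.

GET knowledge_detail/_search
{
  "query": {
    "match": { "article_author": "张三" }
  },
  "aggs": {
    "authors": {
      "terms": { "field": "article_author.keyword" }
    }
  }
}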

Installing the ik analyzer

Official ik analyzer plugin address: https://github.com/infinilabs/analysis-ik
Make sure you download the release matching your Elasticsearch version, otherwise the plugin will fail to load.
Create an ik folder under /usr/share/elasticsearch/plugins and put the plugin files in it.
If you used the deployment described above, just drop it into the mounted plugins directory; the ES log will show the plugin being picked up, then restart the ES node containers.
You can check whether the ik analyzer is installed with the following request:

POST _analyze
{
  "text":"Tom & Viv",
  "analyzer": "ik_smart"
}
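
You can also list the installed plugins directly; if ik is installed it should appear in the output:

GET _cat/plugins?v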

Below is a comparison of Elasticsearch's default analyzer and ik on the same input.
standard analyzer:
Chinese is split into individual characters, and uppercase English letters are lowercased. Keep the lowercasing in mind: a later term query with uppercase input will not match.
ik analyzer:
Chinese segmentation is noticeably better; for English the difference is small.
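
To reproduce the comparison yourself, run the same text through both analyzers (the sample sentence is arbitrary):

POST _analyze
{
  "text": "中华人民共和国国歌",
  "analyzer": "standard"
}

POST _analyze
{
  "text": "中华人民共和国国歌",
  "analyzer": "ik_smart"
}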

Vector search

Food for thought: what is vector search, and why use it?

1. Search quality: we want the search engine to understand the meaning behind the text we type.
2. Input modality: input is no longer limited to text; images and other media can also be used as the query.
3. Relevance: results should stay relevant within the topic or context of the input.

To be continued.
