部署
ELK 版本 7.17.10
docker 版本 20.10.21
docker-compose 版本 1.21.1
主要通过编写 docker-compose.yml 拉起 elasticsearch、kibana、logstash 容器,并且开启 SSL 认证
相对传统的 docker run 部署,docker-compose 的部署方式会方便一些,并且和 k8s 的部署方式比较相似,都可以通过 yml 配置内容拉起容器(个人观点)
后续规划接入其他组件
主要docker-compose.yml
主要 Elastic , kibana 使用一个docker-compose.yml
一个es服务和一个可视化操作界面 使用https 认证
主要使用的官方提供的docker-compose样例
可以参考官方文档(注意:该样例来自 8.12 分支,而本文部署的是 7.17.10,个别配置项会有差异): https://github.com/elastic/elasticsearch/blob/8.12/docs/reference/setup/install/docker/docker-compose.yml
version: "2.2"

services:
  # One-shot helper container. In order, the script below:
  #   1. aborts if ELASTIC_PASSWORD or KIBANA_PASSWORD is empty,
  #   2. generates a CA plus one certificate per entry of
  #      config/certs/instances.yml (skipped when ca.zip already exists),
  #   3. hands the generated files to uid 1000 / gid 0,
  #   4. waits until es01 answers over HTTPS, then sets the kibana_system
  #      password through the security API.
  # The healthcheck turns healthy once es01's certificate exists, which is
  # what lets es01 start (see its depends_on).
  create_certs:
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
    # Run as root so the script is allowed to chown the generated files.
    user: "0"
    container_name: es_create_certs
    # Every statement ends with ';' so the script stays correct even if the
    # YAML folded scalar joins lines with spaces. The chown targets
    # config/certs (the mounted directory, addressed with the same relative
    # path the rest of the script uses) instead of the never-mounted /certs.
    command: >
      bash -c '
        if [ x${ELASTIC_PASSWORD} == x ]; then
          echo "Set the ELASTIC_PASSWORD environment variable in the .env file";
          exit 1;
        elif [ x${KIBANA_PASSWORD} == x ]; then
          echo "Set the KIBANA_PASSWORD environment variable in the .env file";
          exit 1;
        fi;
        if [[ ! -f config/certs/ca.zip ]]; then
          bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
          unzip config/certs/ca.zip -d config/certs;
          bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
          unzip config/certs/certs.zip -d config/certs;
        fi;
        echo "Setting file permissions";
        chown -R 1000:0 config/certs;
        echo "Waiting for Elasticsearch availability";
        until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
        echo "Setting kibana_system password";
        until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done;
        echo "All done!";
      '
    healthcheck:
      test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"]
      interval: 1s
      timeout: 5s
      retries: 120

  # Elasticsearch node es01: the only node whose HTTP port is published to
  # the host, and the only one that receives ELASTIC_PASSWORD.
  es01:
    depends_on:
      create_certs:
        condition: service_healthy
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    restart: always
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
      - /opt/module/docker-compose/elk-prod/es/plugins:/usr/share/elasticsearch/plugins
      - /opt/module/docker-compose/elk-prod/es/data/es01:/usr/share/elasticsearch/data
    ports:
      - "${ES_PORT}:9200"
    environment:
      # Must be es01: it has to match this node's entry in
      # cluster.initial_master_nodes and must not collide with es02.
      - node.name=es01
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es02,es03
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
      - bootstrap.memory_lock=true
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es01/es01.key
      - xpack.security.http.ssl.certificate=certs/es01/es01.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es01/es01.key
      - xpack.security.transport.ssl.certificate=certs/es01/es01.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
    # Unlimited memlock goes together with bootstrap.memory_lock=true.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Healthy once the node answers over HTTPS and rejects the anonymous
    # request with "missing authentication credentials".
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120

  # Elasticsearch node es02 (started after es01).
  es02:
    depends_on:
      - es01
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    restart: always
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
      - /opt/module/docker-compose/elk-prod/es/plugins:/usr/share/elasticsearch/plugins
      - /opt/module/docker-compose/elk-prod/es/data/es02:/usr/share/elasticsearch/data
    environment:
      - node.name=es02
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es01,es03
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es02/es02.key
      - xpack.security.http.ssl.certificate=certs/es02/es02.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es02/es02.key
      - xpack.security.transport.ssl.certificate=certs/es02/es02.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120

  # Elasticsearch node es03 (started after es02).
  es03:
    depends_on:
      - es02
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    restart: always
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/elasticsearch/config/certs
      - /opt/module/docker-compose/elk-prod/es/plugins:/usr/share/elasticsearch/plugins
      - /opt/module/docker-compose/elk-prod/es/data/es03:/usr/share/elasticsearch/data
    environment:
      - node.name=es03
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es01,es02
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es03/es03.key
      - xpack.security.http.ssl.certificate=certs/es03/es03.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es03/es03.key
      - xpack.security.transport.ssl.certificate=certs/es03/es03.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120

  # Kibana: starts only after all three Elasticsearch nodes are healthy and
  # connects to es01 over HTTPS as the kibana_system user.
  kibana:
    depends_on:
      es01:
        condition: service_healthy
      es02:
        condition: service_healthy
      es03:
        condition: service_healthy
    restart: always
    image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
    volumes:
      - /opt/module/docker-compose/elk-prod/certs:/usr/share/kibana/config/certs
      - /opt/module/docker-compose/elk-prod/kibana/plugins:/usr/share/kibana/plugins
      - /opt/module/docker-compose/elk-prod/kibana/data:/usr/share/kibana/data
    ports:
      - "${KIBANA_PORT}:5601"
    environment:
      - SERVERNAME=kibana
      - ELASTICSEARCH_HOSTS=https://es01:9200
      - ELASTICSEARCH_USERNAME=kibana_system
      - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD}
      - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
    # Healthy once the Kibana HTTP endpoint answers with a 302 redirect.
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "curl -s -I http://localhost:5601 | grep -q 'HTTP/1.1 302 Found'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120
相关参数说明
healthcheck 通过命令去做一个检测,检测容器是否正常运行
depends_on 让当前容器依据上一个容器的状态去执行部署,这样方便排查问题。否则三个容器同时启动,你可能很难知道是哪一个出了问题
ES_JAVA_OPTS es一定要注意内存的分配
create_certs
这步主要用于生成证书
instances.yml 文件
# Input for `elasticsearch-certutil cert --in`: one certificate is generated
# per entry; `dns` and `ip` list the names and addresses for that instance.
instances:
  - name: es01
    dns:
      - es01
      - localhost
    ip:
      - 127.0.0.1
  - name: es02
    dns:
      - es02
      - localhost
    ip:
      - 127.0.0.1
  - name: es03
    dns:
      - es03
      - localhost
    ip:
      - 127.0.0.1
  - name: kibana
    dns:
      - kibana
      - localhost
    ip:
      - 127.0.0.1
  - name: logstash
    dns:
      - logstash
      - localhost
    ip:
      - 127.0.0.1
生成之后,挂载的目录下会出现所有的证书目录,它会按照你的 instances 配置生成相应的目录。我这里提前生成了 logstash 的基础证书;如果你没有这方面的规划可以不生成,后续有外部的 logstash 需要集成时,在 es 集群里重新生成即可
logstash 证书后续需要重新配置
logstash
logstash我这里是单独使用了一个docker-compose 去单独装配logstash,你也可以把他们都合起来
version: "2.2"

services:
  # Logstash service
  logstash:
    image: logstash:${STACK_VERSION}
    restart: always
    volumes:
      # logstash.yml and the example pipeline read ca.crt, logstash.crt and
      # logstash.pkcs8.key from /etc/logstash/config/certs, so all three
      # files must be present in this host directory.
      - /opt/module/docker-compose/elk-prod/certs/logstash:/etc/logstash/config/certs
      - /opt/module/docker-compose/elk-prod/logstash/config/logstash.yml:/usr/share/logstash/config/logstash.yml
      - /opt/module/docker-compose/elk-prod/logstash/config/pipeline:/usr/share/logstash/pipeline
      - /opt/module/docker-compose/elk-prod/logstash/data:/usr/share/logstash/data
      - /opt/module/docker-compose/elk-prod/logstash/plugins:/usr/share/logstash/plugins
    ports:
      # NOTE(review): only 4560 is published; the example pipeline's beats
      # input listens on 5044 - publish that port as well if Beats agents
      # connect from outside this compose network.
      - "${LOGSTASH_PORT}:4560"
logstash.yml 文件:这里使用的是在 kibana 中创建的 logstash 用户
# Logstash node settings: X-Pack monitoring data is shipped to es01 over
# HTTPS, verified against the CA file under the certs directory.
node.name: logstash

xpack.monitoring.enabled: true
xpack.monitoring.elasticsearch.username: logstash_system
xpack.monitoring.elasticsearch.password: "password"
xpack.monitoring.elasticsearch.hosts:
  - "https://es01:9200"
xpack.monitoring.elasticsearch.ssl.certificate_authority: /etc/logstash/config/certs/ca.crt
logstash.conf示例
# Example pipeline: reads movies.csv (and accepts Beats over TLS), parses each
# row into id / title / year / genre, and indexes it into a daily
# movies-YYYY.MM.dd index on es01.
input {
  # TLS-enabled Beats listener; the key file is the PKCS#8 variant.
  beats {
    port => 5044
    ssl => true
    ssl_key => '/etc/logstash/config/certs/logstash.pkcs8.key'
    ssl_certificate => '/etc/logstash/config/certs/logstash.crt'
  }
  # Re-read the CSV from the start on every run (no sincedb state is kept).
  file {
    path => "/usr/share/logstash/data/movies.csv"
    start_position => "beginning"
    sincedb_path => "/dev/null"
  }
}

filter {
  csv {
    separator => ","
    columns => ["id","content","genre"]
  }

  # "a|b|c" -> ["a","b","c"].
  # @timestamp is deliberately NOT removed: the output index name
  # "movies-%{+YYYY.MM.dd}" is rendered from it.
  mutate {
    split => { "genre" => "|" }
    remove_field => ["path", "host", "message"]
  }

  # "Title (1995)" -> title = "Title ", year = "1995)".
  mutate {
    split => ["content", "("]
    add_field => { "title" => "%{[content][0]}"}
    add_field => { "year" => "%{[content][1]}"}
  }

  # Convert year to an integer, trim the title, drop the raw content field.
  mutate {
    convert => {
      "year" => "integer"
    }
    strip => ["title"]
    remove_field => ["content"]
  }
}

output {
  elasticsearch {
    hosts => ["https://es01:9200"]
    index => "movies-%{+YYYY.MM.dd}"
    document_id => "%{id}"
    cacert => '/etc/logstash/config/certs/ca.crt'
    user => 'logstash_writer'
    password => 'xxx'
  }
  stdout {}
}
这里可以看到我们使用的密钥是logstash.pkcs8.key 详情可以参考官方文档
https://www.elastic.co/cn/blog/configuring-ssl-tls-and-https-to-secure-elasticsearch-kibana-beats-and-logstash#prepare-logstash
切记:docker 部署一定要挂载配置文件的目录。否则 logstash 默认目录里的默认配置是不带证书配置的,可能会导致你的 logstash 应用实例一直报权限错误
数据类型
这里我拿个我创建的索引模板做个赘述
这个索引是一个文章索引,里面有安装ik分词器(后续去写如何引入分词器)
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 2,
    "analysis": {
      "analyzer": {
        "ik_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word"
        },
        "ik_smart": {
          "type": "custom",
          "tokenizer": "ik_smart"
        }
      }
    }
  },
  "aliases": {
    "knowledge_detail": {}
  },
  "mappings": {
    "properties": {
      "system_from": {
        "type": "keyword"
      },
      "author_code": {
        "type": "keyword"
      },
      "article_author": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "tag": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "article_title": {
        "type": "text",
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "article_info": {
        "type": "text",
        "analyzer": "ik_analyzer",
        "copy_to": "navigation_search"
      },
      "navigation_search": {
        "type": "text",
        "analyzer": "ik_analyzer"
      },
      "is_private": {
        "type": "integer",
        "null_value": 0
      },
      "hit_count": {
        "type": "integer",
        "null_value": 0
      },
      "collect_count": {
        "type": "integer",
        "null_value": 0
      },
      "created_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      },
      "updated_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss"
      }
    }
  }
}
这个索引的字段信息在 mappings.properties 下
Es 是一个非关系型数据库,所以它没有关系型数据库那样的行列格式,它的每条数据都是一个 json 格式的文档。
text
text 字面意思就是文本,可以看我这里的 article_author 字段配置
"article_author": {
  "type": "text",
  "fields": {
    "keyword": {
      "type": "keyword",
      "ignore_above": 256
    }
  },
  "analyzer": "ik_analyzer",
  "copy_to": "navigation_search"
}
首先是type 代表类型的意思
这种类型字段默认会被Es的分词器给分词,用于索引的编排,可以进行全文本搜索
它默认会带一个叫做keyword(关键字)的字段属性这个是专门用于对该字段做一些聚合操作的
有关分词器
Es 默认使用的是 standard 分词器,这是一种简单的分词方式,对中文的支持比较一般。像我上述的字段属性,分词使用的是 ik 分词器(后续提供安装方式)
ik分词器的安装
ik分词器插件官方地址: https://github.com/infinilabs/analysis-ik
一定要下和es的版本一致的,否则插件导入时会报错
在 /usr/share/elasticsearch/plugins 这个目录下新建一个 ik 文件夹,并把插件放进去
如果你是用的我上面的部署方式,放在挂载目录即可,放入后es日志会输出识别结果,然后重启下es节点容器
可以使用如下命令,查看ik分词器是否安装
POST _analyze
{
  "text": "Tom & Viv",
  "analyzer": "ik_smart"
}
顺便在下方附一张图片对比es 默认的分词和ik分词的效果
standard 分词器效果
中文会被分词成一个个的字,大写英文字母会被转换为小写字母这一点要注意,后续使用term查询会导致大写查不出来
ik分词效果图
在中文上会支持好些,英文上差距不是很大
向量搜索技术
思考:什么是向量搜索? 为什么要用向量搜索?
1. 搜索效果 : 希望搜索引擎能够理解我输入的文字背后的意义
2. 输入的方式: 输入方式不再仅仅局限于文本,还可以利用图片等其他方式作为输入
3. 关联性: 能在输入的主题领域里或者上下文都能产生关联
未完待续