ES – Basics – 7.2.3 – Cross-Datacenter Disaster Recovery – Practice – Implementing Double Writes with a Message Queue
Note
I did not actually run this experiment myself, because the container images could not be downloaded.
Reference: https://cloud.tencent.com/developer/article/1928886
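The idea behind this setup: instead of having the application write to two Elasticsearch clusters directly, every write is published to a Kafka topic, and each cluster runs its own Logstash consumer with a distinct consumer group ID that replays the messages into that cluster. Because the two consumers track their offsets independently, a failure of either a cluster or a consumer leaves the other side untouched, and the failed side catches up from its committed offset once it recovers; sections 3 and 4 below demonstrate exactly this.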
1. Environment Preparation
Middleware | Version | Notes |
---|---|---|
Docker | 20.10.12 | – |
Docker Compose | 1.29.2 | – |
Kibana | 7.16.2 | – |
Elasticsearch | 7.16.2 | – |
Logstash | 7.16.2 | – |
Kafka | latest | bitnami/kafka, per the compose file below |
ZooKeeper | latest | – |
1.1. Create docker-compose.yml
mkdir -p /tmp/mq
cd /tmp/mq
vim docker-compose.yml
Contents:
version: '3.8'
services:
  # Cluster cluster01
  # Elasticsearch
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
    container_name: es01
    environment:
      # Node name
      - node.name=es01
      # Cluster name
      - cluster.name=cluster01
      # Start as a single-node cluster
      - discovery.type=single-node
      # Enable memory locking
      - bootstrap.memory_lock=true
      # JVM heap size
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
      # Enable security
      - xpack.security.enabled=true
      # Password for the elastic user
      - ELASTIC_PASSWORD=test123
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Port mapping: host port:container port
    ports:
      - 9200:9200
    volumes:
      - data01:/usr/share/elasticsearch/data
    networks:
      - elastic
  # Kibana
  kib01:
    image: docker.elastic.co/kibana/kibana:7.16.2
    container_name: kib01
    ports:
      - 5601:5601
    environment:
      # Elasticsearch connection settings
      ELASTICSEARCH_URL: http://es01:9200
      ELASTICSEARCH_HOSTS: '["http://es01:9200"]'
      ELASTICSEARCH_USERNAME: elastic
      ELASTICSEARCH_PASSWORD: test123
    networks:
      - elastic
  # Cluster cluster02
  es02:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
    container_name: es02
    environment:
      - node.name=es02
      - cluster.name=cluster02
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
      - xpack.security.enabled=true
      - ELASTIC_PASSWORD=test123
    ulimits:
      memlock:
        soft: -1
        hard: -1
    ports:
      - 9201:9200
    volumes:
      - data02:/usr/share/elasticsearch/data
    networks:
      - elastic
  kib02:
    image: docker.elastic.co/kibana/kibana:7.16.2
    container_name: kib02
    ports:
      - 5602:5601
    environment:
      ELASTICSEARCH_URL: http://es02:9200
      ELASTICSEARCH_HOSTS: '["http://es02:9200"]'
      ELASTICSEARCH_USERNAME: elastic
      ELASTICSEARCH_PASSWORD: test123
    networks:
      - elastic
  zookeeper:
    image: bitnami/zookeeper:latest
    container_name: zookeeper
    user: root
    ports:
      - 2181:2181
    environment:
      - ALLOW_ANONYMOUS_LOGIN=yes
    networks:
      - elastic
  # Message queue
  kafka:
    image: bitnami/kafka:latest
    container_name: kafka
    user: root
    ports:
      - 9092:9092
    environment:
      - KAFKA_BROKER_ID=1
      - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092
      - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
      - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181
      - ALLOW_PLAINTEXT_LISTENER=yes
    depends_on:
      - zookeeper
    networks:
      - elastic
  # Consumers
  logstash01:
    image: docker.elastic.co/logstash/logstash:7.16.2
    container_name: logstash01
    ports:
      - 9600:9600
    volumes:
      - ./logstash-cluster01.conf:/usr/share/logstash/pipeline/logstash.conf
    networks:
      - elastic
  logstash02:
    image: docker.elastic.co/logstash/logstash:7.16.2
    container_name: logstash02
    ports:
      - 9601:9600
    volumes:
      - ./logstash-cluster02.conf:/usr/share/logstash/pipeline/logstash.conf
    networks:
      - elastic
# Volumes
volumes:
  data01:
    driver: local
  data02:
    driver: local
# Networks
networks:
  elastic:
    driver: bridge
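One pre-flight note not covered in the original article: on Linux hosts the Elasticsearch containers may fail to start if the kernel's mmap count limit is too low. Elastic's Docker documentation recommends raising it:

# Recommended by Elastic's Docker docs before running Elasticsearch in containers
sysctl -w vm.max_map_count=262144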
Here Logstash consumes the data from the Kafka cluster and writes it into Elasticsearch. Clusters cluster01 and cluster02 each get their own Logstash instance, and the two instances use different consumer group IDs.
The Logstash configuration for cluster01 is as follows:
logstash-cluster01.conf
input {
  kafka {
    codec => "json"
    topics => ["index-1"]
    group_id => "logstash_cluster01"
    consumer_threads => 1
    bootstrap_servers => "kafka:9092"
  }
}
output {
  elasticsearch {
    hosts => ["es01:9200"]
    index => "index-1"
    user => "elastic"
    password => "test123"
  }
}
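The compose file also mounts a ./logstash-cluster02.conf for logstash02. The original article does not reproduce it, but per the description above it should differ only in the consumer group ID and the target cluster; a sketch:
logstash-cluster02.conf
input {
  kafka {
    codec => "json"
    topics => ["index-1"]
    # A different group ID, so cluster02's consumption is tracked independently
    group_id => "logstash_cluster02"
    consumer_threads => 1
    bootstrap_servers => "kafka:9092"
  }
}
output {
  elasticsearch {
    hosts => ["es02:9200"]
    index => "index-1"
    user => "elastic"
    password => "test123"
  }
}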
1.2. Start the services with docker-compose
cd /tmp/mq
docker-compose up -d
1.3. Check container status
$ docker-compose ps
Name Command State Ports
----------------------------------------------------------------------------------------------------------------------------
es01 /bin/tini -- /usr/local/bi ... Up 0.0.0.0:9200->9200/tcp,:::9200->9200/tcp, 9300/tcp
es02 /bin/tini -- /usr/local/bi ... Up 0.0.0.0:9201->9200/tcp,:::9201->9200/tcp, 9300/tcp
kafka /opt/bitnami/scripts/kafka ... Up 0.0.0.0:9092->9092/tcp,:::9092->9092/tcp
kib01 /bin/tini -- /usr/local/bi ... Up 0.0.0.0:5601->5601/tcp,:::5601->5601/tcp
kib02 /bin/tini -- /usr/local/bi ... Up 0.0.0.0:5602->5601/tcp,:::5602->5601/tcp
logstash01 /usr/local/bin/docker-entr ... Up 5044/tcp, 0.0.0.0:9600->9600/tcp,:::9600->9600/tcp
logstash02 /usr/local/bin/docker-entr ... Up 5044/tcp, 0.0.0.0:9601->9600/tcp,:::9601->9600/tcp
zookeeper /opt/bitnami/scripts/zooke ... Up 0.0.0.0:2181->2181/tcp,:::2181->2181/tcp, 2888/tcp, 3888/tcp, 8080/tcp
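Optionally, confirm that both clusters respond before continuing (ports and credentials as defined in the compose file):

# cluster01
curl -u elastic:test123 http://localhost:9200
# cluster02
curl -u elastic:test123 http://localhost:9201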
2. Create the Topic
2.1. Enter the Kafka container with exec
docker-compose exec kafka bash
2.2. Create the topic index-1; Logstash will consume this topic's messages and write the data into the Elasticsearch index index-1
kafka-topics.sh --bootstrap-server 127.0.0.1:9092 \
--create --topic index-1 --partitions 1 --replication-factor 1
2.3. Inspect the created topic
# List topics
kafka-topics.sh --bootstrap-server 127.0.0.1:9092 --list
# Output
index-1
# Describe the partitions and replicas of topic index-1
kafka-topics.sh --bootstrap-server 127.0.0.1:9092 \
--describe --topic index-1
# Output
Topic: index-1 TopicId: ppi1f_loTn2p6BEGjM640w PartitionCount: 1 ReplicationFactor: 1 Configs: segment.bytes=1073741824
Topic: index-1 Partition: 0 Leader: 1 Replicas: 1 Isr: 1
3. Verify Synchronization
3.1. Write 3 messages to topic index-1
kafka-console-producer.sh --bootstrap-server 127.0.0.1:9092 \
--topic index-1
# Enter JSON-formatted data
> {"name":"tom", "age": 18}
> {"name":"mike", "age": 19}
> {"name":"lisa", "age": 20}
Query the index index-1 on both cluster01 and cluster02 through Kibana; the results show that Logstash has successfully consumed the messages from Kafka and written them into Elasticsearch.
GET index-1/_search
# Response
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Oa09CX4BZrgW9YGKOkMg",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:48:34.242Z",
"name" : "tom",
"age" : 18
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Oq09CX4BZrgW9YGKRkO3",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:48:37.708Z",
"name" : "mike",
"age" : 19
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "O609CX4BZrgW9YGKWEPF",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:48:42.330Z",
"name" : "lisa",
"age" : 20
}
}
]
}
}
4. Failure Testing
4.1. Simulate an Elasticsearch cluster failure
docker-compose stop es01
Then write 2 more messages to Kafka.
kafka-console-producer.sh --bootstrap-server 127.0.0.1:9092 \
--topic index-1
# Enter JSON-formatted data
> {"name":"peter", "age": 18}
> {"name":"frank", "age": 19}
At this point es01 can no longer be reached.
The logstash01 logs now show errors connecting to es01.
$ docker-compose logs logstash01
logstash01 | [2021-12-30T03:03:26,075][WARN ][logstash.outputs.elasticsearch][main] Attempted to resurrect connection to dead ES instance, but got an error {:url=>"http://elastic:xxxxxx@es01:9200/", :exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::HostUnreachableError, :message=>"Elasticsearch Unreachable: [http://elastic:xxxxxx@es01:9200/][Manticore::SocketException] Connect to es01:9200 [es01/192.168.32.4] failed: Connection refused (Connection refused)"}
logstash01 | [2021-12-30T03:03:26,959][ERROR][logstash.outputs.elasticsearchmonitoring][.monitoring-logstash][d1482d954dd93478867963ea3f24534a09f4484fd64cc67ae3933128a42d4838] Attempted to send a bulk request but there are no living connections in the pool (perhaps Elasticsearch is unreachable or down?) {:message=>"No Available connections", :exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::NoConnectionAvailableError, :will_retry_in_seconds=>64}
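Roughly speaking, this is why no data is lost here: Logstash's elasticsearch output keeps retrying while the cluster is unreachable, the blocked pipeline applies backpressure to the kafka input, and offsets are not committed for events that have not been delivered. The pending messages simply wait in Kafka until es01 comes back.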
Querying cluster cluster02 shows that data is still written normally; it is unaffected.
{
"took" : 694,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Oa09CX4BZrgW9YGKOkMg",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:48:34.242Z",
"name" : "tom",
"age" : 18
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Oq09CX4BZrgW9YGKRkO3",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:48:37.708Z",
"name" : "mike",
"age" : 19
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "O609CX4BZrgW9YGKWEPF",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:48:42.330Z",
"name" : "lisa",
"age" : 20
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Pa1GCX4BZrgW9YGK2UOd",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:59:05.139Z",
"name" : "peter",
"age" : 18
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Pq1GCX4BZrgW9YGK7EMI",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T02:59:09.854Z",
"name" : "frank",
"age" : 19
}
}
]
}
}
Now restart es01; Logstash will retry the writes to es01 that previously failed.
docker-compose start es01
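To confirm the catch-up without opening Kibana, you can also poll the document count over the REST API (same port and credentials as above); once Logstash has caught up, the count should include the 2 messages written during the outage:

curl -u elastic:test123 http://localhost:9200/index-1/_count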
Querying cluster01 again shows that the 2 documents produced while the cluster was down have been appended successfully.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "lrROCX4BVL35GcLwNcG1",
"_score" : 1.0,
"_source" : {
"age" : 19,
"@version" : "1",
"name" : "frank",
"@timestamp" : "2021-12-30T03:07:07.466Z"
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "lLRLCX4BVL35GcLws8Hi",
"_score" : 1.0,
"_source" : {
"age" : 18,
"@version" : "1",
"name" : "peter",
"@timestamp" : "2021-12-30T02:59:05.139Z"
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "tv49CX4BAcbk4YbwWMvF",
"_score" : 1.0,
"_source" : {
"age" : 20,
"@version" : "1",
"name" : "lisa",
"@timestamp" : "2021-12-30T02:48:42.330Z"
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "tf49CX4BAcbk4YbwRsu4",
"_score" : 1.0,
"_source" : {
"age" : 19,
"@version" : "1",
"name" : "mike",
"@timestamp" : "2021-12-30T02:48:37.708Z"
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "tP49CX4BAcbk4YbwOsse",
"_score" : 1.0,
"_source" : {
"age" : 18,
"@version" : "1",
"name" : "tom",
"@timestamp" : "2021-12-30T02:48:34.243Z"
}
}
]
}
}
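One detail worth noticing in this response: the total is 6 rather than the expected 5, and frank carries a fresh @timestamp (03:07:07). This looks like at-least-once delivery at work: a message in flight when es01 went down can be replayed and indexed a second time after recovery. If duplicates matter, setting the document _id in the Logstash elasticsearch output from a unique business key (via its document_id option) makes replays idempotent.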
4.2. Simulate a consumer failure
To make the results easier to observe, first clear the data in the index-1 index on both cluster01 and cluster02.
POST index-1/_delete_by_query
{
"query": {
"match_all": {}
}
}
Next, simulate a consumer failure by stopping logstash01.
docker-compose stop logstash01
Write 3 messages to Kafka.
kafka-console-producer.sh --bootstrap-server 127.0.0.1:9092 \
--topic index-1
# Enter JSON-formatted data
> {"name":"cris", "age": 16}
> {"name":"james", "age": 17}
> {"name":"kavin", "age": 18}
Querying index index-1 on cluster01 now returns no data, because logstash01 has stopped working.
GET index-1/_search
# Response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
On cluster02, data is still written to index index-1 normally.
GET index-1/_search
# Response
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Qq11CX4BZrgW9YGKTUNS",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T03:49:49.228Z",
"name" : "cris",
"age" : 16
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "Q611CX4BZrgW9YGKX0O_",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T03:49:54.135Z",
"name" : "james",
"age" : 17
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "RK11CX4BZrgW9YGKb0N_",
"_score" : 1.0,
"_source" : {
"@version" : "1",
"@timestamp" : "2021-12-30T03:49:58.166Z",
"name" : "kavin",
"age" : 18
}
}
]
}
}
Now check the consumption status of logstash01 and logstash02 in Kafka. The consumer group logstash_cluster01 (logstash01's group) no longer has any active members, and its unconsumed message count (LAG) is 3; these are exactly the 3 messages written after logstash01 was stopped. The logstash_cluster02 group, by contrast, has consumed all messages.
# Enter the Kafka container
$ docker-compose exec kafka bash
# List consumer groups
root@e23acd56e9e2:/# kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 --list
# The two Logstash instances belong to different consumer groups
logstash_cluster01
logstash_cluster02
# Check the consumption status of logstash01
root@e23acd56e9e2:/# kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 --describe --group logstash_cluster01
Consumer group 'logstash_cluster01' has no active members.
GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID
logstash_cluster01 index-1 0 5 8 3 - - -
# Check the consumption status of logstash02
root@e23acd56e9e2:/# kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 --describe --group logstash_cluster02
GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID
logstash_cluster02 index-1 0 8 8 0 logstash-0-0925ce14-66c9-42a8-ae19-5d8186afada3 /192.168.32.5 logstash-0
Now restart logstash01; it will resume consuming from its last committed offset in Kafka.
docker-compose start logstash01
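Re-running the describe command from above should now show the group's LAG dropping from 3 back to 0 as logstash01 catches up:

kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 \
  --describe --group logstash_cluster01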
Checking the data on cluster01 shows that logstash01 has written the remaining 3 documents into Elasticsearch.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "N6WCCX4BEydPylz69S3g",
"_score" : 1.0,
"_source" : {
"age" : 16,
"name" : "cris",
"@version" : "1",
"@timestamp" : "2021-12-30T04:04:44.503Z"
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "NaWCCX4BEydPylz69S3f",
"_score" : 1.0,
"_source" : {
"age" : 18,
"name" : "kavin",
"@version" : "1",
"@timestamp" : "2021-12-30T04:04:44.527Z"
}
},
{
"_index" : "index-1",
"_type" : "_doc",
"_id" : "NqWCCX4BEydPylz69S3g",
"_score" : 1.0,
"_source" : {
"age" : 17,
"name" : "james",
"@version" : "1",
"@timestamp" : "2021-12-30T04:04:44.526Z"
}
}
]
}
}
5. Clean Up
Tear everything down to leave a clean slate for subsequent tests.
cd /tmp/mq
# The -v flag also removes the volumes
docker-compose down -v