1. Filebeat
### --- Filebeat
~~~ Filebeat exists mainly to address how resource-hungry the Logstash agent is:
~~~ Logstash is written in JRuby/Java and runs on the JVM,
~~~ so Elastic released the lightweight Beats family of shippers, of which Filebeat is the most widely used.
~~~ Official docs: https://www.elastic.co/guide/en/beats/filebeat/7.3/index.html
~~~ Note: Filebeat vs. Logstash
~~~ Logstash runs on the JVM and consumes a lot of resources:
~~~ a single Logstash instance needs roughly 500 MB of memory (which is also why it is so slow to start),
~~~ while Filebeat needs only around 10 MB.
~~~ In the common ELK log-collection architecture,
~~~ Filebeat on every node ships the log entries to a Kafka cluster,
~~~ Logstash consumes the Kafka data and filters it according to its pipeline configuration,
~~~ and the filtered events are then written to Elasticsearch and visualized with Kibana.
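~~~ For reference, a minimal Logstash pipeline for the consumer side of this architecture could
~~~ look like the sketch below; the Elasticsearch address and the index name are illustrative
~~~ assumptions, not values taken from this cluster:
input {
  kafka {
    bootstrap_servers => "hadoop01:9092,hadoop02:9092,hadoop03:9092"
    topics => ["nginx_access_log"]
    codec => "json"
  }
}
filter {
  # Expand the nginx JSON string carried in the "message" field into top-level fields
  json {
    source => "message"
  }
}
output {
  elasticsearch {
    hosts => ["hadoop01:9200"]
    index => "nginx-access-%{+YYYY.MM.dd}"
  }
}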
2. Installing Filebeat
### --- Download Filebeat on hadoop02
~~~ # Download the Filebeat release package
[root@hadoop02 software]# wget -c https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-7.3.0-linux-x86_64.tar.gz
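~~~ # Optionally verify the download; Elastic publishes a .sha512 checksum alongside each artifact
[root@hadoop02 software]# wget -c https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-7.3.0-linux-x86_64.tar.gz.sha512
[root@hadoop02 software]# sha512sum -c filebeat-7.3.0-linux-x86_64.tar.gz.sha512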
### --- Extract the Filebeat package
~~~ # Extract the release package
[root@hadoop02 software]# tar -zxvf filebeat-7.3.0-linux-x86_64.tar.gz -C ../servers/es/
~~~ # Rename the extracted directory
[root@hadoop02 ~]# cd /opt/yanqi/servers/es/
[root@hadoop02 es]# mv filebeat-7.3.0-linux-x86_64/ filebeat
### --- Edit the configuration file
~~~ # Edit the config file: collect the nginx access and error logs and send them to Kafka.
[root@hadoop02 ~]# vim /opt/yanqi/servers/es/filebeat/filebeat.yml
~~~ # Configuration file contents
filebeat.inputs:
- type: log
  paths:
    - /usr/local/nginx/logs/access.log
  fields:
    app: www
    type: nginx-access
  fields_under_root: true
- type: log
  paths:
    - /usr/local/nginx/logs/error.log
  fields:
    app: www
    type: nginx-error
  fields_under_root: true

output.kafka:
  hosts: ["hadoop01:9092","hadoop02:9092","hadoop03:9092"]
  topic: "nginx_access_log"
3. Verifying the Filebeat Service
### --- Inspect the JSON log data emitted by nginx
~~~ # nginx writes its access log as JSON
[root@hadoop02 ~]# tail -f /usr/local/nginx/logs/access.log
{ "@timestamp": "2021-11-27T00:34:33+08:00", "remote_addr": "115.195.145.230", "remote_user": "-", "body_bytes_sent": "0", "request_time": "0.000", "status": "304", "request_uri": "/", "request_method": "GET", "http_referrer": "-", "http_x_forwarded_for": "-", "http_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"}
### --- Start the Kafka services
~~~ # On hadoop01~03: start the ZooKeeper service
[root@hadoop02 ~]# ./zk.sh start
~~~ # Start the Kafka broker; -daemon runs it in the background (omit it to run in the foreground and watch the logs)
[root@hadoop02 ~]# kafka-server-start.sh -daemon /opt/yanqi/servers/kafka_2.12/config/server.properties
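~~~ zk.sh above is a custom helper script rather than a ZooKeeper built-in; a minimal sketch,
~~~ assuming passwordless ssh between the nodes and zkServer.sh on each node's PATH, could be:
#!/bin/bash
# Run the given ZooKeeper action (start/stop/status) on every cluster node
for host in hadoop01 hadoop02 hadoop03; do
  echo "------ zookeeper $1 on $host ------"
  ssh $host "source /etc/profile; zkServer.sh $1"
done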
### --- Create the Kafka topic and start a consumer
~~~ # Create the topic
[root@hadoop02 ~]# kafka-topics.sh --create --zookeeper hadoop01:2181,hadoop02:2181,hadoop03:2181/myKafka --replication-factor 1 --partitions 3 --topic nginx_access_log
~~~ # List the existing topics
[root@hadoop02 ~]# kafka-topics.sh --zookeeper localhost:2181/myKafka --list
nginx_access_log
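~~~ # Optionally describe the topic to confirm its partition and replica assignment
[root@hadoop02 ~]# kafka-topics.sh --zookeeper localhost:2181/myKafka --describe --topic nginx_access_log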
~~~ # Start a Kafka console consumer
[root@hadoop02 ~]# kafka-console-consumer.sh --bootstrap-server hadoop01:9092,hadoop02:9092,hadoop03:9092 --topic nginx_access_log --from-beginning
~~~ # Wait for the consumed data:
{"@timestamp":"2021-11-26T16:34:38.405Z","@metadata":{"beat":"filebeat","type":"_doc","version":"7.3.0","topic":"nginx_access_log"},"type":"nginx-access","app":"www","ecs":{"version":"1.0.1"},"host":{"name":"hadoop02"},"agent":{"type":"filebeat","ephemeral_id":"3f5b5145-dc50-4a62-a8a1-e43e61800d49","hostname":"hadoop02","id":"dfbd0913-f1fa-4b28-a120-2e9575d387fa","version":"7.3.0"},"log":{"offset":9384,"file":{"path":"/usr/local/nginx/logs/access.log"}},"message":"{ \"@timestamp\": \"2021-11-27T00:34:33+08:00\", \"remote_addr\": \"115.195.145.230\", \"remote_user\": \"-\", \"body_bytes_sent\": \"0\", \"request_time\": \"0.000\", \"status\": \"304\", \"request_uri\": \"/\", \"request_method\": \"GET\", \"http_referrer\": \"-\", \"http_x_forwarded_for\": \"-\", \"http_user_agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36\"}","input":{"type":"log"}}
### --- Start Filebeat
~~~ # Start the Filebeat service
[root@hadoop02 ~]# /opt/yanqi/servers/es/filebeat/filebeat -e -c /opt/yanqi/servers/es/filebeat/filebeat.yml
~~~ # Filebeat's console output (periodic monitoring metrics; the shipped nginx event shows up in the counters):
2021-11-27T00:34:51.398+0800 INFO [monitoring] log/log.go:145 Non-zero metrics in the last 30s {"monitoring": {"metrics": {"beat":{"cpu":{"system":{"ticks":40,"time":{"ms":2}},"total":{"ticks":100,"time":{"ms":8},"value":100},"user":{"ticks":60,"time":{"ms":6}}},"handles":{"limit":{"hard":65535,"soft":65535},"open":10},"info":{"ephemeral_id":"3f5b5145-dc50-4a62-a8a1-e43e61800d49","uptime":{"ms":240023}},"memstats":{"gc_next":4955232,"memory_alloc":3891728,"memory_total":14355280,"rss":307200},"runtime":{"goroutines":49}},"filebeat":{"events":{"added":1,"done":1},"harvester":{"open_files":1,"running":1}},"libbeat":{"config":{"module":{"running":0}},"output":{"events":{"acked":1,"batches":1,"total":1}},"outputs":{"kafka":{"bytes_read":60,"bytes_write":700}},"pipeline":{"clients":2,"events":{"active":0,"published":1,"total":1},"queue":{"acked":1}}},"registrar":{"states":{"current":2,"update":1},"writes":{"success":1,"total":1}},"system":{"load":{"1":0.08,"15":0.06,"5":0.07,"norm":{"1":0.04,"15":0.03,"5":0.035}}}}}}
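~~~ If Filebeat fails to start or nothing reaches Kafka, its built-in test subcommands can check
~~~ the setup (test config validates the file; test output attempts to reach the configured output):
~~~ # Validate that filebeat.yml parses correctly
[root@hadoop02 ~]# /opt/yanqi/servers/es/filebeat/filebeat test config -c /opt/yanqi/servers/es/filebeat/filebeat.yml
~~~ # Check connectivity to the configured Kafka output
[root@hadoop02 ~]# /opt/yanqi/servers/es/filebeat/filebeat test output -c /opt/yanqi/servers/es/filebeat/filebeat.yml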
Appendix 1: Output Data After JSON Pretty-Printing
### --- The nginx log entry as pretty-printed JSON
~~~ # nginx JSON log output
[root@hadoop02 ~]# tail -f /usr/local/nginx/logs/access.log
{
  "@timestamp": "2021-11-27T00:34:33+08:00",
  "remote_addr": "115.195.145.230",
  "remote_user": "-",
  "body_bytes_sent": "0",
  "request_time": "0.000",
  "status": "304",
  "request_uri": "/",
  "request_method": "GET",
  "http_referrer": "-",
  "http_x_forwarded_for": "-",
  "http_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"
}
### --- The Kafka consumer record pretty-printed as JSON
~~~ # Kafka consumer JSON output
{
  "@timestamp": "2021-11-26T16:34:38.405Z",
  "@metadata": {
    "beat": "filebeat",
    "type": "_doc",
    "version": "7.3.0",
    "topic": "nginx_access_log"
  },
  "type": "nginx-access",
  "app": "www",
  "ecs": {
    "version": "1.0.1"
  },
  "host": {
    "name": "hadoop02"
  },
  "agent": {
    "type": "filebeat",
    "ephemeral_id": "3f5b5145-dc50-4a62-a8a1-e43e61800d49",
    "hostname": "hadoop02",
    "id": "dfbd0913-f1fa-4b28-a120-2e9575d387fa",
    "version": "7.3.0"
  },
  "log": {
    "offset": 9384,
    "file": {
      "path": "/usr/local/nginx/logs/access.log"
    }
  },
  "message": "{ \"@timestamp\": \"2021-11-27T00:34:33+08:00\", \"remote_addr\": \"115.195.145.230\", \"remote_user\": \"-\", \"body_bytes_sent\": \"0\", \"request_time\": \"0.000\", \"status\": \"304\", \"request_uri\": \"/\", \"request_method\": \"GET\", \"http_referrer\": \"-\", \"http_x_forwarded_for\": \"-\", \"http_user_agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36\"}",
  "input": {
    "type": "log"
  }
}
### --- The Filebeat monitoring output pretty-printed as JSON
~~~ # Filebeat monitoring JSON output
{
  "monitoring": {
    "metrics": {
      "beat": {
        "cpu": {
          "system": {
            "ticks": 40,
            "time": {
              "ms": 2
            }
          },
          "total": {
            "ticks": 100,
            "time": {
              "ms": 8
            },
            "value": 100
          },
          "user": {
            "ticks": 60,
            "time": {
              "ms": 6
            }
          }
        },
        "handles": {
          "limit": {
            "hard": 65535,
            "soft": 65535
          },
          "open": 10
        },
        "info": {
          "ephemeral_id": "3f5b5145-dc50-4a62-a8a1-e43e61800d49",
          "uptime": {
            "ms": 240023
          }
        },
        "memstats": {
          "gc_next": 4955232,
          "memory_alloc": 3891728,
          "memory_total": 14355280,
          "rss": 307200
        },
        "runtime": {
          "goroutines": 49
        }
      },
      "filebeat": {
        "events": {
          "added": 1,
          "done": 1
        },
        "harvester": {
          "open_files": 1,
          "running": 1
        }
      },
      "libbeat": {
        "config": {
          "module": {
            "running": 0
          }
        },
        "output": {
          "events": {
            "acked": 1,
            "batches": 1,
            "total": 1
          }
        },
        "outputs": {
          "kafka": {
            "bytes_read": 60,
            "bytes_write": 700
          }
        },
        "pipeline": {
          "clients": 2,
          "events": {
            "active": 0,
            "published": 1,
            "total": 1
          },
          "queue": {
            "acked": 1
          }
        }
      },
      "registrar": {
        "states": {
          "current": 2,
          "update": 1
        },
        "writes": {
          "success": 1,
          "total": 1
        }
      },
      "system": {
        "load": {
          "1": 0.08,
          "5": 0.07,
          "15": 0.06,
          "norm": {
            "1": 0.04,
            "5": 0.035,
            "15": 0.03
          }
        }
      }
    }
  }
}