一、filebeat
vim filebeat.yml
###
filebeat.inputs:
- type: log
  enabled: true                     # enable this input
  paths:
    - /opt/Web/log/word-*.log       # glob of log files to monitor
  fields:
    type: Pre-word                  # custom tag; used below as the Kafka topic name
  fields_under_root: true           # place the custom fields at the top level of the event
  # only ship lines containing one of these substrings to Kafka
  include_lines: ['Download start','Download end']

output.kafka:
  hosts: ["10.0.0.1:9092","10.0.0.2:9092","10.0.0.3:9092"]
  topic: "%{[type]}"                # topic resolved from the per-event "type" field
  partition.hash:
    reachable_only: false           # hash to all partitions, even if the leader is unreachable
    hash: ['type']                  # partition key: the "type" field

setup.template.settings:
  index.number_of_shards: 3
  index.number_of_replicas: 1
启动:nohup /etc/filebeat/filebeat -e -c /etc/filebeat/filebeat.yml &
二、kafka
vim /opt/kafka/config/server.properties
# Kafka broker configuration.
# NOTE: in a .properties file "#" only starts a comment at the BEGINNING of a
# line; trailing "### ..." text after a value becomes PART of the value.
# All comments below are therefore on their own lines.
broker.id=1
# change the IP on each broker node
listeners=PLAINTEXT://10.0.0.1:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
# custom data directory for partition logs
log.dirs=/data/kafka/kafka-logs
num.partitions=2
num.recovery.threads.per.data.dir=1
log.retention.hours=120
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
offsets.topic.num.partitions=50
# Duplicates removed: the file previously declared this key twice (1, then 2)
# and the two transaction.* keys twice; java.util.Properties keeps the LAST
# occurrence, so the effective values are preserved here.
offsets.topic.replication.factor=2
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
default.replication.factor=2
# FIX: was "replica.fetch.bytes", which is not a valid broker property;
# the correct name is replica.fetch.max.bytes
replica.fetch.max.bytes=5048576
delete.topic.enable=true
# zookeeper ensemble
zookeeper.connect=10.0.0.1:2181,10.0.0.2:2181,10.0.0.3:2181
zookeeper.connection.timeout.ms=30000
zookeeper.session.timeout.ms=30000
group.initial.rebalance.delay.ms=0
查看topic内容:kafka-console-consumer.sh --bootstrap-server 10.0.0.1:9092 --topic Pre-word
三、logstash
vim /opt/logstash/config/conf.d/word.conf
###
# Consume the Pre-word topic produced by Filebeat.
input {
  kafka {
    bootstrap_servers => '10.0.0.1:9092,10.0.0.2:9092,10.0.0.3:9092'
    group_id => 'Pre-word'
    # client_id must differ for each Logstash instance consuming the same
    # topic; a unique one is auto-generated when omitted
    client_id => 'Pre-word-'
    topics => "Pre-word"
    codec => "json"
    # with no committed offset, start from the newest messages
    auto_offset_reset => "latest"
    # FIX(comment): size this to the topic's PARTITION count (not the
    # replica count) — threads beyond the partition count stay idle
    consumer_threads => 2
    decorate_events => "true"
    # FIX: was 'Pre-Raysync', which never matches the "Pre-word" checks in
    # the filter/output sections. (Filebeat already ships type=Pre-word via
    # fields_under_root, and an input "type" does not override an existing
    # event field — but the value must still be consistent.)
    type => 'Pre-word'
  }
}
# Parse only events tagged Pre-word (set by Filebeat via fields_under_root),
# grokking the two shipped line variants: "Download start" and "Download end".
# NOTE(review): both grok patterns match an unescaped "]" after the component
# without a leading "[" — presumably the raw log line opens the bracket before
# the component name; confirm against a sample log line.
filter {
if [type] == "Pre-word" {
if "Download start" in [message] {
grok {
match => { "message" => "%{WORD:Date}\s+%{TIME:time}\s+(?<Threadid>\d+)\s+%{DATA:component}\]\s+\[%{DATA:LoginStatusId}\]\s+(?<STATUS>\S+\s+\S+)\.\s+(?<ID>\S+=\d+)\s+(?<FilePath>\S+=\S+)\s+(?<downloadStartPos>\S+=\d+)\s+(?<downloadLength>\S+=\d+)" }
}
mutate {
remove_field => ["@version"] # drop the noise field before indexing
}
}
# "else if" keeps the two variants mutually exclusive
else if "Download end" in [message] {
grok {
match => { "message" => "%{WORD:data}\s+%{TIME:time}\s+(?<Threadid>\d+)\s+%{DATA:component}\]\s+\[(?<LoginStatusId>\d+)\]\s+(?<status>\S+\s+\S+)\.\s+(?<id>\S+=\d+)\s+(?<FilePath>\S+=\S+)\s+(?<error_code>\S+=\d+)\s+(?<Seek>\S+=\d+)\s+(?<ReadSize>\S+=\d+)\s+(?<hash>\S+)" }
}
mutate {
remove_field => ["@version"]
}
}
# neither substring matched: discard the event entirely
else {
drop {}
}
}
}
output {
  # Route only Pre-word events to Elasticsearch.
  # FIX: use exact equality instead of "in" — on a string field, "in"
  # performs a SUBSTRING match and would also route e.g. "Pre-word-x";
  # "==" states the intent precisely.
  if [type] == "Pre-word" {
    elasticsearch {
      hosts => ["10.10.0.1:9200", "10.10.0.2:9200","10.10.0.3:9200"]
      user => "elastic"
      # NOTE(review): plaintext credentials in the pipeline file — prefer the
      # Logstash keystore (e.g. password => "${ES_PWD}") in production
      password => "es@@123"
      # Elasticsearch index names must be lowercase
      index => "pre-word-%{+YYYY.MM.dd}"
    }
  }
}
检查配置文件:/opt/logstash/bin/logstash -f /opt/logstash/config/conf.d/word.conf -t