数据流程如下:Filebeat(采集YARN日志)→ Kafka → Logstash → Elasticsearch
一、配置Filebeat
编辑filebeat.yml文件,内容如下:
# ====================== Inputs ===============================
# Tail YARN log files on this host and ship every line to Kafka.
filebeat.inputs:
  - type: log
    enabled: true
    paths:
      # NodeManager daemon output files
      - /data/log/hadoop-yarn/hadoop-cmf-yarn-NODEMANAGER*.out
      # Per-container files whose names start with "std" (e.g. stdout, stderr)
      - /data/yarn/container-logs/application_*/container_*/std*
# ====================== Outputs ==============================
# Publish events to a Kerberos-secured Kafka cluster.
output.kafka:
  enabled: true
  hosts: ["ip01:9092", "ip02:9092"]
  # Topic the Logstash kafka input must subscribe to
  topic: 'topic01'
  # Kerberos (keytab-based) authentication; replace the xxx placeholders
  kerberos.enabled: true
  kerberos.auth_type: "keytab"
  kerberos.keytab: "xxx.keytab"
  kerberos.config_path: "/etc/krb5.conf"
  kerberos.realm: "xxx.COM"
  kerberos.service_name: "kafka"
  kerberos.username: "xxx"
启动Filebeat,执行命令 ./filebeat -e -c filebeat.yml
二、配置Logstash
编辑logstash.conf,内容如下:
# ***********************************************************************************
# Logstash pipeline configuration
# ***********************************************************************************
# ============================= Inputs ==================================
# Two Kerberos-authenticated Kafka consumers. Each one tags its events with a
# "piplinetype" field; the filter and output sections branch on that value, so
# it must be one of the values those sections test for ("mot", "bigdatalog").
input {
  # Consumer for the "mot" and "test" topics
  kafka {
    id => "kafka-input-mot"
    bootstrap_servers => "ip01:9092,ip02:9092"
    group_id => "logstash-group-1"
    topics => ["mot","test"]
    jaas_path => "/opt/logstash-7.9.3/jaas.conf"
    kerberos_config => "/etc/krb5.conf"
    security_protocol => "SASL_PLAINTEXT"
    sasl_kerberos_service_name => "kafka"
    add_field => {"piplinetype" => "mot"}
  }
  # Consumer for the YARN logs that Filebeat publishes to "topic01"
  kafka {
    id => "kafka-input-bigdatalog"
    bootstrap_servers => "ip01:9092,ip02:9092"
    group_id => "logstash-group-2"
    topics => ["topic01"]
    jaas_path => "/opt/logstash-7.9.3/jaas.conf"
    kerberos_config => "/etc/krb5.conf"
    security_protocol => "SASL_PLAINTEXT"
    sasl_kerberos_service_name => "kafka"
    # Was "topic01", which no output branch matches, so these events were
    # silently dropped. The output section routes on "bigdatalog".
    add_field => {"piplinetype" => "bigdatalog"}
  }
}
# ============================= Filters =================================
filter {
  # Parse the JSON text held in "message" into event fields (all events).
  json {
    source => "message"
  }

  # For "mot" events, drop the raw JSON string once it has been parsed.
  if [piplinetype] == "mot" {
    mutate {
      remove_field => [ "message" ]
    }
  }
}
# ============================= Outputs =================================
# -----------------------------------------------------------------------
# Route each event to an Elasticsearch index chosen from its "piplinetype"
# tag and, for "bigdatalog" events, from the path of the file it was read from.
# Events that match none of the branches are not written anywhere.
output {
  if [piplinetype] == "mot" {
    # mot data -> index "mot"
    elasticsearch {
      hosts => ["ip01:9200","ip02:9200"]
      index => "mot"
    }
  } else if [piplinetype] == "bigdatalog" and [log][file][path] =~ "/data/log/hadoop-yarn/hadoop-cmf-yarn-NODEMANAGER.*\.out" {
    # YARN NodeManager log
    elasticsearch {
      hosts => ["ip01:9200","ip02:9200"]
      index => "bigdatalog-yarn-nm"
    }
  } else if [piplinetype] == "bigdatalog" and [log][file][path] =~ "/data/yarn/container-logs/application_.*/container_.*/stdout" {
    # YARN container stdout
    elasticsearch {
      hosts => ["ip01:9200","ip02:9200"]
      index => "bigdatalog-yarn-application-out"
    }
  } else if [piplinetype] == "bigdatalog" and [log][file][path] =~ "/data/yarn/container-logs/application_.*/container_.*/stderr" {
    # YARN container stderr
    elasticsearch {
      hosts => ["ip01:9200","ip02:9200"]
      index => "bigdatalog-yarn-application-err"
    }
  }
}