背景
在logstash向es传输数据时,需要根据数据文件配置指定pipeline采集数据
实例
logstash的pipeline文件配置
input {
# input中可以存在多个file输入
file {
# 指定日志数据的来源
path => "/var/log/iotplatform/audit/*/*.log"
# sincedb_path 记录日志文件的读取位置,logstash重启后根据文件内的记录继续读取数据
sincedb_path => "{{logstash_conf_dir}}/logstash-iotplatform"
# start_position 指定logstash对首次发现的文件是否从开头读取,配置为beginning时从文件开头读取(sincedb中已有记录的文件仍按记录位置继续读取)
#start_position => beginning
# 为读取的文件添加type类型
type => "auditlog"
# 通配日期开头的文件内容,当不符合时,默认向上合并行
codec => multiline {
# Grok pattern names are valid! :)
pattern => "(?:^|\n)\[*%{TIMESTAMP_ISO8601}\]*"
negate => true
what => previous
auto_flush_interval => 1
}
}
file {
path => "/var/log/iotplatform/monitor/*/*.log"
sincedb_path => "{{logstash_conf_dir}}/logstash-iotplatform"
#start_position => beginning
type => "monitorlog"
codec => multiline {
# Grok pattern names are valid! :)
pattern => "(?:^|\n)\[*%{TIMESTAMP_ISO8601}\]*"
negate => true
what => previous
auto_flush_interval => 1
}
}
}
filter {
# 通过mutate将字符串分隔,添加字段
mutate {
split => {"message" => "|"}
add_field => { "date" => "%{[message][0]}" }
add_field => { "platform" => "%{[message][1]}" }
add_field => { "product" => "%{[message][2]}" }
add_field => { "module" => "%{[message][3]}" }
add_field => { "hostAndPort" => "%{[message][4]}" }
add_field => { "ip" => "%{[message][5]}" }
add_field => { "region" => "%{[message][6]}" }
add_field => { "logType" => "%{[message][7]}" }
add_field => { "logLevel" => "%{[message][8]}" }
add_field => { "username" => "%{[message][9]}" }
add_field => { "token" => "%{[message][10]}" }
add_field => { "uri" => "%{[message][11]}" }
add_field => { "httpMethod" => "%{[message][12]}" }
add_field => { "resultCode" => "%{[message][13]}" }
add_field => { "action" => "%{[message][14]}" }
add_field => { "extension" => "%{[message][15]}" }
add_field => { "content" => "%{[message][16]}" }
}
# 删除message字段
mutate{
remove_field => ["message"]
# 将resultCode转化为integer类型
convert => { "resultCode" => "integer" }
}
# 通过grok从date字段中提取时间戳,存入logdate字段
grok{
match => [
"date", "%{TIMESTAMP_ISO8601:logdate}"
]
}
# 定义logdate的时间格式
date {
match => ["logdate", "yyyy-MM-dd HH:mm:ss,SSS"]
}
}
output {
# 通过file中定义的type类型,输出到指定的索引中
if [type] == "auditlog" {
elasticsearch {
# es的地址,例:[localhost:9200,localhost2:9200]
hosts => [{{elastic_discovery_seed_hosts}}]
# 输入的索引名称,-%{+YYYY-MM-dd}按天自动建立索引
index => "iotplatform-audit-%{+YYYY-MM-dd}"
# 是否使用模板创建索引,在模板中可提前定义索引的字段类型
manage_template => true
# 索引模板文件
template => "{{logstash_conf_dir}}/iotplatform_template.json"
# 模板在es中定义的名称,可自定义,不必与文件名称相同
template_name => "iotplatform_template"
# 在logstash重启后,是否使用模板文件覆盖es中已存在的索引模板
template_overwrite =>"true"
# 发生版本冲突时(update操作)内部的重试次数
retry_on_conflict => 5
}
}
if [type] == "monitorlog" {
elasticsearch {
hosts => [{{elastic_discovery_seed_hosts}}]
index => "iotplatform-monitor-%{+YYYY-MM-dd}"
manage_template => true
template => "{{logstash_conf_dir}}/iotplatform_template.json"
template_name => "iotplatform_template"
template_overwrite =>"true"
retry_on_conflict => 5
}
}
}
es的模板示例:
{
# 模板通配的索引名称,在创建以iotplatform开头的索引(如iotplatform-audit-*、iotplatform-monitor-*)时使用该模板
"index_patterns" : "iotplatform*",
# 自定义配置
"settings" : {
"index" : {
# 搜索返回结果数最大10000条数据设置
"max_result_window" : "10000",
# 索引刷新时间,默认1s
"refresh_interval" : "1s",
# 指索引要做多少个分片,只能在创建索引时指定,后期无法修改
"number_of_shards" : "1",
# 每个分片有多少个副本,后期可以动态修改
"number_of_replicas" : "0",
"blocks" : {
# 索引是否开启只读
"read_only_allow_delete" : "false"
}
}
},
# 索引字段映射
# 在索引创建完成后,无法直接更新字段,只能通过重建索引来更新
"mappings" : {
"properties" : {
"date" : {
"type" : "text"
},
"logType" : {
"type" : "keyword"
},
"product" : {
"type" : "keyword"
},
"extension" : {
"type" : "text"
},
"module" : {
"type" : "keyword"
},
"ip" : {
"type" : "keyword"
},
"hostAndPort" : {
"type" : "keyword"
},
"resultCode" : {
"type" : "integer"
},
"httpMethod" : {
"type" : "keyword"
},
"uri" : {
"type" : "keyword"
},
"platform" : {
"type" : "keyword"
},
"content" : {
"type" : "text"
},
"token" : {
"type" : "keyword"
},
"times" : {
"type" : "keyword"
},
"logLevel" : {
"type" : "keyword"
},
"action" : {
"type" : "keyword"
},
"region" : {
"type" : "keyword"
},
"username" : {
"type" : "keyword"
}
}
}
}
扩展pipeline.yml
存在多个pipeline文件时,可以将它们统一配置在pipeline.yml文件中,logstash启动时即可同时启动多个pipeline实例
# - pipeline.id: test
# pipeline.workers: 1
# pipeline.batch.size: 125
# pipeline.batch.delay: 50
# path.config: "/tmp/logstash/*.config"
pipeline.id 管道名称,必须唯一
pipeline.workers 处理线程数(用于Filter和Output阶段)
pipeline.batch.size 每个线程批处理大小
pipeline.batch.delay 每个线程批处理事件最大等待时间(默认50ms)
path.config 管道引用的配置文件
注:可使用config.string直接配置,例:config.string: "input { generator {} } filter { sleep { time => 1 } } output { stdout { codec => dots } }"
实例:
- pipeline.id: mylostash1
path.config: "{{logstash_conf_dir}}/{{logstash_conf_name}}"
- pipeline.id: mylostash2
path.config: "{{logstash_conf_dir}}/{{logstash_dt_conf_name}}"
注:管道定义项前不能存在多余空格,即每个 - pipeline.id 行必须顶格书写