3.14-filebeat.yml配置详解

最新推荐文章于 2024-07-24 16:54:22 发布
疯狂学习的白菜
最新推荐文章于 2024-07-24 16:54:22 发布
阅读量2.2k
点赞数 4
分类专栏： fiilebeat
本文链接：https://blog.csdn.net/xcvbxv01/article/details/88567989
版权
fiilebeat 专栏收录该内容
2 篇文章 0 订阅
订阅专栏
一.filebeat.yml 配置详解
------------------------------------
# Filebeat 1.x input settings: the list of prospectors plus the spooler and
# registry options that sit next to it under the `filebeat` key.
filebeat:
  # Each "-" entry below is one prospector with its own options.
  prospectors:
    -
      # Log files to monitor. Glob-based paths; a path may name a single file
      # or use wildcards to cover the files in a directory.
      paths:
        - /var/log/*.log

      # Encoding of the monitored files. Both plain and utf-8 can handle
      # Chinese log text.
      # Some sample encodings:
      #   plain, utf-8, utf-16be-bom, utf-16be, utf-16le, big5, gb18030, gbk,
      #   hz-gb-2312, euc-kr, euc-jp, iso-2022-jp, shift-jis, ...
      encoding: plain

      # Input type of the prospector. Possible options are:
      # * log: Reads every line of the log file (default)
      # * stdin: Reads the standard in
      input_type: log

      # Keep only the lines that match one of these regular expressions
      # (by default every line is kept).
      # include_lines is applied first, exclude_lines afterwards.
      include_lines: ['^ERR', '^WARN']

      # Drop the lines that match one of these regular expressions.
      # By default, no lines are dropped.
      exclude_lines: ['^DBG']

      # Ignore files whose name matches one of these regular expressions.
      # The dot is escaped so that only names ending in ".gz" are skipped;
      # an unescaped ".gz$" would also match names such as "x.tgz".
      exclude_files: ['\.gz$']

      # Extra information added to every exported log event, e.g.
      # "level: debug", which makes later grouping and statistics easier.
      # By default the values are placed under a "fields" sub-dictionary
      # (e.g. fields.level), so the document stored in Elasticsearch gains
      # "fields": {"level": "debug"}.
      fields:
        level: debug
        review: 1

      # When true, the custom fields are stored at the top level of the event
      # instead of under "fields", and they override Filebeat's own fields of
      # the same name. The Elasticsearch document then gains "level": "debug".
      fields_under_root: false

      # Ignore the content of files that were last modified longer ago than
      # this time span, e.g. 2h (two hours) or 5m (five minutes).
      ignore_older: 2h

      # Close the file handle of a file that has not been updated for this
      # long. Default: 1h.
      close_older: 1h

      # Value of the document type field in the Elasticsearch output; it can
      # be used to classify logs. Default: log
      document_type: log

      # How often Filebeat checks the prospector paths for changes (for
      # example newly created files). With 0s Filebeat notices changes as
      # fast as possible, at the cost of higher CPU usage. Default: 10s.
      scan_frequency: 10s

      # Size in bytes of the buffer each harvester uses while reading a file.
      harvester_buffer_size: 16384

      # Maximum number of bytes of a single log event. Every line appended to
      # a log file counts as one event; bytes beyond max_bytes are discarded.
      max_bytes: 10485760

      # Merge several lines into one event. Useful when a single log entry
      # spans multiple lines, such as stack traces.
      multiline:
        # Regular expression to match (here: lines starting with '[').
        pattern: '^\['
        # Whether the pattern match is negated. Default: false (not negated).
        # With negate: true and match: after, every line that does NOT match
        # the pattern is appended to the preceding line that did match, so
        # the pattern marks the first line of each event.
        negate: true
        # Whether the lines are merged with the content before or after.
        match: after
        # Maximum number of lines merged into one event, including the line
        # that matched the pattern. Default: 500.
        max_lines: 500
        # Maximum time to wait while merging a multiline event. Once the
        # timeout expires the lines collected so far are sent even if no new
        # pattern match (a new event) has started. Default is 5s.
        timeout: 5s

      # When true, Filebeat starts reading at the end of each file and sends
      # every newly appended line as one event, instead of resending the
      # whole file from the beginning.
      tail_files: false

      # How long Filebeat waits before checking a file again after it has
      # reached EOF (end of file). Default: 1s.
      backoff: 1s

      # Upper limit for the wait time between checks of a file that has
      # reached EOF. Default: 10s.
      max_backoff: 10s

      # How fast the wait time grows towards max_backoff: after every check
      # that finds no new data the current wait is multiplied by this factor
      # until max_backoff is reached; from then on Filebeat waits max_backoff
      # between checks. As soon as the file is updated the wait is reset to
      # backoff. With the values used here the waits are 1s, 2s, 4s, 8s and
      # then 10s.
      backoff_factor: 2

      # Close the file as soon as its name changes.
      # Recommended on Windows only.
      force_close_files: false

    # Additional prospector
    -
      # Read from standard input (console).
      input_type: stdin

  # Size of the spooler. When the number of events in the spooler exceeds
  # this threshold it is flushed, whether or not the timeout has been reached.
  spool_size: 2048

  # Whether to publish asynchronously (experimental).
  publish_async: false

  # Spooler timeout. When it expires the spooler is flushed, whether or not
  # the size threshold has been reached.
  idle_timeout: 5s

  # File in which Filebeat records how far it has read each log file.
  # By default it is placed in the directory Filebeat is started from.
  registry_file: .filebeat

  # Directory with additional configuration files to load (full path
  # required). Only the prospector part of those files is processed.
  # Left commented out because no directory is configured.
  #config_dir:


######### Libbeat Config ############
# Base config file used by all other beats for using libbeat features


########### Output ##################
# 可以使用多个输出
# Libbeat settings: where events are sent (output), how the shipper
# identifies itself (shipper) and how the Beat itself logs (logging).
# Multiple outputs may be used.
output:

  ############# Elasticsearch as output ###############
  # Default output: events collected by Filebeat go straight to
  # Elasticsearch. Comment this section out if you ship somewhere else
  # instead, e.g. to Redis first and on to Elasticsearch from there.
  elasticsearch:
    # Elasticsearch hosts and ports
    hosts: ["localhost:9200"]

    # Optional protocol and basic auth credentials.
    #protocol: "https"
    #username: "admin"
    #password: "s3cr3t"

    # Number of workers per Elasticsearch host.
    #worker: 1

    # Index name. The default is "filebeat", which generates
    # [filebeat-]YYYY.MM.DD keys.
    #index: "filebeat"

    # Index template settings.
    #template:
      # Template name (default: filebeat).
      #name: "filebeat"

      # Path to the template file.
      #path: "filebeat.template.json"

      # Overwrite an existing template.
      #overwrite: false

    # Optional HTTP Path
    #path: "/elasticsearch"

    # Proxy server url
    #proxy_url: http://proxy:3128

    # Number of times a failed index request is retried. Default: 3.
    #max_retries: 3

    # The maximum number of events to bulk in a single Elasticsearch bulk
    # API index request. The default is 50.
    #bulk_max_size: 50

    # Configure http request timeout before failing a request to
    # Elasticsearch.
    #timeout: 90

    # The number of seconds to wait for new events between two bulk API
    # index requests. If `bulk_max_size` is reached before this interval
    # expires, additional bulk index requests are made.
    #flush_interval: 1

    # Boolean that sets if the topology is kept in Elasticsearch. The
    # default is false. This option makes sense only for Packetbeat.
    #save_topology: false

    # The time to live in seconds for the topology information that is
    # stored in Elasticsearch. The default is 15 seconds.
    #topology_expire: 15

    # tls configuration. By default is off.
    #tls:
      # List of root certificates for HTTPS server verifications
      #certificate_authorities: ["/etc/pki/root/ca.pem"]

      # Certificate for TLS client authentication
      #certificate: "/etc/pki/client/cert.pem"

      # Client Certificate Key
      #certificate_key: "/etc/pki/client/cert.key"

      # Controls whether the client verifies server certificates and host
      # name. If insecure is set to true, all server host names and
      # certificates will be accepted. In this mode TLS based connections
      # are susceptible to man-in-the-middle attacks. Use only for testing.
      #insecure: true

      # Configure cipher suites to be used for TLS connections
      #cipher_suites: []

      # Configure curve types for ECDHE based cipher suites
      #curve_types: []

      # Configure minimum TLS version allowed for the connection
      #min_version: 1.0

      # Configure maximum TLS version allowed for the connection
      #max_version: 1.2

  ################ Logstash as output #################
  # Commented out: an empty `logstash:` key configures nothing. Uncomment it
  # together with `hosts` to send events to Logstash.
  #logstash:
    # The Logstash hosts
    #hosts: ["localhost:5044"]

    # Number of workers per Logstash host.
    #worker: 1

    # The maximum number of events to bulk into a single batch window.
    # The default is 2048.
    #bulk_max_size: 2048

    # Set gzip compression level.
    #compression_level: 3

    # Optional load balance the events between the Logstash hosts
    #loadbalance: true

    # Optional index name. The default index name depends on each beat.
    # For Packetbeat, the default is set to packetbeat, for Topbeat to
    # topbeat and for Filebeat to filebeat.
    #index: filebeat

    # Optional TLS. By default is off.
    #tls:
      # List of root certificates for HTTPS server verifications
      #certificate_authorities: ["/etc/pki/root/ca.pem"]

      # Certificate for TLS client authentication
      #certificate: "/etc/pki/client/cert.pem"

      # Client Certificate Key
      #certificate_key: "/etc/pki/client/cert.key"

      # Controls whether the client verifies server certificates and host
      # name. If insecure is set to true, all server host names and
      # certificates will be accepted. In this mode TLS based connections
      # are susceptible to man-in-the-middle attacks. Use only for testing.
      #insecure: true

      # Configure cipher suites to be used for TLS connections
      #cipher_suites: []

      # Configure curve types for ECDHE based cipher suites
      #curve_types: []

  ################### File as output ################
  # Commented out: `path` is mandatory for this output and none is set.
  #file:
    # Path to the directory where to save the generated files. The option
    # is mandatory.
    #path: "/tmp/filebeat"

    # Name of the generated files. The default is `filebeat` and it
    # generates files: `filebeat`, `filebeat.1`, `filebeat.2`, etc.
    #filename: filebeat

    # Maximum size in kilobytes of each file. When this size is reached,
    # the files are rotated. The default value is 10 MB.
    #rotate_every_kb: 10000

    # Maximum number of files under path. When this number of files is
    # reached, the oldest file is deleted and the rest are shifted from
    # last to first. The default is 7 files.
    #number_of_files: 7

  ######## Console output ################
  # Commented out: uncomment to print events to the console.
  #console:
    # Pretty print json event
    #pretty: false

########### Shipper ##############################
# Top-level section (not part of `output`); all options are left at their
# defaults.
shipper:
  # The name of the shipper that publishes the network data. It can be used
  # to group all the transactions sent by a single shipper in the web
  # interface. If this option is not defined, the hostname is used.
  #name:

  # The tags of the shipper are included in their own field with each
  # transaction published. Tags make it easy to group servers by different
  # logical properties.
  #tags: ["service-X", "web-tier"]

  # Uncomment the following if you want to ignore transactions created by
  # the server on which the shipper is installed. This option is useful to
  # remove duplicates if shippers are installed on multiple servers.
  #ignore_outgoing: true

  # How often (in seconds) shippers are publishing their IPs to the topology
  # map. The default is 10 seconds.
  #refresh_topology_freq: 10

  # Expiration time (in seconds) of the IPs published by a shipper to the
  # topology map. All the IPs will be deleted afterwards. Note, that the
  # value must be higher than refresh_topology_freq. The default is 15
  # seconds.
  #topology_expire: 15

  # Internal queue size for single events in processing pipeline
  #queue_size: 1000

  # Configure local GeoIP database support. If no paths are configured,
  # geoip is disabled.
  #geoip:
    #paths:
    #  - "/usr/share/GeoIP/GeoLiteCity.dat"
    #  - "/usr/local/var/GeoIP/GeoLiteCity.dat"

################ Logging ######################
# Top-level section (not part of `output`).
# Recommendation: enable logging at debug or info level during development
# and switch to error level in production. Logging to files requires
# to_files to be set to true.
logging:
  # Beats can log to syslog or to rotating log files. The default is syslog
  # (follow it with: tail -f /var/log/messages).
  #to_syslog: true

  # Send the log output to rotating files.
  #to_files: false

  # Rotating-file settings. They only take effect when to_files is true.
  files:
    # The directory where the log files will be written to.
    #path: /var/log/mybeat

    # The name of the log files.
    #name: mybeat

    # Rotate to a new file once the current one reaches this many bytes
    # (10 MB, which is also the default).
    rotateeverybytes: 10485760

    # Number of rotated log files to keep (a file count, not a number of
    # days). Oldest files will be deleted first. Default: 7; valid range is
    # 2 to 1024.
    #keepfiles: 7

  # Enable debug output for selected components. To enable all selectors
  # use ["*"]. Other available selectors are beat, publish, service.
  # Multiple selectors can be chained.
  #selectors: [ ]

  # Sets log level. The default log level is error.
  # Available log levels are: critical, error, warning, info, debug
  #level: error



二、FileBeat高级配置以及常用参数调节
---------------------------------
    Filebeat的配置文件是/etc/filebeat/filebeat.yml，遵循YAML语法。具体可以配置如下几个项目：

    Filebeat
    Output
    Shipper
    Logging(可选)
    Run Options（可选）

    1.paths：指定要监控的日志文件（支持glob通配符），不会递归处理子目录
    例如：/var/log/*/*.log
    则只会去/var/log目录的所有子目录中寻找以”.log”结尾的文件，而不会寻找/var/log目录下以”.log”结尾的文件。

    2.encoding：指定被监控的文件的编码类型，使用plain和utf-8都是可以处理中文日志的.

    3.input_type：指定文件的输入类型log(默认)或者stdin（控制台）

    4.exclude_lines：在输入中排除符合正则表达式列表的那些行

    5.include_lines：包含输入中符合正则表达式列表的那些行（默认包含所有行），include_lines执行完毕之后会执行exclude_lines

    6.exclude_files：忽略掉符合正则表达式列表的文件（默认为每一个符合paths定义的文件都创建一个harvester）

    7.fields：向输出的每一条日志添加额外的信息，比如“level:debug”，方便后续对日志进行分组统计。默认情况下，会在输出信息的fields子目录下以指定的新增fields建立子目录，例如fields.level


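    8.fields_under_root：如果该选项设置为true，则新增fields成为顶级目录，而不是将其放在fields目录下。自定义的field会覆盖filebeat默认的field。例如添加如下配置：
        fields:
          level: debug
        fields_under_root: true
    则输出中新增的是顶级字段 "level":"debug"，而不是 "fields":{"level":"debug"}。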



    9.ignore_older：可以指定Filebeat忽略指定时间段以外修改的日志内容，比如2h（两个小时）或者5m(5分钟)。

    10.close_older：如果一个文件在某个时间段内没有发生过更新，则关闭监控的文件handle。默认1h。文件被关闭后如果又有更新，要等到下一次scan（由scan_frequency决定）才会被发现

    11.force_close_files：Filebeat会在没有到达close_older之前一直保持文件的handle，如果在这个时间窗内删除文件会有问题，所以可以把force_close_files设置为true，只要filebeat检测到文件名字发生变化，就会关掉这个handle。

    12.scan_frequency：Filebeat以多快的频率去prospector指定的目录下面检测文件更新（比如是否有新增文件），如果设置为0s，则Filebeat会尽可能快地感知更新（占用的CPU会变高）。默认是10s。

    13.document_type：设定Elasticsearch输出时的document的type字段，也可以用来给日志进行分类。

    14.harvester_buffer_size：每个harvester监控文件时，使用的buffer的大小。

    15.max_bytes：日志文件中增加一行算一个日志事件，max_bytes限制在一次日志事件中最多上传的字节数，多出的字节会被丢弃。

    16.multiline：适用于日志中每一条日志占据多行的情况，比如各种语言的报错信息调用栈。这个配置的下面包含如下配置：
    pattern：多行日志开始的那一行匹配的pattern
       negate：是否对pattern的匹配结果取反，默认为false（不取反）；设置为true时，不匹配pattern的行会按照match的设置合并到匹配pattern的那一行上
    match：匹配pattern后，与前面还是后面的内容合并为一条日志
    max_lines：合并的最多行数（包含匹配pattern的那一行）
    timeout：到了timeout之后，即使没有匹配一个新的pattern（发生一个新的事件），也把已经匹配的日志事件发送出去

    17.tail_files：如果设置为true，Filebeat从文件尾开始监控文件新增内容，把新增的每一行文件作为一个事件依次发送，而不是从文件开始处重新发送所有内容。

    18.backoff：Filebeat检测到某个文件到了EOF之后，每次等待多久再去检测文件是否有更新，默认为1s。

    19.max_backoff：Filebeat检测到某个文件到了EOF之后，等待检测文件更新的最大时间，默认是10秒。

    20.backoff_factor：定义到达max_backoff的速度，默认因子是2，每次检测不到更新就把等待时间乘以该因子，到达max_backoff后，变成每次等待max_backoff那么长的时间才backoff一次，直到文件有更新才会重置为backoff。比如：如果设置成1，意味着禁用了退避算法，每隔backoff那么长的时间检测一次。

    21.spool_size:spooler的大小，spooler中的事件数量超过这个阈值的时候会清空发送出去（不论是否到达超时时间）。

    22.idle_timeout:spooler的超时时间，如果到了超时时间，spooler也会清空发送出去（不论是否到达容量的阈值）。

    23.registry_file:记录filebeat处理日志文件的位置的文件

    24.config_dir:如果要在本配置文件中引入其他位置的配置文件，可以写在这里（需要写完整路径），但是只处理prospector的部分。

    25.publish_async：是否采用异步发送模式（实验功能）