1: Data Modeling
PUT airquality?include_type_name=true
{
  "mappings": {
    "doc": {
      "dynamic": false,
      "properties": {
        "@timestamp": { "type": "date" },
        "city":       { "type": "keyword", "ignore_above": 256 },
        "parameter":  { "type": "keyword", "ignore_above": 256 },
        "status":     { "type": "keyword", "ignore_above": 256 },
        "value":      { "type": "long" }
      }
    }
  },
  "settings": {
    "index": {
      "number_of_shards": "1",
      "number_of_replicas": "0"
    }
  }
}
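To sanity-check the index before wiring up Filebeat, a single hand-written document can be indexed against the "doc" type created above (typed document APIs are deprecated in 7.x but still accepted). The field values below are invented purely for illustration:

POST airquality/doc
{
  "@timestamp": "2020-01-01T08:00:00Z",
  "city": "Beijing",
  "parameter": "pm25",
  "status": "Good",
  "value": 35
}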
2: Filebeat Configuration
# cat filebeat.yml
###################### Filebeat Configuration Example #########################

# This file is an example configuration file highlighting only the most common
# options. The filebeat.reference.yml file from the same directory contains all the
# supported options with more comments. You can use it as a reference.
#
# You can find the full configuration reference here:
# https://www.elastic.co/guide/en/beats/filebeat/index.html

# For more available modules and options, please see the filebeat.reference.yml sample
# configuration file.
#=========================== Filebeat inputs =============================
filebeat.inputs:
# Each - is an input. Most options can be set at the input level, so
# you can use different inputs for various configurations.
# Below are the input specific configurations.
- type: log
  # Change to true to enable this input configuration.
  enabled: true

  # Paths that should be crawled and fetched. Glob based paths.
  paths:
    - /data2/log/*.csv
    #- c:\programdata\elasticsearch\logs\*

  # Exclude lines. A list of regular expressions to match. It drops the lines that are
  # matching any regular expression from the list.
  #exclude_lines: ['^DBG']
  exclude_lines: ["^A ","^The","^Site","^,"]

  # Include lines. A list of regular expressions to match. It exports the lines that are
  # matching any regular expression from the list.
  #include_lines: ['^ERR', '^WARN']

  # Exclude files. A list of regular expressions to match. Filebeat drops the files that
  # are matching any regular expression from the list. By default, no files are dropped.
  #exclude_files: ['.gz$']

  # Optional additional fields. These fields can be freely picked
  # to add additional information to the crawled log files for filtering
  #fields:
  #  level: debug
  #  review: 1
### Multiline options
  # Multiline can be used for log messages spanning multiple lines. This is common
  # for Java Stack Traces or C-Line Continuation

  # The regexp Pattern that has to be matched. The example pattern matches all lines starting with [
  #multiline.pattern: ^\[

  # Defines if the pattern set under pattern should be negated or not. Default is false.
  #multiline.negate: false
  # Match can be set to "after" or "before". It is used to define if lines should be appended to a pattern
  # that was (not) matched before or after, or as long as a pattern is not matched based on negate.
  # Note: After is the equivalent to previous and before is the equivalent to next in Logstash
  #multiline.match: after
#============================= Filebeat modules ===============================
filebeat.config.modules:
  # Glob pattern for configuration loading
  path: ${path.config}/modules.d/*.yml

  # Set to true to enable config reloading
  reload.enabled: false

  # Period on which files under path should be checked for changes
  #reload.period: 10s
setup.ilm.enabled: false
#==================== Elasticsearch template setting ==========================
setup.template.settings:
  index.number_of_shards: 1
  #index.codec: best_compression
  #_source.enabled: false

setup.template.name: "airquality"
setup.template.pattern: "airquality-*"
setup.template.overwrite: true
setup.template.enabled: false
#================================ General =====================================
# The name of the shipper that publishes the network data. It can be used to group
# all the transactions sent by a single shipper in the web interface.
#name:

# The tags of the shipper are included in their own field with each
# transaction published.
#tags: ["service-X", "web-tier"]

# Optional fields that you can specify to add additional information to the
# output.
#fields:
#  env: staging

#============================== Dashboards =====================================
# These settings control loading the sample dashboards to the Kibana index. Loading
# the dashboards is disabled by default and can be enabled either by setting the
# options here or by using the `setup` command.
#setup.dashboards.enabled: false

# The URL from where to download the dashboards archive. By default this URL
# has a value which is computed based on the Beat name and version. For released
# versions, this URL points to the dashboard archive on the artifacts.elastic.co
# website.
#setup.dashboards.url:
#============================== Kibana =====================================
# Starting with Beats version 6.0.0, the dashboards are loaded via the Kibana API.
# This requires a Kibana endpoint configuration.
setup.kibana:
  # Kibana Host
  # Scheme and port can be left out and will be set to the default (http and 5601)
  # In case you specify an additional path, the scheme is required: http://localhost:5601/path
  # IPv6 addresses should always be defined as: https://[2001:db8::1]:5601
  #host: "localhost:5601"
  # Kibana Space ID
  # ID of the Kibana Space into which the dashboards should be loaded. By default,
  # the Default Space will be used.
  #space.id:
#============================= Elastic Cloud ==================================
# These settings simplify using Filebeat with the Elastic Cloud (https://cloud.elastic.co/).
# The cloud.id setting overwrites the `output.elasticsearch.hosts` and
# `setup.kibana.host` options.
# You can find the `cloud.id` in the Elastic Cloud web UI.
#cloud.id:

# The cloud.auth setting overwrites the `output.elasticsearch.username` and
# `output.elasticsearch.password` settings. The format is `<user>:<pass>`.
#cloud.auth:
#================================ Outputs =====================================
# Configure what output to use when sending the data collected by the beat.
#-------------------------- Elasticsearch output ------------------------------
output.elasticsearch:
  # Array of hosts to connect to.
  #hosts: ["localhost:9200"]
  hosts: ["10.0.0.100:11991","10.0.0.101:11635","10.0.0.102:15469"]
  # Ingest pipeline applied on the Elasticsearch side; a sketch of it is given after this file listing.
  pipeline: "airquality"
  index: "airquality-%{+yyyy.MM.dd}"
  # Optional protocol and basic auth credentials.
  #protocol: "https"
  #username: "elastic"
  #password: "changeme"
  #setup.template.name: "air_quality"
  #setup.template.pattern: "air_quality"
  #setup.template.overwrite: false
  #setup.template.enabled: true
#----------------------------- Logstash output --------------------------------
#output.logstash:
  # The Logstash hosts
  #hosts: ["localhost:5044"]

  # Optional SSL. By default is off.
  # List of root certificates for HTTPS server verifications
  #ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]

  # Certificate for SSL client authentication
  #ssl.certificate: "/etc/pki/client/cert.pem"

  # Client Certificate Key
  #ssl.key: "/etc/pki/client/cert.key"
#================================ Processors =====================================
# Configure processors to enhance or manipulate events generated by the beat.
processors:
  - add_host_metadata: ~
  - add_cloud_metadata: ~
  - add_docker_metadata: ~
  - add_kubernetes_metadata: ~
#================================ Logging =====================================
# Sets log level. The default log level is info.
# Available log levels are: error, warning, info, debug
#logging.level: debug

# At debug level, you can selectively enable logging only for some components.
# To enable all selectors use ["*"]. Examples of other selectors are "beat",
# "publish", "service".
#logging.selectors: ["*"]
#============================== X-Pack Monitoring ===============================
# filebeat can export internal metrics to a central Elasticsearch monitoring
# cluster. This requires xpack monitoring to be enabled in Elasticsearch. The
# reporting is disabled by default.

# Set to true to enable the monitoring reporter.
#monitoring.enabled: false

# Sets the UUID of the Elasticsearch cluster under which monitoring data for this
# Filebeat instance will appear in the Stack Monitoring UI. If output.elasticsearch
# is enabled, the UUID is derived from the Elasticsearch cluster referenced by output.elasticsearch.
#monitoring.cluster_uuid:

# Uncomment to send the metrics to Elasticsearch. Most settings from the
# Elasticsearch output are accepted here as well.
# Note that the settings should point to your Elasticsearch *monitoring* cluster.
# Any setting that is not set is automatically inherited from the Elasticsearch
# output configuration, so if you have the Elasticsearch output configured such
# that it is pointing to your Elasticsearch monitoring cluster, you can simply
# uncomment the following line.
#monitoring.elasticsearch:
#================================= Migration ==================================
# This allows to enable 6.7 migration aliases
#migration.6_to_7.enabled: true
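The Elasticsearch output above references an ingest pipeline named "airquality", which this post does not show. Below is a minimal sketch of what such a pipeline could look like, assuming the CSV columns arrive as timestamp, city, parameter, value, status in that order; the dissect pattern and date formats are guesses and must be adapted to the real layout of the files under /data2/log/:

PUT _ingest/pipeline/airquality
{
  "description": "Parse air-quality CSV lines shipped by Filebeat (illustrative sketch)",
  "processors": [
    {
      "dissect": {
        "field": "message",
        "pattern": "%{ts},%{city},%{parameter},%{value},%{status}"
      }
    },
    {
      "date": {
        "field": "ts",
        "target_field": "@timestamp",
        "formats": ["ISO8601", "yyyy-MM-dd HH:mm"]
      }
    },
    {
      "convert": {
        "field": "value",
        "type": "long"
      }
    },
    {
      "remove": {
        "field": ["ts", "message"],
        "ignore_failure": true
      }
    }
  ]
}

The pipeline only needs to emit the five fields declared in the mapping from part 1; everything else can be dropped or left to the Filebeat processors.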
3: Data Ingestion
# cat /data2/log/*.csv|/data2/filebeat-7.5.1-linux-x86_64/filebeat -e -c filebeat.yml -d "publish"
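Once Filebeat has shipped the data, a quick way to confirm that documents arrived in the daily indices is to count them and look at the most recent one:

GET airquality-*/_count

GET airquality-*/_search
{
  "size": 1,
  "sort": [{ "@timestamp": "desc" }]
}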