1、控制台采集数据,控制台输出数据
bin/logstash -e 'input { stdin { } } output { stdout {} }'
2、文件执行,控制台输出
bin/logstash -f test.conf
bin/logstash -f test.conf --path.data=./
3、date+ruby,转换时间为时间戳形式,规整到分钟
filter{
json{
source => "message"
}
#格式化时间
date{
match => ["ts", "UNIX_MS"]
target => "ts"
}
ruby{
code => "
event.set('time', (event.get('ts').to_i / 60.0 ).round * 60)
"
}
}
#日期格式转时间戳
4、kafka输入,文件输入、输出
文件输入输出
input {
file {
path => "/Users/skyer/Desktop/json.txt"
start_position => "beginning" #从文件开头按行读取,"end"为读取文件新增内容
}
}
output {
file {
path => ...
codec => line { format => "custom format: %{message}"}
}
}
kafka
input {
kafka {
bootstrap_servers => "11.168.1.35:9092,11.168.1.36:9092,11.168.1.37:9092"
topics => ["Per_common"]
group_id => "test"
auto_offset_reset => "latest" #earliest指定从最开始消费
# max_partition_fetch_bytes => "3145728"
# consumer_threads => 8
# max_poll_records => "1000"
}
}
output {
kafka {
bootstrap_servers => "10.2.13.31:9092"
topic_id => "aiops-anomaly-kpi-new"
codec => plain {
format => "%{data}"
}
}
}
output {
kafka{
bootstrap_servers => "192.168.112.12:9092"
codec => json
topic_id => "test"
compression_type => "snappy" #compression_type 这个是压缩方式,可以设定为”none”, “gzip”, “snappy”, “lz4”
}
}
5、输出到influxdb
方法1:
output {
influxdb {
host => "11.53.48.119"
port => 8086
db => "aiops_node"
measurement => "node.%{comp_code}.metric.%{ap}"
flush_size => 1000
idle_flush_time => 5
allow_time_override => true
time_precision => "s"
send_as_tags => [] #!!!!当指定tags字段时,可以在influxdb中存储同一时间点的数据
exclude_fields => ["@timestamp", "@version", "sequence", "message", "type", "host", "ap", "comp_code"] #这些内容不输出到influxdb
use_event_fields_for_data_points => true # true 全部写入
}
}
方法2:
output {
influxdb {
host => "11.53.48.119"
port => 8086
db => "aiops_node"
measurement => "node.%{appname}.trancode.%{tc}"
flush_size => 1000
idle_flush_time => 5
allow_time_override => true
time_precision => "ns"
data_points => {
"time" => "%{time}"
"arsp" => "%{arsp}"
"amt" => "%{amt}"
"ssrt" => "%{ssrt}"
"bsrt" => "%{bsrt}"
}
}
}
6、ES(elasticsearch)输入、输出
#es输入,定时抽取es数据
input {
elasticsearch {
hosts => "hostname"
index => "ping-min-2019-11-08"
size => 5000 #根据数据量进行修改
user => "username"
password => "password"
schedule => "* * * * *" #一分钟一次
query => '{"query":{"range":{"_start_at_s":{"gt":"now-2m","lte":"now-1m","time_zone":"+08:00"}}}}' #dsl range查询,获取过去(2-1)分钟的数据,gt-大于,gte-大于等于,由于es时区存在偏差,需要转换成UTC
}
}
#输出
output{
elasticsearch {
hosts => "????"
index => "%{key}-%{+YYYY.MM.dd}" #匹配message中的格式
manage_template => true
template_overwrite => true
user => "elastic"
password => "Bizseer@2020"
}
}
7、插件安装
例:bin/logstash-plugin install logstash-output-influxdb 安装influxdb输出插件
8、logstash pipeline
pipeline :同一进程中运行多个管道
在config/pipelines.yml中添加如下定义
例: !!! 启动logstash时不加参数默认会从配置文件中读取,当使用参数 -e或-f时,会忽略pipelines.yml文件
使用./bin/logstash -r 当配置文件改动时,自动重新加载
pipeline.id: test1
path.config: "/etc/path/to/test1.config"
pipeline.workers: 3
pipeline.id: test2
path.config: "/etc/path/to/test2.config"
pipeline.workers: 3
9、pipeline-to-pipeline 管道机制
https://elasticsearch.cn/article/617657949830
10、输出到kafka
kafka{
bootstrap_servers => "192.168.112.12:9092"
codec => json
topic_id => "test"
compression_type => "snappy"
}
11、替换字符串 gsub
Logstash之开启\r\t\n等特殊字符转义
#默认情况下,如果处理的字符中含有\t\n等字符,是不生效的,我们需要开启logstash的字符转义功能,如下:修改logstash的config/logstash.yml,找到config.support_escapes,去掉之前的注释,将值改为true,默认是false
config.support_escapes: true
单引号替换为双引号
mutate {
gsub =>[
"message", "'", '"'
]
}
mutate {
gsub =>[
"message", '\//', ''
]
}
#替换反斜杠
mutate {
gsub =>[
"message", '[\\]', ''
]
}
12、字符串提取 grok+ruby
#原始数据如下
logType=app.bpm, file=/data/logs/instructionsvc/info/info.log, logLevel=INFO, logCollectProtocol=file, ip=10.132.81.69, appId=bpm.basesvc.instructionsvc, id=bpm.basesvc.instructionsvc.0, logCollectTimestamp=2020-06-08 18:35:04.119, isMultiLast=true, version=v1.0.0
grok {
match => {
"message" => "%{DATA:head}appId=%{DATA:appId},%{GREEDYDATA:other}"
}
}
grok {
match => {
"message" => "%{DATA:head}ip=%{DATA:ip},%{GREEDYDATA:other}"
}
}
例:比如 path => /wls/applogs/rtlog/icore-pts2SF2433/icore-pts2SF2433.out,想提取icore-pts 与 icore-pts2SF2433/icore-pts2SF2433.out
第一种方法:用grok 处理
filter {
grok {
match => ["path","/wls/applogs/rtlog/(?<servername>[a-z][a-z-]*)(?<stag>[0-9]*)((?:SF)|(?:WII)|(?:DMZ)|(?:DRServer))(?:%{NUMBER})/%{USERNAME:apppath}"]
add_field => {
"app_path" => "%{apppath}"
"app_name" => "%{servername}"
}
}
}
第二种方法:用Ruby 处理
filter {
ruby {
code=>"
event['app_path']=event['path'].dup.sub!(/\/wls\/applogs\/rtlog/,'')
tmp=event['path'].dup.sub!(/\/wls\/applogs\/rtlog\//,'') #ruby 不允许直接改对象,所以复制 or 克隆一下后再改(加dup)
event['app_name']=tmp.sub!(/\d.*/,'')
"
}
}
13、字符串分割 split
filter {
mutate {
split => ["timestamp","-"]
add_field => {
"index_date" => "%{[timestamp][0]}%{[timestamp][1]}%{[timestamp][2]}"
"index" => "%{data_id}-%{index_date}"
}
}
}
14、解析json array
#以json格式输出到anomaly!!!!!
ruby {
code => '
info = event.get("records")
event.set("time",event.get("time").to_i/60*60)
event.set("kpis",Array.new)
for i in info do
h = Hash.new
h["key"] = "input_record"
h["name"] = i["name"].gsub(",",".")
h["value"] = i["value"]
event.set("kpis",event.get("kpis").push(h))
h = Hash.new
h["key"] = "output_record"
h["name"] = i["name"].gsub(",",".")
h["value"] = i["value"]
event.set("kpis",event.get("kpis").push(h))
end
'
}
split { #split会将kpis列表中的内容一个一个地吐出去!!!!
field => "kpis"
# terminator => "#" 可以指定分隔符
}
mutate {
add_field => {
"value" => "%{[kpis][value]}"
"kpi_key" => "kpi.%{IP}-%{[kpis][key]}"
}
remove_field => ["message","@version","@timestamp","serverIp"]
}
mutate {
convert => {
"value" => "float"
}
}
#以三元组形式输出到anomaly
ruby {
code => '
info = event.get("cpu[cpus]")
event.set("data_cpu", "")
info.each{ |i, item| event.set("data_cpu", event.get("data_cpu") + format("kpi.%s.cpu_%s.cpu,%d,%d;",event.get("hostName"),i["name"],event.get("tm"),i["val"]))}
event.set("data_cpu", event.get("data_cpu").chop)
'
}
#输出到influxdb 方法1
ruby {
code => '
info = event.get("fans")
for i in info do
i["name"] = i["name"].delete "#"
i["name"] = i["name"].delete " "
i["name"] = i["name"].gsub(",",".")
event.set("fans"+i["name"]+"_state1",i["state1"])
event.set("fans"+i["name"]+"_state2",i["state2"])
event.set("fans"+i["name"]+"_state3",i["state3"])
end
'
}
#方法2
ruby {
code => '
info = event.get("fans")
for i in info do
i["name"] = i["name"].delete "#"
i["name"] = i["name"].delete " "
i["name"] = i["name"].gsub(",",".")
field = "fans" + i["name"]
for key in i.keys() do
next if key == "name"
field_name = field + "_" + key
event.set(field_name,i[key])
end
end
'
}
15、解析json 嵌套
filter {
json {
source => "message"
}
mutate {
add_field => {
"@icmp" => "%{icmp}"
"@mem" => "%{mem}"
"@traffic" => "%{traffic}"
}
}
json{
source => "@traffic"
remove_field => ["@traffic","traffic"]
}
json{
source => "@mem"
remove_field => ["mem","@mem"]
}
json{
source => "@icmp"
remove_field => ["@icmp","icmp"]
}
}
16、中文转英文+translate
#会自动匹配message,如果有需要转换的,则会将修改后的内容输出到destination
filter {
translate {
field => "message"
destination => "destination"
regex => true
exact => false
dictionary => {
"你好" => "hello"
"银联" => "yinlian"
}
}
}
#如果想要覆盖原来的message,使用下面方法
filter {
translate {
field => "message"
destination => "message"
override => true
regex => true
exact => false
dictionary => {
"你好" => "hello"
"银联" => "yinlian"
}
}
}
17、白名单过滤
#输出appname、raw_message字段
ruby {
code => "
event.to_hash.keys.each { |k|
unless ['appname', 'raw_message'].include?(k.to_s)
event.remove(k)
end
} "
}
18、if判断
if ![tm] or ![hostName] or ![traffic] or ![cpu] or ![mem] or ![interfaces]{
drop{}
}
# if "ZH-Area" not in [hostName] {
# drop{}
# }
if !([hostName] =~ "^ZH-Area.") { #正则判断hostname是否以ZH-Area开头
drop{}
}
if [warning] == '' or "13" not in [warning]{
mutate {
add_field => {
"syslog_13" => 0
}
convert => {
"syslog_13" => "integer"
}
}
}else{
mutate {
add_field => {
"syslog_13" => "%{[warning][13]}"
}
convert => {
"syslog_13" => "integer"
}
}
}
#判断数组是否为空
if [streams] in [] {
drop{}
}
19、判断字段类型&字段类型转换
#判断字段类型
data.is_a?(Integer)#判断是否为整数Integer类型
data.is_a?(Float)#浮点数类型
data.is_a?(Numeric)#是否数字的判断
data.class #.class方法返回当前数据类型
#类型转换
data.to_i #转换成int
data.to_f #转换成float
data.to_s #转换成string
20、convert类型转换
mutate {
add_field => {
"syslog_13" => "%{[warning][13]}"
}
convert => {
"syslog_13" => "integer"
"test" => "float"
}
}
21、大小写转换uppercase/lowercase
filter {
mutate {
uppercase => [ "fieldname" ]
}
}
22、限制字段长度
truncate {
fields => [ "URL" ]
length_bytes => 1000
}
23、多实例消费同一个topic
1、设置相同的topic
2、设置相同的group_id
3、设置不同的client_id
4、consumer_threads多实例相加等于topic的分区数
24、元素组合+拆分
mutate {
add_field =>{"raw" =>’{”kpi_key“:"kpi.%{app}.%{tradecode}.trade",“value”:%{trade},“timestamp”:%{time}}#{“kpi_key”:“kpi.%{app}.%{tradecode}.succyw,“value”:%{succyw},“timestamp”:%{time}}#{“kpi_key”:“kpi.%{app}.%{tradecode}.succxt,“value”:%{succxt},“timestamp”:%{time}}#{“kpi_key”:“kpi.%{app}.%{tradecode}.restime,“value”:%{restime},“timestamp”:%{time}}’}
remove_field=>["message","app","tradecode","time","trade","succyw","succxt","restime","retcode"]
}
split {
field => "raw"
terminator => "#"
remove_field =>["@version","@timestamp"]
}
25、读取oracle数据库
input{
jdbc{
statement_filepath => '/home/bizseer/aiops/test/disk.sql'
use_column_value => true
record_last_run => true
tracking_column => "ts"
tracking_column_type => "numeric"
jdbc_connection_string => 'jdbc:oracle:thin:@//98.254.1.196:1521/warehous'
jdbc_driver_class => 'java::oracle.jdbc.driver.OracleDriver'
jdbc_driver_library => '/home/bizseer/aiops/test/ojdbc6.jar'
jdbc_password => 'itmuser'
jdbc_user => 'itmuser'
jdbc_validate_connection => true
jdbc_paging_enabled => false
jdbc_page_size => 50000
last_run_metadata_path => '/home/bizseer/aiops/test/disk.meta'
schedule => "* * * * *"
}
}
#disk.sql
select itmuser."KLZ_Disk_H".*,to_number(writetime) as ts from itmuser."KLZ_Disk_H" where writetime > to_char(:sql_last_value-500000)
#disk.meta
--- 1201011220000000
26、常见问题
arm环境下报错在Arm的服务器上启动 logstash 报Sending Logstash logs to /home/logstash-7.4.1/logs which is now configured via log4j2.properties
[2019-12-02T14:13:48,010][ERROR][org.logstash.Logstash ] java.lang.IllegalStateException: Logstash stopped processing because of an error: (LoadError) load error: ffi/ffi -- java.lang.NullPointerException: null
问题原因:lib/ruby/stdlib/ffi/platform/aarch64-linux/目录下缺失platform.conf文件,导致代码异常报错
解决办法:拷贝lib/ruby/stdlib/ffi/platform/aarch64-linux/types.conf重新命名为platform.conf,将jruby-complete-9.2.8.0.jar重新打包:
https://blog.csdn.net/zhangMY12138/article/details/118961562