fliter中JDBC操作有两个插件:
jdbc_streaming:这个是流式执行的,每来一条数据就会执行一次JDBC操作,具体介绍见官网https://www.elastic.co/guide/en/logstash/current/plugins-filters-jdbc_streaming.html,具体使用如下:
input{
stdin{}
}
filter{
grok{
match => {
"message" => '^{"DEVICE_CODE":(?<code>(.*))}'
}
}
mutate{
remove_field => ["message"]
}
jdbc_streaming{
jdbc_driver_library => "/root/logstashfile/test/mysql-connector-java-5.1.37.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://10.10.0.101:3306/zeus?characterEncoding=UTF-8"
jdbc_user => "root"
jdbc_password => "root"
parameters => { "d_code" =>"code" }
statement => "select DISTINCT(CJDID) from HC_ZS_STATIC_CJD_ZP where DEVICE_CODE= :d_code"
target => "cjdid"
}
mutate{
replace => { "cjdid" => " %{[cjdid][0][CIDID]}"}
}
}
output{
stdout{}
}
注意:parameters 中的code是filed的名称,列子中对应grok中匹配出来的字段,还有输出结果cjdid是一个JSON数组的结构。
通过jdbc_streaming操作,结果中就多了个cjdid字段了,先当于执行了sql操作了。但是这种操作是每来一条数据执行一次,新建一个JDBC链接,资源极大浪费。
jdbc_static:
jdbc_streaming是直接查询其他数据库,jdbc_static提供了另一种思路,logstash自带derby数据库,jdbc_static将远程数据库的数据加载到derby数据库中,在每次消息来到时从自带的数据库中查询数据。相当于中间视图的功能,但要注意这里并不是将数据放到了缓存中,仍然时存储在硬盘中的,只不过相比于jdbc_streaming不需要频繁的去获取数据库链接了,提升了效率。这个组件的具体介绍见官网https://www.elastic.co/guide/en/logstash/current/plugins-filters-jdbc_static.html#plugins-filters-jdbc_static-loader_schedule,下边是案例:
input{
stdin{}
}
filter{
grok{
match => {
"message" => '^{"DEVICE_CODE":(?<code>(.*))}'
}
}
mutate{
remove_field => ["message"]
}
jdbc_static {
loaders => [
{
id => "ZPCJDID"
query => "select DEVICE_CODE,CJDID from HC_ZS_STATIC_CJD_ZP WHERE CJDID!=12"
local_table => "ZPCJDID"
}
]
local_db_objects => [
{
name => "ZPCJDID"
index_columns => ["DEVICE_CODE"]
columns => [
["DEVICE_CODE", "varchar(30)"],
["CJDID", "varchar(20)"]
]
}
]
local_lookups => [
{
query => "select DISTINCT(CJDID) from ZPCJDID WHERE DEVICE_CODE = :device_code"
parameters => {device_code => "code"}
target => "cjdid1"
}
]
jdbc_user => "root"
jdbc_password => "root"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_driver_library => "/root/logstashfile/test/mysql-connector-java-5.1.37.jar"
jdbc_connection_string => "jdbc:mysql://10.10.0.101:3306/zeus?characterEncoding=UTF-8"
add_field => { cjdid => "%{[cjdid1][0][cjdid]}"}
remove_field => ["cjdid1"]
}
}
output{
stdout{}
}
下面来介绍filter中的另一个插件:ruby,将其和jdbc_static结合使用
业务要求:
输入:{"DEVICE_CODE":HC131-GC151226}类似的数据
过滤:mysql中查询相关的CJDID,在判断CJDID是否满足相应要求
输出:满足条件的数据
数据库表数据:
ruby中采用文件方式执行,ruby具体说明见官网:https://www.elastic.co/guide/en/logstash/current/plugins-filters-ruby.html#plugins-filters-ruby-init
代码如下:
logstash配置文件:
input{
stdin{}
}
filter{
grok{
match => {
"message" => '^{"DEVICE_CODE":(?<code>(.*))}'
}
}
mutate{
remove_field => ["message"]
}
jdbc_static {
loaders => [
{
id => "ZPCJDID"
query => "select DEVICE_CODE,CJDID from HC_ZS_STATIC_CJD_ZP WHERE CJDID!=12"
local_table => "ZPCJDID"
}
]
local_db_objects => [
{
name => "ZPCJDID"
index_columns => ["DEVICE_CODE"]
columns => [
["DEVICE_CODE", "varchar(30)"],
["CJDID", "varchar(20)"]
]
}
]
local_lookups => [
{
query => "select DISTINCT(CJDID) from ZPCJDID WHERE DEVICE_CODE = :device_code"
parameters => {device_code => "code"}
target => "cjdid1"
}
]
jdbc_user => "root"
jdbc_password => "root"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_driver_library => "/root/logstashfile/test/mysql-connector-java-5.1.37.jar"
jdbc_connection_string => "jdbc:mysql://10.10.0.101:3306/zeus?characterEncoding=UTF-8"
add_field => { cjdid => "%{[cjdid1][0][cjdid]}"}
remove_field => ["cjdid1"]
}
ruby{
path => "/root/logstashfile/test/rubycode.rb"
script_params => {"ids"=>"NHKS10099|DZQ10028|DZQ10041"}
}
}
output{
stdout{}
}
ruby代码文件rubycode.rb:
def register(params)
@arg = params["ids"]
end
def filter(event)
arg=@arg.split('|')
if arg.include?(event.get('cjdid'))
then
return [event]
else
return []
end
end
现在输入如下类型数据:
{"ANTENNA":"2","ATTACHMENT":"{}","CARD_ID":"","CARRY_CAPACITY":"","COLLECT_TIME":1536223405747,"CONFIDENCE":"1.0","CONTENT1":"CCC123","CONTENT2":"50000013839292","DEVICE_DESCRIPTION":"","DEVICE_ID":"1137","DEVICE_TYPE":"RFID","DIRECTION":"东向","DISCOVER_TYPE":"FIRST_FOUND","DISPLACEMENT":"","EID":"502156335","EPC":"002041730000000000000000","EXAMINE_EXPIRE_DATE":"","FACTORY_DATE":"","FIRST_DISCOVER_TIME":"2018-09-06 16:43:25.747","FORCE_SCRAP_DATE":"","LANE_NUMBER":2,"LAST_DISCOVER_TIME":"2018-09-06 16:43:25.747","LOCATION":"","PLATE_TYPE":"","POWER_RATING":"","READER":"星光大道_锦橙路路口_星光大道方向","READERIP":"11.11.11.57","RECEIVED_TIME":"2018-09-06 16:43:36.325","RECORD_ID":1832890855,"RESULT":"成功","SOURCE":"cq_nms","TIME":"2018-09-06 16:43:25.747","USER126":"cce7c0000000d000d9012ff0","USER92":"0cc52740","VEHICLE_COLOR":"","VEHICLE_TYPE":"2","VEHICLE_USER_TYPE":""}
最后要求存储在hdfs中(只存值),则代码如下
conf文件mysqltoout
input{
kafka {
bootstrap_servers => "10.10.0.110:9092,10.10.0.111:9092,10.10.0.112:9092" #kafka的zk集群地址
group_id => "hdfs" #消费者组,不要和ELK上的消费者一样
client_id => "RFID_ORIGIN"
topics => ["RFID"] #topic
consumer_threads => 1
type => "RFID_ORIGIN"
}
}
filter{
json {
source => "message"
}
jdbc_static {
loaders => [
{
id => "RFIDCJDID"
query => "select EQUIPMENT_RFID,CJDID from HC_ZS_STATIC_CJD_RFID"
local_table => "RFIDCJDID"
},
{
id => "ZPDAZU"
query => "select CJDID,CJDNAME2 from HC_ZS_STATIC_COLLECT_POINT where CJDNAME2='大足区'"
local_table => "ZPDAZU"
}
]
local_db_objects => [
{
name => "RFIDCJDID"
index_columns => ["EQUIPMENT_RFID"]
columns => [
["EQUIPMENT_RFID", "varchar(30)"],
["CJDID", "varchar(20)"]
]
},
{
name => "ZPDAZU"
index_columns => ["CJDID"]
columns => [
["CJDID", "varchar(20)"],
["CJDNAME2", "varchar(30)"]
]
}
]
local_lookups => [
{
query => "select count(1) num from (select a.CJDID,EQUIPMENT_RFID from RFIDCJDID a inner join ZPDAZU b on a.cjdid=b.cjdid ) c where c.EQUIPMENT_RFID= :device_code"
parameters => {device_code => "READERIP"}
target => "num1"
}
]
jdbc_user => "root"
jdbc_password => "root"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_driver_library => "/home/mysql-connector-java-5.1.37.jar"
jdbc_connection_string => "jdbc:mysql://10.10.0.101:3306/zeus?characterEncoding=UTF-8"
staging_directory => "/home/jdbc_static"
add_field => { num => "%{[num1][0][num]}"}
remove_field => ["num1","code"]
}
ruby{
path => "/home/rubycode.rb"
}
mutate{
add_field => { myfields => "ANTENNA|ATTACHMENT|CARD_ID|CARRY_CAPACITY|COLLECT_TIME|CONFIDENCE|CONTENT1|CONTENT2|DEVICE_DESCRIPTION
|DEVICE_ID|DEVICE_TYPE|DIRECTION|DISCOVER_TYPE|DISPLACEMENT|EID|EPC|EXAMINE_EXPIRE_DATE|FACTORY_DATE|FIRST_DISCOVER_TIME|FORCE_SCRAP_DATE
|LANE_NUMBER|LAST_DISCOVER_TIME|LOCATION|PLATE_TYPE|POWER_RATING|READER|READERIP|RECEIVED_TIME|RECORD_ID|RESULT|SOURCE|TIME
|USER126|USER92|VEHICLE_COLOR|VEHICLE_TYPE|VEHICLE_USER_TYPE"}
}
ruby{
path => "/home/strOpt.rb"
# script_params => { "percentage" => "ANTENNA|ATTACHMENT|CARD_ID|CARRY_CAPACITY|COLLECT_TIME|CONFIDENCE|CONTENT1|CONTENT2" }
}
mutate{
remove_field => ["myfields","DIRECTION","RESULT","COLLECT_TIME","EPC","LAST_DISCOVER_TIME","DEVICE_ID","FORCE_SCRAP_DATE"
,"EXAMINE_EXPIRE_DATE","FACTORY_DATE","PLATE_TYPE","CONTENT1","SOURCE","READER","DEVICE_TYPE","ATTACHMENT","CARD_ID"
,"EID","CARRY_CAPACITY","VEHICLE_USER_TYPE","LOCATION","ANTENNA","DISCOVER_TYPE","TIME","DEVICE_DESCRIPTION","num"
,"CONFIDENCE","POWER_RATING","RECEIVED_TIME","VEHICLE_TYPE","FIRST_DISCOVER_TIME","READERIP","USER126","VEHICLE_COLOR"
,"USER92","RECORD_ID","CONTENT2","DISPLACEMENT","LANE_NUMBER"]
}
}
output{
if [type] == "RFID_ORIGIN" {
webhdfs{
host => "10.10.0.110" #hdfs的namenode地址
port => 50070 #webhdfs端口
user => "root" #hdfs运行的用户啊,以这个用户的权限去写hdfs。
path => "/kafkaToHdfs/RFID/RFID-%{+YYYY}-%{+MM}-%{+dd}.log"
}
}
stdout{}
}
strOpt.rb文件如下:
def filter(event)
array=event.get('myfields').split('|')
array.each do |value|
if value=="ANTENNA"
if event.get(value)!=nil
event.set('message',event.get(value).to_s+',')
else
event.set('message',',')
end
elsif value=="ATTACHMENT"
if event.get(value)!=nil
if event.get(value)=="{}"
event.set('message',event.get('message')+',')
else
event.set('message',event.get('message')+event.get(value).to_s.index(1,event.get(value).length-1)+',')
end
else
event.set('message',event.get('message')+',')
end
else
if event.get(value)!=nil
if event.get(value) == ""
event.set('message',event.get('message')+',')
else
event.set('message',event.get('message')+event.get(value).to_s+',')
end
else
event.set('message',event.get('message')+',')
end
end
end
return [event]
end
rubycode.rb文件如下:
def filter(event)
num=event.get('num').to_i
if num>= 1 then
return [event]
else
return []
end
end