Logstash 抽取数据到 Elasticsearch
input
# Input section: accepts events from stdin and polls a MySQL table through the jdbc input plugin.
input {
  stdin {
  }
  jdbc {
    # MySQL JDBC connection settings
    jdbc_connection_string => "jdbc:mysql://ip:3306/test"
    jdbc_user => "root"
    jdbc_password => "xxx"
    # Path to the MySQL JDBC driver jar; it can be downloaded from https://dev.mysql.com/downloads/connector/j/
    jdbc_driver_library => "/home/mt/Desktop/mysql-connector-java-5.1.47/mysql-connector-java-5.1.47-bin.jar"
    # the name of the driver class for mysql
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "50000"
    # The query can be written inline (statement) or loaded from a file (statement_filepath), for example:
    # statement => "SELECT * from Table_test;"
    # statement_filepath => "C:/setup/logstash-7.0.1/config/myconfig/jdbc.sql"
    # Strictly greater than the stored value, so the last row already shipped is not fetched again on every run.
    statement => "SELECT * FROM table WHERE id > :sql_last_value"
    # Cron-like schedule (minute hour day-of-month month day-of-week).
    # "*/1 * * * *" runs every minute; "*/10 * * * *" would run every 10 minutes.
    schedule => "*/1 * * * *"
    # NOTE(review): the output section in this file only ships events whose type is "xxx";
    # confirm where events of type "jdbc" are expected to go.
    type => "jdbc"
    # Whether to record the last run. When true, the last seen value of tracking_column is
    # saved to the file given by last_run_metadata_path.
    record_last_run => "true"
    # Whether to track the value of a specific column. Set to true (together with record_last_run)
    # to track a custom column; otherwise the timestamp of the last run is tracked.
    use_column_value => "true"
    # Required when use_column_value is true: the database column to track. It must be
    # monotonically increasing, typically the MySQL primary key.
    # An alias defined in the SELECT statement may also be used.
    tracking_column => "id"
    last_run_metadata_path => "/home/mt/Desktop/logstash-7.0.1/myconf/last_id"
    # Whether to discard the state stored in last_run_metadata_path. When true, every run
    # starts over and queries all database records from the beginning.
    clean_run => "false"
    # Whether to convert column names to lowercase
    lowercase_column_names => "false"
    # Character set conversion for individual columns
    columns_charset => {
      "message" => "UTF-8"
      "name" => "UTF-8"
    }
  }
}
# SQL Server variant of the jdbc input (place it inside an input { } section).
jdbc {
  # Tag used by the filter/output conditionals to select these events
  type => "xxx"

  # Driver and connection
  jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
  jdbc_driver_library => "/path/sqljdbc42-6.0.8112.jar"
  jdbc_connection_string => "jdbc:sqlserver://ip:1433;DatabaseName=dbname"
  jdbc_user => "xxx"
  jdbc_password => "xxx"
  jdbc_default_timezone => "Asia/Shanghai"

  # Polling: cron-like schedule, "* * * * *" fires every minute
  schedule => "* * * * *"
  # NOTE(review): jdbc_paging_enabled is not set in this block; confirm whether
  # jdbc_page_size has any effect without it.
  jdbc_page_size => "500"

  # Query: createtime is also returned as a formatted string (inserttimestr) and
  # lastupdatetime as a BIGINT (lastupdatetimeint); only rows whose converted
  # lastupdatetime is above the stored :sql_last_value are selected.
  statement => "SELECT *,convert (varchar (30),createtime,25) as inserttimestr,CONVERT(BIGINT,lastupdatetime) as lastupdatetimeint FROM xxx WITH(NOLOCK) WHERE CONVERT(BIGINT,lastupdatetime) > :sql_last_value"
  # Column names are lowercased, so the tracking column below is written in lowercase
  lowercase_column_names => "true"

  # Incremental state: track the numeric alias produced by the statement and
  # persist its last value between runs.
  use_column_value => "true"
  tracking_column => "lastupdatetimeint"
  tracking_column_type => "numeric"
  record_last_run => "true"
  last_run_metadata_path => "/abc/xxx/last_id"
  clean_run => "false"
}
filter
# Filter section: sets @timestamp from the row's creation time and derives the per-day index suffix.
filter {
  if [type] == "xxx" {
    date {
      # Parse the string form of the creation time and store it as the event timestamp
      match => [ "inserttimestr", "YYYY-MM-dd HH:mm:ss.SSS" ]
      target => "@timestamp"
      # One list instead of two separate remove_field entries, so dropping both
      # fields does not depend on how duplicate settings are merged.
      remove_field => [ "inserttimestr", "lastupdatetime" ]
    }
  }
  # Define index_day from the event timestamp (local time) so the output section
  # can build one index per data date.
  ruby {
    code => "event.set('index_day', event.timestamp.time.localtime.strftime('%Y.%m.%d'))"
  }
}
output
# Output section: ships events of type "xxx" to Elasticsearch.
output {
  if [type] == "xxx" {
    elasticsearch {
      hosts => "ip:9200"
      # The event's id field is used as the document id
      document_id => "%{id}"
      # Index name ends with the index_day field of the event
      index => "logstash-indexname-%{index_day}"
    }
  }
}