1,下载
下载地址(根据自己需要的版本下载):
https://www.elastic.co/cn/downloads/logstash
我这里是使用的6.2.1版本,直接下载就可以了
wget https://artifacts.elastic.co/downloads/logstash/logstash-6.2.1.tar.gz
2,解压
tar -zxvf logstash-6.2.1.tar.gz
将解压后的目录移动到/usr/local/目录下
mv logstash-6.2.1 /usr/local/
cd /usr/local/logstash-6.2.1/
3,安装 logstash 所需依赖 ruby 和 rubygems(注意:需要 ruby 的版本在 1.8.7 以上)
yum install -y ruby rubygems
检查 ruby 版本
ruby -v
输出如下,表示安装成功
4,安装 logstash-input-jdbc
cd /usr/local/logstash-6.2.1/
./bin/logstash-plugin install --no-verify logstash-input-jdbc
5,编写配置文件
我这里的配置文件主要是2个配置文件,mysql同步表文件(mysql.conf)和索引库映射文件(question_template.json),都放在 logstash 的 config 配置文件下
1,mysql.conf
input {
stdin {
}
jdbc {
jdbc_connection_string => "jdbc:mysql://192.168.1.1:3306/java_interview_dev?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&useSSL=false&serverTimezone=Asia/Shanghai"
# the user we wish to execute our statement as
jdbc_user => "root"
jdbc_password => "123456"
# the path to our downloaded jdbc driver
jdbc_driver_library => "/usr/local/logstash-6.2.1/lib/mysql-connector-java-8.0.16.jar"
# the name of the driver class for mysql
jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
#要执行的sql文件
#statement_filepath => "/conf/course.sql"
statement => "SELECT question_id, title, answer, type_ids, DATE_FORMAT(create_time, '%Y-%m-%d %H:%i:%S') AS create_time FROM question WHERE `timestamp` > DATE_ADD(:sql_last_value,INTERVAL 8 HOUR)"
#定时配置
schedule => "* * * * *"
record_last_run => true
#记录最后采集时间点,保存到logstash_metadata文件中
last_run_metadata_path => "/usr/local/logstash-6.2.1/config/logstash_metadata"
}
}
output {
elasticsearch {
#ES的ip地址和端口
hosts => "localhost:9200"
#hosts => ["localhost:9200"]
#ES索引库名称
index => "question_dev"
document_id => "%{question_id}"
document_type => "doc"
template =>"/usr/local/logstash-6.2.1/config/question_template.json"
template_name =>"question_dev"
template_overwrite =>"true"
}
stdout {
#日志输出
codec => json_lines
}
}
2,question_template.json
{
"mappings": {
"doc": {
"properties": {
"question_id": {
"type": "integer"
},
"title": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"answer": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"type_ids": {
"type": "text"
},
"create_time": {
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
}
}
}
},
"index_patterns": ["question_dev"]
}
6,运行
/usr/local/logstash-6.2.1/bin/logstash -f /usr/local/logstash-6.2.1/config/mysql.conf
使 logstash 一直保持在后台运行命令:
nohup /usr/local/logstash-6.2.1/bin/logstash -f /usr/local/logstash-6.2.1/config/mysql.conf 2>&1 &
运行前:
索引库里面没有一条数据,
运行后:
运行后我们发现,logstash 会根据 mysql.conf 里面的配置项 statement 执行的sql所查询到的数据全部录入到索引库,默认的 logstash 会每分钟执行一次,可以根据配置的 schedule 定时任务修改
到这里使用 logstash 做es数据采集的过程就已经全部完成了
备注:配置不太明白的可以看我附件上传的教学视频
https://download.csdn.net/download/u012946310/11827678
备注:如果需要同时对多个数据采集并且输出到不同的索引库,参考如下配置:
input {
stdin {
}
#dev数据库问题索引
jdbc {
#采集类型,避免输出时混淆,使用此类型判断输出
type => "dev_question"
jdbc_connection_string => "jdbc:mysql://localhost:3306/cx_blockchain_dev?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&useSSL=false&serverTimezone=Asia/Shanghai"
# the user we wish to execute our statement as
jdbc_user => "root"
jdbc_password => "cx123456789cx"
# the path to our downloaded jdbc driver
jdbc_driver_library => "/usr/local/logstash-6.2.1/lib/mysql-connector-java-8.0.16.jar"
# the name of the driver class for mysql
jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
#要执行的sql文件
#statement_filepath => "/conf/course.sql"
statement => "SELECT question_id, title, `desc`, label_code, answer_count, create_user_id, DATE_FORMAT(create_time, '%Y-%m-%d %H:%i:%S') AS create_time FROM question WHERE `timestamp` > DATE_ADD(:sql_last_value,INTERVAL 8 HOUR)"
#定时配置
#schedule => "*/10 * * * *"
schedule => "* * * * *"
record_last_run => true
#记录最后采集时间点,保存到dev_question_run_log文件中
last_run_metadata_path => "/usr/local/logstash-6.2.1/config/es-conf/dev_question_run_log"
}
#test数据库问题索引
jdbc {
#采集类型,避免输出时混淆,使用此类型判断输出
type => "test_question"
jdbc_connection_string => "jdbc:mysql://localhost:3306/cx_blockchain_test?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&useSSL=false&serverTimezone=Asia/Shanghai"
# the user we wish to execute our statement as
jdbc_user => "root"
jdbc_password => "cx123456789cx"
# the path to our downloaded jdbc driver
jdbc_driver_library => "/usr/local/logstash-6.2.1/lib/mysql-connector-java-8.0.16.jar"
# the name of the driver class for mysql
jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
#要执行的sql文件
#statement_filepath => "/conf/course.sql"
statement => "SELECT question_id, title, `desc`, label_code, answer_count, create_user_id, DATE_FORMAT(create_time, '%Y-%m-%d %H:%i:%S') AS create_time FROM question WHERE `timestamp` > DATE_ADD(:sql_last_value,INTERVAL 8 HOUR)"
#定时配置
#schedule => "*/12 * * * *"
schedule => "* * * * *"
record_last_run => true
#记录最后采集时间点,保存到test_question_run_log文件中
last_run_metadata_path => "/usr/local/logstash-6.2.1/config/es-conf/test_question_run_log"
}
}
output {
#dev_question索引输出
if [type] == "dev_question" {
elasticsearch {
#ES的ip地址和端口
hosts => "localhost:9200"
#hosts => ["localhost:9200"]
#ES索引库名称
index => "dev_question"
document_id => "%{question_id}"
document_type => "doc"
template =>"/usr/local/logstash-6.2.1/config/es-conf/question_template.json"
template_name =>"question"
template_overwrite =>"true"
}
stdout {
#日志输出
codec => json_lines
}
}
#test_question索引输出
if [type] == "test_question" {
elasticsearch {
#ES的ip地址和端口
hosts => "localhost:9200"
#hosts => ["localhost:9200"]
#ES索引库名称
index => "test_question"
document_id => "%{question_id}"
document_type => "doc"
template =>"/usr/local/logstash-6.2.1/config/es-conf/question_template.json"
template_name =>"question"
template_overwrite =>"true"
}
stdout {
#日志输出
codec => json_lines
}
}
}
上面主要新增了一个 type 字段,并且在输出的时候判断 type 字段,以此来区分采集的数据输出到不同的索引库