elasticsearch 使用 Logstash 做数据采集

1,下载
下载地址(根据自己需要的版本下载):
https://www.elastic.co/cn/downloads/logstash

我这里是使用的6.2.1版本,直接下载就可以了

wget https://artifacts.elastic.co/downloads/logstash/logstash-6.2.1.tar.gz

2,解压

tar -zxvf logstash-6.2.1.tar.gz

将解压后的目录移动到/usr/local/目录下

mv logstash-6.2.1 /usr/local/
cd /usr/local/logstash-6.2.1/

3,安装 logstash 所需依赖 ruby 和 rubygems(注意:需要 ruby 的版本在 1.8.7 以上)

yum install -y ruby rubygems

检查 ruby 版本

ruby -v

输出如下,表示安装成功
(图示:ruby -v 命令输出的版本信息截图)
4,安装 logstash-input-jdbc

cd /usr/local/logstash-6.2.1/
./bin/logstash-plugin install --no-verify  logstash-input-jdbc

5,编写配置文件
我这里的配置文件主要是2个配置文件,mysql同步表文件(mysql.conf)和索引库映射文件(question_template.json),都放在 logstash 的 config 配置文件下
1,mysql.conf

input {
  stdin {
  }
  jdbc {
  jdbc_connection_string => "jdbc:mysql://192.168.1.1:3306/java_interview_dev?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&useSSL=false&serverTimezone=Asia/Shanghai"
  # the user we wish to execute our statement as
  jdbc_user => "root"
  jdbc_password => "123456"
  # the path to our downloaded jdbc driver
  jdbc_driver_library => "/usr/local/logstash-6.2.1/lib/mysql-connector-java-8.0.16.jar"
  # the name of the driver class for mysql
  jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
  jdbc_paging_enabled => "true"
  jdbc_page_size => "50000"
  #要执行的sql文件
  #statement_filepath => "/conf/course.sql"
  statement => "SELECT question_id, title, answer, type_ids, DATE_FORMAT(create_time, '%Y-%m-%d %H:%i:%S') AS create_time FROM question WHERE `timestamp` > DATE_ADD(:sql_last_value,INTERVAL 8 HOUR)"
  #定时配置
  schedule => "* * * * *"
  record_last_run => true
  #记录最后采集时间点,保存到logstash_metadata文件中
  last_run_metadata_path => "/usr/local/logstash-6.2.1/config/logstash_metadata"
  }
}


output {
  elasticsearch {
  #ES的ip地址和端口
  hosts => "localhost:9200"
  #hosts => ["localhost:9200"]
  #ES索引库名称
  index => "question_dev"
  document_id => "%{question_id}"
  document_type => "doc"
  template =>"/usr/local/logstash-6.2.1/config/question_template.json"
  template_name =>"question_dev"
  template_overwrite =>"true"
  }
  stdout {
  #日志输出
  codec => json_lines
  }
}

2,question_template.json

{
  "mappings": {
    "doc": {
      "properties": {
        "question_id": {
          "type": "integer"
        },
        "title": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        },
        "answer": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        },
        "type_ids": {
          "type": "text"
        },
        "create_time": {
          "format": "yyyy-MM-dd HH:mm:ss",
          "type": "date"
        }
      }
    }
  },
  "template": "question_dev"
}

6,运行

/usr/local/logstash-6.2.1/bin/logstash -f /usr/local/logstash-6.2.1/config/mysql.conf

使 logstash 一直保持在后台运行命令:

nohup /usr/local/logstash-6.2.1/bin/logstash -f /usr/local/logstash-6.2.1/config/mysql.conf 2>&1 &

运行前:
(图示:运行前索引库的状态截图)
索引库里面没有一条数据。
运行后:
(图示:运行后索引库已录入数据的截图)
运行后我们发现,logstash 会根据 mysql.conf 里面的配置项 statement 执行 SQL,并将查询到的数据全部录入到索引库;默认情况下 logstash 会每分钟执行一次,可以通过修改 schedule 定时任务配置来调整执行频率。

到这里使用 logstash 做es数据采集的过程就已经全部完成了

备注:配置不太明白的可以看我附件上传的教学视频

https://download.csdn.net/download/u012946310/11827678

备注:如果需要同时对多个数据采集并且输出到不同的索引库,参考如下配置:

input {
  stdin {
  }
  #dev数据库问题索引
  jdbc {
	#采集类型,避免输出时混淆,使用此类型判断输出
    type => "dev_question"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/cx_blockchain_dev?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&useSSL=false&serverTimezone=Asia/Shanghai"
    # the user we wish to execute our statement as
    jdbc_user => "root"
    jdbc_password => "cx123456789cx"
    # the path to our downloaded jdbc driver
    jdbc_driver_library => "/usr/local/logstash-6.2.1/lib/mysql-connector-java-8.0.16.jar"
    # the name of the driver class for mysql
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "50000"
    #要执行的sql文件
    #statement_filepath => "/conf/course.sql"
    statement => "SELECT question_id, title, `desc`, label_code, answer_count, create_user_id, DATE_FORMAT(create_time, '%Y-%m-%d %H:%i:%S') AS create_time FROM question WHERE `timestamp` > DATE_ADD(:sql_last_value,INTERVAL 8 HOUR)"
    #定时配置
    #schedule => "*/10 * * * *"
    schedule => "* * * * *"
    record_last_run => true
	#记录最后采集时间点,保存到dev_question_run_log文件中
    last_run_metadata_path => "/usr/local/logstash-6.2.1/config/es-conf/dev_question_run_log"
  }
  
  #test数据库问题索引
  jdbc {
	#采集类型,避免输出时混淆,使用此类型判断输出
    type => "test_question"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/cx_blockchain_test?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&useSSL=false&serverTimezone=Asia/Shanghai"
    # the user we wish to execute our statement as
    jdbc_user => "root"
    jdbc_password => "cx123456789cx"
    # the path to our downloaded jdbc driver
    jdbc_driver_library => "/usr/local/logstash-6.2.1/lib/mysql-connector-java-8.0.16.jar"
    # the name of the driver class for mysql
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "50000"
    #要执行的sql文件
    #statement_filepath => "/conf/course.sql"
    statement => "SELECT question_id, title, `desc`, label_code, answer_count, create_user_id, DATE_FORMAT(create_time, '%Y-%m-%d %H:%i:%S') AS create_time FROM question WHERE `timestamp` > DATE_ADD(:sql_last_value,INTERVAL 8 HOUR)"
    #定时配置
    #schedule => "*/12 * * * *"
    schedule => "* * * * *"
    record_last_run => true
	#记录最后采集时间点,保存到test_question_run_log文件中
    last_run_metadata_path => "/usr/local/logstash-6.2.1/config/es-conf/test_question_run_log"
  }
}


output {
  #dev_question索引输出
  if[type]=="dev_question"{
     elasticsearch {
       #ES的ip地址和端口
       hosts => "localhost:9200"
       #hosts => ["localhost:9200"]
  	  #ES索引库名称
  	  index => "dev_question"
  	  document_id => "%{question_id}"
  	  document_type => "doc"
  	  template =>"/usr/local/logstash-6.2.1/config/es-conf/question_template.json"
  	  template_name =>"question"
  	  template_overwrite =>"true"
  	}
  	stdout {
  	  #日志输出
  	  codec => json_lines
  	}
  }
  
  #test_question索引输出
  if[type]=="test_question"{
  	elasticsearch {
  	  #ES的ip地址和端口
  	  hosts => "localhost:9200"
  	  #hosts => ["localhost:9200"]
  	  #ES索引库名称
  	  index => "test_question"
  	  document_id => "%{question_id}"
  	  document_type => "doc"
  	  template =>"/usr/local/logstash-6.2.1/config/es-conf/question_template.json"
  	  template_name =>"question"
  	  template_overwrite =>"true"
  	}
  	stdout {
  	  #日志输出
  	  codec => json_lines
  	}
  }
}

上面主要新增了一个 type 字段,并且在输出的时候判断 type 字段,以此来区分采集的数据输出到不同的索引库

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值