1. Unzip Logstash.
2. Create a custom folder under the Logstash root directory to hold your own configuration files.
The custom folder holds the two files written below, area.sql and shipper.conf.
Copy the MySQL JDBC driver jar (mysql-connector-java-5.1.44.jar) into custom as well.
First, create the index. The statement below only works on Elasticsearch 7.x and later (on versions before 7.x you must add a type name between mappings and properties; from 7.x on the type name defaults to _doc and can no longer be changed). Note that the mapping relies on the IK and pinyin analysis plugins, so both must be installed.
PUT /area
{
  "settings": {
    "refresh_interval": "5s",
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "analysis": {
      "filter": {
        "pinyin_full_filter": {
          "type": "pinyin",
          "keep_joined_full_pinyin": "true",
          "lowercase": "true",
          "keep_original": "false",
          "keep_first_letter": "false",
          "keep_separate_first_letter": "false",
          "keep_none_chinese": "false",
          "limit_first_letter_length": "50",
          "keep_full_pinyin": "true"
        },
        "pinyin_simple_filter": {
          "type": "pinyin",
          "keep_joined_full_pinyin": "true",
          "lowercase": "true",
          "none_chinese_pinyin_tokenize": "false",
          "padding_char": " ",
          "keep_original": "true",
          "keep_first_letter": "true",
          "keep_separate_first_letter": "false",
          "keep_full_pinyin": "false"
        }
      },
      "analyzer": {
        "pinyinFullIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["asciifolding", "lowercase", "pinyin_full_filter"]
        },
        "ik_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["asciifolding", "lowercase", "pinyin_full_filter", "word_delimiter"]
        },
        "ikIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["asciifolding", "lowercase"]
        },
        "pinyiSimpleIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["pinyin_simple_filter", "lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "area": {
        "type": "text",
        "analyzer": "ikIndexAnalyzer",
        "fields": {
          "ik": {
            "type": "text",
            "analyzer": "ikIndexAnalyzer"
          },
          "spy": {
            "type": "text",
            "analyzer": "pinyiSimpleIndexAnalyzer"
          },
          "fpy": {
            "type": "text",
            "analyzer": "pinyinFullIndexAnalyzer"
          }
        }
      }
    }
  }
}
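Before wiring up Logstash, it is worth sanity-checking the analyzers with the _analyze API. A minimal sketch (the exact tokens returned depend on your IK/pinyin plugin versions):
GET /area/_analyze
{
  "analyzer": "pinyiSimpleIndexAnalyzer",
  "text": "北京市"
}
You should see first-letter tokens such as bjs alongside the original text, which confirms the pinyin filter is active.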
Write area.sql (alias the name column to area so it matches the field defined in the index mapping):
SELECT a.id AS id, a.area_name AS area FROM area a
Write shipper.conf (adjust the database driver path, connection URL, username/password, and SQL file path for your environment):
input {
  jdbc {
    # type => "archive_files_index"
    jdbc_connection_string => "jdbc:mysql://192.168.0.146:3306/face"
    jdbc_driver_library => "F:/software/ELK7.2/logstash-7.2.0/logstash-7.2.0/custom/mysql-connector-java-5.1.44.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_user => "root"
    jdbc_password => "root"
    # state file for incremental runs (see the sketch after this config)
    # last_run_metadata_path => "./logstash_jdbc_last_run"
    # run an inline statement instead of a file:
    # statement => "select * from sys_user where name = :name"
    statement_filepath => "F:/software/ELK7.2/logstash-7.2.0/logstash-7.2.0/custom/area.sql"
    # enable paging
    jdbc_paging_enabled => "true"
    jdbc_page_size => "50"
    # sync once per minute
    schedule => "* * * * *"
  }
}
output {
  elasticsearch {
    # Elasticsearch host and port
    hosts => "localhost:9200"
    index => "area"
    # document_type is obsolete on ES 7.x (types were removed), so keep it commented out
    # document_type => "student"
    # use the database id as the document id so re-synced rows update in place
    document_id => "%{id}"
    # doc_as_upsert => true
    # action => "update"
  }
  stdout {
    # print each event as a JSON line
    codec => json_lines
  }
}
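The commented-out last_run_metadata_path line hints at incremental syncing: if the source table had a change-tracking column, the jdbc input could remember the last value it saw and pull only newer rows via :sql_last_value. A hedged sketch of the relevant input settings (update_time is an assumed column for illustration; the test table below does not have one):
input {
  jdbc {
    # ... same connection settings as above ...
    use_column_value => true
    tracking_column => "update_time"
    tracking_column_type => "timestamp"
    last_run_metadata_path => "./logstash_jdbc_last_run"
    statement => "SELECT id, area_name AS area FROM area WHERE update_time > :sql_last_value"
    schedule => "* * * * *"
  }
}
Combined with document_id => "%{id}" in the output, re-synced rows overwrite their existing documents instead of creating duplicates.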
The real table is fairly large (I crawled province, prefecture-level city, and district data for the whole country), so here are just 10 rows as test data:
CREATE TABLE `area` (
`id` int(4) NOT NULL,
`area_name` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'region name',
`parent_id` int(11) NULL DEFAULT NULL COMMENT 'parent region: 0-province 1-city 2-county/district',
INDEX `id`(`id`) USING BTREE,
INDEX `fk_parent_id`(`parent_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
INSERT INTO `area` VALUES (1, '北京市', 0);
INSERT INTO `area` VALUES (2, '市辖区', 1);
INSERT INTO `area` VALUES (3, '东城区', 2);
INSERT INTO `area` VALUES (4, '西城区', 2);
INSERT INTO `area` VALUES (5, '朝阳区', 2);
INSERT INTO `area` VALUES (6, '丰台区', 2);
INSERT INTO `area` VALUES (7, '石景山区', 2);
INSERT INTO `area` VALUES (8, '海淀区', 2);
INSERT INTO `area` VALUES (9, '门头沟区', 2);
INSERT INTO `area` VALUES (10, '房山区', 2);
Everything is ready.
Open a command prompt and change into the Logstash bin directory.
Run: logstash -f ../custom/shipper.conf
Because of the cron schedule, the data is synced once per minute.
Check the data in Kibana:
GET area/_search
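To confirm the pinyin analysis works end to end, query the pinyin sub-fields. A minimal sketch ("beijing" should match 北京市 through the full-pinyin sub-field):
GET area/_search
{
  "query": {
    "multi_match": {
      "query": "beijing",
      "fields": ["area.ik", "area.spy", "area.fpy"]
    }
  }
}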
OK, done.