datax从mysql同步数据到elasticsearch（使用es的动态模板）

最新推荐文章于 2024-09-05 23:08:02 发布

ASN_forever

最新推荐文章于 2024-09-05 23:08:02 发布

阅读量2.9k

点赞数 1

分类专栏： ETL

本文链接：https://blog.csdn.net/ASN_forever/article/details/106340961

版权

ETL 专栏收录该内容

5 篇文章 1 订阅

订阅专栏

elasticsearch中设置动态模板

PUT _template/hkey_transferbill
{
  // Template priority: the larger the value, the higher the priority. When an
  // index matches several templates they are applied from the lowest order to
  // the highest, so a higher-order template overrides a lower-order one.
  "order": 0,
  // Index names this template applies to; wildcards are allowed.
  "index_patterns": "*",
  "settings": {
    "number_of_shards": 2,
    "number_of_replicas": 1
  },
  "mappings": {
    "data": {
      "dynamic_templates": [
        {
          // String fields whose name does NOT match *Name are mapped as keyword.
          "keyword_field": {
            "unmatch": "*Name",
            "match_mapping_type": "string",
            "mapping": {
              "type": "keyword"
            }
          }
        },
        {
          // String fields whose name matches *Name are mapped as text, with an
          // additional keyword sub-field called "raw".
          "text_fields": {
            "match": "*Name",
            "match_mapping_type": "string",
            "mapping": {
              "type": "text",
              "fields": {
                "raw": {
                  "type": "keyword"
                }
              }
            }
          }
        }
      ],
      // Explicitly mapped fields; these are declared up front instead of being
      // left to the dynamic templates above.
      "properties": {
        "rowkey": {
          "type": "keyword"
        },
        "age": {
          "type": "keyword"
        }
      }
    }
  }
}

datax同步脚本

{
  // NOTE(review): the // comments in this job are annotations for the reader;
  // strip them before running if the DataX JSON parser rejects comments.
  "job": {
    "setting": {
      "speed": {
        "byte": 8388608,
        "channel": 3
      },
      "errorLimit": {
        "record": 0,
        "percentage": 0.02
      }
    },
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "xxx",
            "password": "xxx",
            "connection": [
              {
                // id is selected twice (once aliased as pk) so that the query
                // returns exactly one field per writer column, in the same order.
                "querySql": [
                  "select id as pk,id,province,city,area,longitude,latitude,concat_ws(\",\",latitude,longitude) location from geotable"
                ],
                "jdbcUrl": [ "jdbc:mysql://xxx:3306/xxx" ]
              }
            ]
          }
        },
        "writer": {
          "name": "elasticsearchwriter",
          "parameter": {
            "endpoint": "http://xxx:9200",
            "index": "myindex",
            "type": "data",
            // true clears the index before inserting (overwrite sync);
            // false appends to the existing data.
            "cleanup": true,
            // Must be true: otherwise the mapping is built from the column types
            // declared below (DataX's own template) and the Elasticsearch index
            // template is not used.
            "dynamic": true,
            "settings": {"index": {"number_of_shards": 2, "number_of_replicas": 1}},
            "batchSize": 10000,
            "splitter": ",",
            "column": [
              // The first field is used as the rowkey (document id).
              {"name": "pk", "type": "id"},
              // Because dynamic is true the Elasticsearch dynamic template decides
              // the mapping, so the type given here has no actual effect.
              {"name": "id", "type": "text"},
              {"name": "province", "type": "text"},
              {"name": "city", "type": "text"},
              {"name": "area", "type": "text"},
              {"name": "longitude", "type": "double"},
              {"name": "latitude", "type": "double"},
              {"name": "location", "type": "geo_point"}
            ]
          }
        }
      }
    ]
  }
}

注意：writer中column的元素个数和顺序必须与reader中querySql查询出的字段一一对应（包括作为id的第一个字段），否则字段值会错位写入。