阿里云DataWorks将odps数据同步到es（7版本）采用 index 与 update 方式教程

最新推荐文章于 2023-04-28 16:47:54 发布

了尘自无心

最新推荐文章于 2023-04-28 16:47:54 发布

阅读量1.4k

点赞数

本文链接：https://blog.csdn.net/ADCadc123456789/article/details/113613589

版权

采用 update 方式时需要指定主键，但又不能使用 es 自动生成的主键（_id），所以只能在 odps 表中引入主键字段。

创建索引与mapping

#!/bin/bash
#********************************************************************#
##author:？？？？
##create time:2021-02-03 15:20:11
#********************************************************************#
# Elasticsearch 7 reference commands for the index targeted by the
# DataWorks sync job: search, list indices, delete, show mapping, create.
#
# NOTE(review): these look like standalone reference commands rather than
# a script meant to run top to bottom (the delete precedes the mapping
# lookup and the create) — confirm before running the file as-is.

# Connection placeholders: replace with real values before use.
es_user="用户名"
es_pass="密码"
es_host="主机:端口号"
es_index="index_test_es_update"

# Query documents by condition (term match on cons_no).
curl -XGET -u "${es_user}:${es_pass}" "${es_host}/${es_index}/_search" \
  -H 'Content-Type: application/json' -d '{
  "query": {
    "term": {
      "cons_no": "123456"
    }
  }
}
'

# List all indices. _cat/indices is a cluster-level endpoint, so it is not
# prefixed with an index name.
curl -XGET -u "${es_user}:${es_pass}" "${es_host}/_cat/indices?v"

# Delete the index (destructive: removes the index and all its documents).
curl -XDELETE -u "${es_user}:${es_pass}" "${es_host}/${es_index}?pretty"

# Show the mapping of the index.
curl -XGET -u "${es_user}:${es_pass}" "${es_host}/${es_index}/_mapping?pretty"

# Create the index. Note: the DataWorks ES writer column whose type is "id"
# must not be added to the mapping when creating the index.
curl -XPUT -u "${es_user}:${es_pass}" "${es_host}/${es_index}?pretty" \
  -H 'Content-Type: application/json' -d '
{
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "data_dt": {
        "type": "keyword"
      },
      "aaa": {
        "type": "keyword",
        "null_value": "NULL"
      }
    }
  }
}
'

index 方式脚本同步

{
"type": "job",
"steps": [
{
"stepType": "odps",
"parameter": {
"datasource": "odps_first",
"column": [
"id",
"data_dt",
"aaa"
],
"splitPK": "id",
"emptyAsNull": false,
"table": "test_es_index"
},
"name": "Reader",
"category": "reader"
},
{
"stepType": "elasticsearch",
"parameter": {
"column": [
{
"name": "id",
"type": "keyword"
},
{
"name": "data_dt",
"type": "keyword"
},
{
"name": "aaa",
"type": "keyword"
}
],
"index": "index_test_es_index",
"aliasMode": "append",
"type": "_doc",
"splitter": "",
"timeout": 600000,
"multiThread": true,
"actionType": "index",
"cleanup": false,
"datasource": "ES",
"discovery": false,
"trySize": 30,
"alias": "",
"ignoreParseError": false,
"dynamic": true,
"batchSize": 1000,
"compression": true,
"ignoreWriteError": false
},
"name": "Writer",
"category": "writer"
}
],
"version": "2.0",
"order": {
"hops": [
{
"from": "Reader",
"to": "Writer"
}
]
},
"setting": {
"errorLimit": {
"record": ""
},
"speed": {
"concurrent": 2,
"throttle": false
}
}
}

update方式脚本同步

{
"type": "job",
"steps": [
{
"stepType": "odps",
"parameter": {
"datasource": "odps_first",
"column": [
"id",
"id",
"data_dt",
"aaa"
],
"splitPK": "id",
"emptyAsNull": false,
"table": "test_es_update"
},
"name": "Reader",
"category": "reader"
},
{
"stepType": "elasticsearch",
"parameter": {
"column": [
{
"name": "id",
"type": "id"
},
{
"name": "id",
"type": "keyword"
},
{
"name": "data_dt",
"type": "keyword"
},
{
"name": "aaa",
"type": "keyword"
}
],
"index": "index_test_es_update",
"aliasMode": "append",
"type": "_doc",
"splitter": "",
"timeout": 600000,
"multiThread": true,
"actionType": "update",
"cleanup": false,
"datasource": "ES",
"discovery": false,
"trySize": 30,
"alias": "",
"ignoreParseError": false,
"dynamic": true,
"batchSize": 1000,
"compression": true,
"ignoreWriteError": false
},
"name": "Writer",
"category": "writer"
}
],
"version": "2.0",
"order": {
"hops": [
{
"from": "Reader",
"to": "Writer"
}
]
},
"setting": {
"errorLimit": {
"record": ""
},
"speed": {
"concurrent": 2,
"throttle": false
}
}
}

参数说明
"index": "index_test_es_index", // 索引名称
"type": "_doc", // 索引类型
"timeout": 600000, // 超时时间
"actionType": "index", // 数据插入方式：index 或 update
"cleanup": false, // 插入前是否清除之前的数据，false 表示不清除
"dynamic": true, // 为 true 时使用 Elasticsearch 的自动 mappings，而非数据集成的 mappings

了尘自无心

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
阿里云DataWorks将odps数据同步到es（7版本）采用 index 与 update 方式教程

采用update 时需要指定主键又不能使用生成的主键（_id）所以只能在odps 中引入主键创建索引与mapping#!/bin/bash#********************************************************************###author:？？？？##create time:2021-02-03 15:20:11#*********************************************************
复制链接

扫一扫