使用 DataX 将 Elasticsearch(ES)数据抽取到 Hive

收到一个需求:将 ES 集群中的数据抽取到大数据平台。
首先在 Hive 中创建一张与之对应的数据表:

-- ODS landing table for the documents that the DataX job extracts from Elasticsearch.
-- Column names and order match the reader/writer column lists in the DataX job config.
-- ORC is a binary columnar format read through its own SerDe, so no
-- ROW FORMAT DELIMITED / FIELDS TERMINATED BY clause is declared: it would be ignored.
CREATE TABLE IF NOT EXISTS ods.pr_es_test_orc (
    clueId     STRING,
    brandId    STRING,
    clueEstype STRING
)
STORED AS ORC;

DataX 作业配置(见下方 JSON)中需要重点关注的参数:
"endpoint":ES 的访问地址(http://IP:端口);
"accessId":ES 用户名;
"accessKey":ES 密码;
"index":要读取的索引名,支持通配符,例如 "前缀-*" 会匹配所有以该前缀开头的索引;
"scroll":scroll 游标(搜索上下文)的保留时间,例如 "3m" 表示 3 分钟。

{
    "job": {
        "setting": {
            "speed": {
                "channel": 7
            }
        },
        "content": [{
            "reader": {
                "name": "elasticsearchreader",
                "parameter": {
                    "endpoint": "http://XXX.XXX.XXX.XXX:9200",
                    "accessId": "XXXXXXX*",
                    "accessKey": "XXXXXXXXXXX",
                    "index": "XXXXXX-*",
                    "type": "_doc",
                    "scroll": "3m",
                    "headers": {},
                    "search": [{
                        "query": {
                            "bool": {
                                "filter": [{
                                    "range": {
                                        "createdTime": {
                                            "boost": 1,
                                            "from": "${st}",
                                            "include_lower": true,
                                            "include_upper": true,
                                            "to": "${et}"
                                        }
                                    }
                                }]
                            }
                        },
                        "size": 10
                    }],
                    "table": {
                        "column": [
                            {"name": "clueId"},
                            {"name": "brandId"},
                            {"name": "clueEstype"}
                        ]
                    }
                }
            },
            "writer": {
                "name": "hdfswriter",
                "parameter": {
                    "defaultFS": "hdfs://${hdfs}",
                    "fileType": "ORC",
                    "path": "/user/hive/warehouse/ods.db/pr_es_test_orc",
                    "fileName": "aaaaaa",
                    "column": [
                        {"name": "clueId", "type": "STRING"},
                        {"name": "brandId", "type": "STRING"},
                        {"name": "clueEstype", "type": "STRING"}
                    ],
                    "writeMode": "append",
                    "fieldDelimiter": "|",
                    "compress": "NONE"
                }
            }
        }]
    }
}
  • 0
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 8
    评论
评论 8
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值