ES 和 hive数据间转换比较麻烦,通常是jdbc 连hive,用ES api入数。
现可用快捷的hive on ES:
1.创建index
# Create the ES index kk_hive_es with explicit settings and a "doc" type mapping.
# NOTE: the original URL had a trailing space inside the quotes ("kk_hive_es "),
# which does not match the index name used by every later command — removed.
curl -X PUT "localhost:9200/kk_hive_es" -H 'Content-Type: application/json' -d'
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "doc": {
      "properties": {
        "id":   {"type": "text"},
        "name": {"type": "text"},
        "desc": {"type": "text"},
        "date": {
          "type": "date",
          "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
        }
      }
    }
  }
}';
查询
# Show the index's settings and mappings (verifies the PUT above took effect).
curl -X GET "localhost:9200/kk_hive_es"
删除
# Delete the index entirely (irreversible; removes all documents and mappings).
curl -X DELETE "localhost:9200/kk_hive_es"
入数据
# Index one document with explicit id 1 (PUT with an id is an upsert:
# re-running it overwrites the document and increments _version).
# "2018-07-12" is accepted by the mapping's strict_date_optional_time format.
curl -X PUT "localhost:9200/kk_hive_es/doc/1" -H 'Content-Type: application/json' -d'{
"id":"1001",
"name":"kkk",
"desc":"Java从入门到放弃",
"date":"2018-07-12"
}';
执行put后有返回值
_index索引名称
_type类型名
_version版本号
result:"created" 表示是新创建的(ES 6.x 起返回 result 字段;旧版本为 created:true)。
上面的命令每执行一次version就会加1,-XPUT必须指定id。
2.hive on es:
-- Register the ES-Hadoop connector so Hive can talk to Elasticsearch.
ADD JAR /data/kk/elasticsearch-hadoop-6.3.1/dist/elasticsearch-hadoop-hive-6.3.1.jar;

-- IF EXISTS makes the script re-runnable without erroring on first execution.
DROP TABLE IF EXISTS kk_hive_es;

-- External table backed by the ES index/type kk_hive_es/doc.
-- `date` and `desc` are Hive reserved keywords: the column names must be
-- backquoted or the CREATE statement fails to parse.
CREATE EXTERNAL TABLE kk_hive_es (
    id     string,
    name   string,
    `date` timestamp,
    `desc` string
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES(
    'es.nodes' = 'localhost:9200',
    'es.index.auto.create' = 'true',
    'es.resource' = 'kk_hive_es/doc',
    'es.read.metadata' = 'true',
    -- NOTE(review): mapping id -> _metadata._id only works when READING
    -- (it exposes the ES document id as the hive column); for writes the
    -- documented option is 'es.mapping.id' = 'id' — confirm intended direction.
    'es.mapping.names' = 'id:_metadata._id, name:name, date:date, desc:desc');

-- SELECT * relies on kk's column order matching (id, name, date, desc);
-- presumably it does — verify against kk's schema.
INSERT INTO kk_hive_es SELECT * FROM kk LIMIT 10;