SparkSQL-Hive2ES
一. IDEA local 模式
VM option: -Dspark.master=local
APP01
package com;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.elasticsearch.spark.sql.api.java.JavaEsSparkSQL;
import org.spark_project.guava.collect.ImmutableMap;
import java.util.HashMap;
import java.util.Map;
/**
 * Copies the rows of a Hive table into an Elasticsearch index via Spark SQL
 * and the elasticsearch-hadoop connector.
 */
public class App01 {

    /**
     * Entry point: exports {@code test.user_orc} to the {@code media_all_rs_v/tt} index.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String index = "media_all_rs_v/tt";
        // Whether the index is created automatically from the Hive table structure.
        // Usually "false" so the structure cannot drift; create the index from an
        // explicit mapping beforehand instead.
        String index_auto_create = "false";
        // Field used as the Elasticsearch document id.
        String es_mapping_id = "entrance_key";
        String table_name = "test.user_orc";
        String es_nodes = "hadoop102:9200";
        tableToEs(index, index_auto_create, es_mapping_id, table_name, es_nodes);
    }

    /**
     * Writes every row of a Hive table to Elasticsearch using the upsert write operation.
     *
     * <p>All connector settings are taken from the arguments; nothing is hard-coded,
     * so the values supplied by the caller are the ones actually sent to the connector.
     * The Spark session obtained from {@code SparkClient.getSpark()} is stopped when
     * the method finishes, whether the write succeeded or threw.
     *
     * @param index             target Elasticsearch resource, e.g. {@code "media_all_rs_v/tt"}
     * @param index_auto_create value for {@code es.index.auto.create} ({@code "true"} or {@code "false"})
     * @param es_mapping_id     name of the field used as document id ({@code es.mapping.id});
     *                          a row whose id already exists updates that document, otherwise
     *                          it is inserted, so the field must be unique
     * @param table_name        Hive table to read, e.g. {@code "test.user_orc"}
     * @param es_nodes          Elasticsearch node address(es) for {@code es.nodes},
     *                          e.g. {@code "hadoop102:9200"}
     */
    public static void tableToEs(String index, String index_auto_create, String es_mapping_id, String table_name, String es_nodes)
    {
        SparkSession spark = SparkClient.getSpark();
        try {
            // Read the table by name instead of concatenating it into a SQL string.
            Dataset<Row> df = spark.table(table_name);

            Map<String, String> esConfig = new HashMap<>();
            esConfig.put("es.index.auto.create", index_auto_create);
            esConfig.put("es.nodes", es_nodes);
            // Key used for updates: an existing id is updated, a new id is inserted.
            esConfig.put("es.mapping.id", es_mapping_id);
            // Use upsert mode.
            esConfig.put("es.write.operation", "upsert");
            // Optional settings left unset here: "es.nodes.wan.only", "es.port".

            JavaEsSparkSQL.saveToEs(df, index, esConfig);
        } finally {
            // Always release the session, even when the read or the write fails.
            spark.stop();
        }
    }
}
将 hive-site.xml 放入 resources 目录
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0