运行环境
spark:2.3.0
elasticsearch:6.2.3
jdk:1.8
参考说明
Apache Spark support | Elasticsearch for Apache Hadoop [6.2] | Elastic(注意:文档版本需与运行环境 6.2.3 对应)
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/install.html
读写方式1
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-hadoop</artifactId>
<version>6.2.3</version>
</dependency>
读写方式2
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-spark-20_2.11</artifactId>
<version>6.2.3</version>
</dependency>
由于本文示例仅仅是向elasticsearch写入数据,所以本文章讲的是方式2!!!!!
示例代码1
/**
 * Example 1: write two map-shaped documents to Elasticsearch from a local Spark job.
 *
 * <p>BUG FIX: the original called {@code sc.getConf().set(...)} AFTER the
 * {@code JavaSparkContext} was created. Spark clones its configuration at
 * context-construction time, so those settings were silently ignored.
 * The connector settings are now passed per-write via the
 * {@code saveToEs(rdd, resource, cfg)} overload, which es-hadoop honors.
 */
public static void main(String[] args) {
    JavaSparkContext sc = SparkUtil.getJavaSparkContext();
    // Per-operation Elasticsearch connector settings (effective, unlike
    // mutating sc.getConf() after the context exists).
    Map<String, String> esCfg = ImmutableMap.of(
            "es.index.auto.create", "true",
            "es.nodes", "localhost:9200");
    // First document.
    // NOTE(review): "agea" looks like a typo for "age" — confirm the intended
    // field name before relying on the created mapping.
    Map<String, ?> numbers = ImmutableMap.of("agea", 21, "money", 20000);
    // Second document.
    Map<String, ?> airports = ImmutableMap.of("name", "chy", "addr", "南京");
    JavaRDD<Map<String, ?>> javaRDD = sc.parallelize(ImmutableList.of(numbers, airports));
    // Resource is "index/type" (ES 6.x); the index name must be lower-case.
    JavaEsSpark.saveToEs(javaRDD, "testsparkjavardd/docs", esCfg);
}
/**
 * Builds a local-mode {@code JavaSparkContext} shared by the examples.
 *
 * @return a context whose master is {@code local[*]} (all local cores)
 */
public static JavaSparkContext getJavaSparkContext() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("JavaSpark");
    sparkConf.setMaster("local[*]");
    return new JavaSparkContext(sparkConf);
}
示例代码2
// Example 2: write raw JSON strings to Elasticsearch.
// BUG FIX: the original mutated sc.getConf() after the context was created,
// which Spark ignores (the conf is cloned at construction). All connector
// settings are instead passed per-write in the cfg map below.
JavaSparkContext sc = SparkUtil.getJavaSparkContext();
String json1 = "{\"reason\" : \"business\",\"airport\" : \"SFO\",\"id\" : \"3003\"}";
String json2 = "{\"participants\" : 5,\"airport\" : \"OTP\",\"id\" : \"3004\"}";
JavaRDD<String> javaRDD = sc.parallelize(ImmutableList.of(json1, json2));
// "es.mapping.id" makes the document's "id" field the Elasticsearch _id.
// Beware: once that field is dynamically mapped to a numeric type, later
// values must stay numeric or indexing will fail.
Map<String, String> cfg = ImmutableMap.of(
        "es.mapping.id", "id",
        "es.index.auto.create", "true",
        "es.nodes", "localhost:9200");
// Index name is unrestricted apart from ES's own lower-case rule.
JavaEsSpark.saveJsonToEs(javaRDD, "testsparkjson/docs", cfg);
示例代码3
/**
 * Example 3: write Java beans to Elasticsearch.
 *
 * <p>BUG FIX: the original set connector options on {@code sc.getConf()} after
 * the context was created — a silent no-op, since Spark clones the conf at
 * construction time. The settings are now carried in the per-write cfg map.
 */
public static void test3() {
    JavaSparkContext sc = SparkUtil.getJavaSparkContext();
    EsBean p1 = new EsBean("1003", "OTP", 21);
    EsBean p2 = new EsBean("1004", "MUC", 30);
    JavaRDD<EsBean> javaRDD = sc.parallelize(ImmutableList.of(p1, p2));
    // "es.mapping.id" uses the bean's id property as the document _id.
    // If the field was dynamically mapped as numeric, values must stay numeric.
    Map<String, String> cfg = ImmutableMap.of(
            "es.mapping.id", "id",
            "es.index.auto.create", "true",
            "es.nodes", "localhost:9200");
    // Index name is unrestricted apart from ES's lower-case requirement.
    JavaEsSpark.saveToEs(javaRDD, "testsparkjavardd/docs", cfg);
}
/**
 * Serializable POJO indexed by the examples; the es-hadoop connector maps
 * each bean property (id, name, age) to a field of the document.
 */
public static class EsBean implements Serializable {

    private String id;
    private String name;
    private int age;

    /** No-arg constructor required for bean-style (de)serialization. */
    public EsBean() {}

    public EsBean(String id, String name, int age) {
        setId(id);
        setName(name);
        setAge(age);
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return this.age;
    }

    public void setAge(int age) {
        this.age = age;
    }
}
运行效果