es版本:6.3.1
spark版本:2.2.0
导入pom依赖
<!-- Elasticsearch connector for Spark. The version is kept equal to the
     Elasticsearch version stated above (6.3.1). The "_2.11" suffix is
     presumably the Scala binary version; confirm it matches the Spark build. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-spark-20_2.11</artifactId>
<version>6.3.1</version>
</dependency>
测试程序:
package scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._
/**
 * Demo job that reads documents from the Elasticsearch index `bigdata`
 * through the elasticsearch-spark connector and prints the `name`,
 * `author` and `version` fields of each document.
 *
 * Relies on the file-level `import org.elasticsearch.spark._`, which adds
 * the `esRDD` method to `SparkContext`.
 */
object SparkWithESDemo {

  /**
   * Entry point: builds a local SparkSession configured for the
   * Elasticsearch node `mini1`, runs a `match_all` query and prints one
   * `(name, author, version)` tuple per hit.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[*]")
      .appName(SparkWithESDemo.getClass.getSimpleName)
      .config("es.nodes", "mini1")
      // The connector setting is named "es.port"; the bare key "port" is not
      // a connector setting, so the value was silently ignored.
      .config("es.port", "9200")
      .config("es.index.auto.create", "true")
      .getOrCreate()

    try {
      // Query DSL body sent to Elasticsearch: match every document.
      val queryStr =
        """
          |{
          | "query" : {
          | "match_all" : {}
          | }
          |}
          |""".stripMargin

      spark.sparkContext
        .esRDD("bigdata", queryStr)
        .map { case (_, record) =>
          // Each element is (document id, field map); a missing field falls
          // back to the empty string.
          val name = record.getOrElse("name", "")
          val author = record.getOrElse("author", "")
          val version = record.getOrElse("version", "")
          (name, author, version)
        }
        .foreach(println)
    } finally {
      // Always release the session, including when the job fails.
      spark.stop()
    }
  }
}
用 Spark 代码读取 ES 集群数据时报如下错误:
Driver stacktrace:
19/11/23 13:39:04 INFO DAGScheduler: Job 0 failed: ma