环境:elasticsearch 1.7
java 1.8
scala 2.11.8
sbt 1.2.6
spark 2.3.2
代码
package ex
import org.apache.spark.sql.SparkSession
import org.elasticsearch.spark.rdd.EsSpark
import org.elasticsearch.spark._
object SparkEs {

  /** Writes three sample chat-room documents to Elasticsearch via
    * `saveToEsWithMeta`, then reads the index back with `esRDD` and prints
    * the count and every document.
    *
    * Fix vs. original: the SparkSession was never stopped, so the underlying
    * SparkContext leaked on both success and failure; `stop()` is now
    * guaranteed via try/finally.
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("sparkEs")
      .config("es.nodes", "10.97.14.131")              // ES node address
      .config("es.port", "9200")                        // ES HTTP port
      .config("es.resource", "api_chat_room/all-type")  // default index/type
      .config("es.index.auto.create", "true")           // create index on first write
      .getOrCreate()

    try {
      val sc = spark.sparkContext

      // Sample documents; every value is a string, matching the original payload.
      val otp = Map("teacher_id" -> "1", "status" -> "1", "name" -> "高一大家庭", "class_id" -> "1", "admin" -> "@qq.com", "room_id" -> "1", "type" -> "1")
      val muc = Map("teacher_id" -> "2", "status" -> "2", "name" -> "高一大家庭", "class_id" -> "2", "admin" -> "@qq.com", "room_id" -> "2", "type" -> "2")
      val sfo = Map("teacher_id" -> "3", "status" -> "3", "name" -> "高一大家庭", "class_id" -> "3", "admin" -> "@qq.com", "room_id" -> "3", "type" -> "3")

      // (id, document) pairs — the first tuple element is used as the ES document id.
      val airportsRDD = sc.makeRDD(Seq((1, otp), (2, muc), (3, sfo)))
      airportsRDD.saveToEsWithMeta("api_chat_room/all-type")

      // Read the index back and print what was stored.
      val rdd = sc.esRDD("api_chat_room/all-type")
      val number = rdd.count()
      println(number)
      println("-----------------------------------")
      rdd.foreach(println)
    } finally {
      // Release the SparkContext even if the ES write/read fails.
      spark.stop()
    }
  }
}
sbt依赖
name := "SparkEsTest12"

version := "0.1"

scalaVersion := "2.11.8"

// Spark jars already exist under $SPARK_HOME/jars on the cluster; marking them
// "provided" keeps them out of any assembled fat jar and so avoids classpath
// conflicts at spark-submit time. Using %% lets sbt append the Scala binary
// suffix (_2.11) automatically instead of hard-coding it.
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.3.2" % "provided"
libraryDependencies += "org.apache.spark" %% "spark-sql"  % "2.3.2" % "provided"
libraryDependencies += "org.elasticsearch" %% "elasticsearch-spark-20" % "5.0.0"
问题 1 ClassNotFoundException
[root@zhangchenglong-test bin]# spark-submit --master local --class ex.SparkEs /home/zcltest/sparktest/sparkEsTest/SparkEsTest1.jar
18/11/12 18:56:30 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
java.lang.ClassNotFoundException: ex.SparkEs
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.util.Utils$.classForName(Utils.scala:239)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:851)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
之前用 IDEA 将所有依赖打成一个约 150MB 的 fat jar 包时就会报这个错,极大可能是 fat jar 中打入的依赖与 Spark 安装目录下 jars 文件夹中的同名依赖发生冲突所致。
后来改为只打包代码源文件(约 5KB),并将依赖的 ES 连接器 elasticsearch-spark-20_2.11-5.0.0.jar 单独放入单机版 Spark 的 jars 文件夹下,重新运行后报错消失。