spark+elasticsearch

环境:elasticsearch 1.7

java 1.8

scala 2.11.8

sbt 1.2.6

spark 2.3.2

代码

package ex

import org.apache.spark.sql.SparkSession
import org.elasticsearch.spark.rdd.EsSpark
import org.elasticsearch.spark._

object SparkEs {

  /**
   * Demo job: writes three sample chat-room documents (with explicit ids) to
   * Elasticsearch via `saveToEsWithMeta`, then reads the same index back with
   * `esRDD`, printing the document count and each record.
   *
   * ES connection settings (nodes/port/resource/auto-create) are supplied
   * through the SparkSession config and picked up by the es-hadoop connector.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("sparkEs")
      .config("es.nodes", "10.97.14.131")
      .config("es.port", "9200")
      .config("es.resource", "api_chat_room/all-type")
      .config("es.index.auto.create", "true")
      .getOrCreate()
    try {
      val sc = spark.sparkContext

      // Sample documents; each Map becomes one ES document.
      val otp = Map("teacher_id" -> "1", "status" -> "1", "name" -> "高一大家庭", "class_id" -> "1", "admin" -> "@qq.com","room_id" -> "1", "type" -> "1")
      val muc = Map("teacher_id" -> "2", "status" -> "2", "name" -> "高一大家庭", "class_id" -> "2", "admin" -> "@qq.com","room_id" -> "2", "type" -> "2")
      val sfo = Map("teacher_id" -> "3", "status" -> "3", "name" -> "高一大家庭", "class_id" -> "3", "admin" -> "@qq.com","room_id" -> "3", "type" -> "3")

      // Pair each document with its metadata (the tuple's first element is
      // used as the document _id by saveToEsWithMeta).
      val airportsRDD = sc.makeRDD(Seq((1, otp), (2, muc), (3, sfo)))
      airportsRDD.saveToEsWithMeta("api_chat_room/all-type")

      // Read the index back and print what was stored.
      val rdd = sc.esRDD("api_chat_room/all-type")
      val number = rdd.count()
      println(number)
      println("-----------------------------------")
      rdd.foreach(println)
    } finally {
      // Release the SparkSession even if the ES write/read fails.
      spark.stop()
    }
  }
}

sbt依赖

name := "SparkEsTest12"

version := "0.1"

scalaVersion := "2.11.8"

// Spark is already present on the cluster/installation at runtime, so mark it
// "provided": it is then excluded from the assembled jar, avoiding the
// fat-jar-vs-$SPARK_HOME/jars conflict that caused the ClassNotFoundException
// described below. Use %% instead of hard-coding the _2.11 suffix so the
// artifact always matches scalaVersion.
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.3.2" % "provided"

libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.3.2" % "provided"

// NOTE(review): elasticsearch-spark-20 5.0.0 targets Elasticsearch 5.x, but the
// environment above states ES 1.7 — confirm compatibility (the 2.x line of
// elasticsearch-spark is the one built for ES 1.x).
libraryDependencies += "org.elasticsearch" %% "elasticsearch-spark-20" % "5.0.0"

 

问题 1 ClassNotFoundException

[root@zhangchenglong-test bin]# spark-submit --master local  --class ex.SparkEs   /home/zcltest/sparktest/sparkEsTest/SparkEsTest1.jar
18/11/12 18:56:30 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
java.lang.ClassNotFoundException: ex.SparkEs
	at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
	at java.lang.Class.forName0(Native Method)
	at java.lang.Class.forName(Class.java:348)
	at org.apache.spark.util.Utils$.classForName(Utils.scala:239)
	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:851)
	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

之前用 IDEA 将所有依赖打成一个 150MB 的 fat jar 包时就会报这个错,极大可能是 fat jar 内打包的依赖与 Spark 安装路径下 jars 文件夹中的依赖产生冲突导致的。

后来只保留代码源文件5kb,之后将依赖的es插件elasticsearch-spark-20_2.11-5.0.0.jar放入单机版spark的jars文件夹下。重新运行报错消失。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值