SparkSql 读取elasticsearch 表数据

                            SparkSql 读取elasticsearch 表数据

1 版本

    Spark 2.3.2,Elasticsearch 5.3.3,Scala 2.11

2 Pom.xml 部分依赖

<!-- Elasticsearch Spark connector: the artifact suffix (_2.11) must match the
     Scala binary version, and the version (5.3.3) should match the ES cluster. -->
<dependency>
  <groupId>org.elasticsearch</groupId>
  <artifactId>elasticsearch-spark-20_2.11</artifactId>
  <version>5.3.3</version>
</dependency>

<dependency>
  <groupId>org.scala-lang</groupId>
  <artifactId>scala-library</artifactId>
  <version>${scala.version}</version>
</dependency>

<dependency>
  <groupId>junit</groupId>
  <artifactId>junit</artifactId>
  <version>4.11</version>
  <scope>test</scope>
</dependency>

<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_${scala.spark.version}</artifactId>
  <version>${spark.version}</version>
</dependency>

<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>${spark.version}</version>
</dependency>

<!-- Exclude slf4j-log4j12 / log4j to avoid logging-binding conflicts. -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming_2.11</artifactId>
  <version>${spark.version}</version>
  <exclusions>
    <exclusion>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
    </exclusion>
    <exclusion>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
    </exclusion>
  </exclusions>
</dependency> <!-- fix: this closing tag was missing in the original -->

 3 spark-sql读写ES

   

object App {

  /**
   * Demo entry point: reads an Elasticsearch index into a DataFrame via
   * Spark SQL, registers it as a temp view, shows it, then writes the rows
   * back to Elasticsearch.
   *
   * Fix: a plain `main` method replaces `extends App` — the App trait has
   * initialization-order pitfalls and is discouraged for entry points.
   */
  def main(args: Array[String]): Unit = {
    println("Hello World!")

    val conf = new SparkConf().setAppName("app1").setMaster("local[2]")
    conf.set("es.index.auto.create", "true") // let the connector create the ES index if missing
    conf.set("es.nodes", "192.168.220.128")  // ES node address
    conf.set("es.port", "9200")              // ES REST port
    conf.set("es.nodes.wan.only", "true")    // restrict connections to the declared nodes (WAN/NAT)

    val spark = SparkSession.builder().config(conf).getOrCreate()

    // Read an Elasticsearch index/type into a DataFrame.
    // `spark.read` replaces the deprecated `spark.sqlContext.read`.
    val essessionDataFrame = spark.read
      .format("org.elasticsearch.spark.sql")
      .option("inferSchema", "true")
      .load("index/type")
    essessionDataFrame.createOrReplaceTempView("sessionTable")
    essessionDataFrame.show()

    // Write the rows back out.
    // NOTE(review): saveToEs on an RDD relies on the implicits from
    // org.elasticsearch.spark._ being imported — confirm the import list,
    // which is not visible in this snippet.
    val rdd = essessionDataFrame.rdd
    rdd.saveToEs("index/type")

    spark.stop()
    println("over....")
  }
}

   4  RDD 读写 elasticsearch
   引用:阿粒_lxf
   链接:https://www.jianshu.com/p/8cf8b89b06e6  来源:简书

import org.apache.spark.rdd.RDD
import org.elasticsearch.spark._ 
object LoadElasticsearchData {

  /**
   * Demo: load every document of an Elasticsearch index/type as an RDD
   * using the implicits from org.elasticsearch.spark._ (sc.esRDD).
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf()
        .setAppName("e2e.computing.test")
        .setMaster("local[*]")
        .set("spark.cassandra.connection.host", "192.168.14.141")
        .set("es.nodes", "192.168.7.130")      // ES host (192.168.7.130:9200)
        .set("es.port", "9200")
        .set("es.index.auto.create", "true")
        .set("es.mapping.date.rich", "false")  // keep date fields as raw values, not rich dates
    )
    try {
      // match_all query; elasticsearch-spark returns all documents by default.
      // (Plain triple-quoted string — the original's `s` interpolators had
      // nothing to interpolate.)
      val query =
        """
          |{
          |  "query": {
          |    "match_all": {}
          |  }
          |}
        """.stripMargin
      val esRdd = sc.esRDD("index/type", query)
    } finally {
      sc.stop() // fix: the original never stopped the SparkContext
    }
  }
}

spark RDD 写ES
import com.ffcs.itm.e2e.test.util
import org.elasticsearch.spark._


object SaveElasticsearch {

  /**
   * Demo: write a small in-memory Map to an Elasticsearch index/type via
   * saveToEs (from org.elasticsearch.spark._). The index is created
   * automatically if absent (es.index.auto.create = true).
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf()
        .setAppName("e2e.computing.test")
        .setMaster("local[*]")
        .set("spark.cassandra.connection.host", "192.168.14.141")
        .set("es.nodes", "192.168.7.130")      // ES host (192.168.7.130:9200)
        .set("es.port", "9200")
        .set("es.index.auto.create", "true")   // create the index if it does not exist
        .set("es.mapping.date.rich", "false")
    )
    try {
      val airports = Map("OTP" -> "Otopeni", "SFO" -> "San Fran")
      sc.makeRDD(Seq(airports)).saveToEs("index/type")
    } finally {
      sc.stop() // fix: the original never stopped the SparkContext
    }
  }
}


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值