Spark用DataFrame操作ES

本文介绍了一个使用Apache Spark进行数据处理并与Elasticsearch进行数据同步的例子。具体包括:如何通过SparkConf配置Elasticsearch集群的连接信息,如何通过SparkContext(RDD方式)或SparkSession(DataFrame方式)读取Elasticsearch中的数据,以及如何将RDD或DataFrame写回Elasticsearch。
摘要由CSDN通过智能技术生成
直接上代码:
package com.suning.scdc.hspark.goods.test

import scala.collection.Seq
import scala.collection.mutable.LinkedList
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.elasticsearch.spark.sparkContextFunctions
import org.slf4j.LoggerFactory
import org.elasticsearch.spark.rdd.EsSpark
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SaveMode

/**
  * Demo job that copies login documents from the Elasticsearch resource
  * `snprime_login/login` into `bmps/order`.
  *
  * Two variants are provided: [[esRdd]] uses the RDD API and [[dfEs]] uses the
  * DataFrame API. [[main]] runs the DataFrame variant.
  */
object OrderES {
  val logger = LoggerFactory.getLogger(OrderES.getClass)

  // Kept as a public mutable field so existing references to `OrderES.sc`
  // keep compiling. It is null until `main` has created the context.
  var sc: SparkContext = null

  /** Elasticsearch `index/type` the login documents are read from. */
  private val SourceResource = "snprime_login/login"

  /** Elasticsearch `index/type` the results are written to. */
  private val TargetResource = "bmps/order"

  /** Master and application name used when the launcher did not supply any. */
  private val DefaultMaster = "local"
  private val DefaultAppName = "sql test"

  /** Query DSL selecting the login documents of a single member id. */
  private val MemberQuery = """{"query":{"match":{"memberId": "7013894650"}}}"""

  /**
    * Entry point: creates the SparkContext with the Elasticsearch connection
    * settings, runs [[dfEs]] and always stops the context afterwards.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .set("es.nodes", "10.37.154.82,10.37.154.83,10.37.154.84")
      // NOTE(review): "cluster.name" has no "es." prefix, so the connector
      // presumably ignores it - confirm before relying on it.
      .set("cluster.name", "elasticsearch")
      .set("es.port", "9200")
      // SparkContext refuses to start without a master and an app name.
      // Only fill them in when the launcher (e.g. spark-submit) has not.
      .setIfMissing("spark.master", DefaultMaster)
      .setIfMissing("spark.app.name", DefaultAppName)

    sc = new SparkContext(conf)
    try {
      dfEs(sc)
      //esRdd(sc)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }

  /**
    * RDD variant: reads the documents matching the member query, reshapes each
    * one into an order map and writes it to the target resource under the
    * source document's id.
    *
    * Throws `NoSuchElementException` if a matched document has no `memberId`
    * or `loginTime` field.
    *
    * @param sc context carrying the Elasticsearch connection settings
    */
  def esRdd(sc: SparkContext): Unit = {
    // Select the login documents of one member.
    val docs = sc.esRDD(SourceResource, MemberQuery)

    val rdd = docs.map { case (id, fields) =>
      val order = scala.collection.immutable.Map(
        "orderNo" -> fields("memberId").toString,
        "loginTm" -> fields("loginTime").toString,
        "year" -> "1994")

      (id, order)
    }.cache() // two actions follow; cache so Elasticsearch is scanned once

    print("lst=")
    // Debug output; on a cluster these lines are printed by the executors.
    rdd.foreach(println)
    // The tuple key is used as the document id of the written document.
    EsSpark.saveToEsWithMeta(rdd, TargetResource)
    rdd.unpersist()
  }

  /**
    * DataFrame variant: loads `memberId` and `loginTime` from the source
    * resource, shows a sample and writes the rows to the target resource
    * using `memberId` as the document id.
    *
    * The read is not filtered: the whole source resource is loaded. A query
    * can be pushed down by passing an `es.query` option to the reader.
    *
    * @param sc not referenced by the body; the session is obtained through
    *           `SparkSession.builder().getOrCreate()`
    */
  def dfEs(sc: SparkContext): Unit = {
    val spark = SparkSession
      .builder()
      .appName(DefaultAppName)
      .master(DefaultMaster)
      .getOrCreate()

    val readDf = spark.read
      .format("org.elasticsearch.spark.sql")
      .load(SourceResource)
      .select("memberId", "loginTime")

    readDf.show()

    // Use memberId as the Elasticsearch document id (primary key).
    val esmap = Map("es.mapping.id" -> "memberId")

    // Spark's default save mode is ErrorIfExists, which makes the write fail
    // once the target already contains data. Append lets the job be re-run;
    // rows with an existing id replace the stored document.
    readDf.write
      .mode(SaveMode.Append)
      .format("org.elasticsearch.spark.sql")
      .options(esmap)
      .save(TargetResource)
  }
}

 

转载于:https://my.oschina.net/u/778683/blog/1828797

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值