SparkSQL根据条件合并多条数据(测试)

import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.sql.{Row, RowFactory, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Description:  本地测试合并数据,完整版
  * Author: 赖传剑
  * Version: 1.0
  * Date: 2017-12-08
  */
object MergerData5Suite {

  val connectStr = "MM&"

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("MergerData5Suite").setMaster("local[2]")
    // 初始化SparkContext
    val sc = new SparkContext(conf)
    // 初始化SQLContext
    val sqlContext = new SQLContext(sc)

    val dbDF = sqlContext.read.format("jdbc")
      .option("url", "jdbc:postgresql://xx:xx/xx")
      .option("dbtable", "xx")
      .option("user", "xx")
      .option("password", "xx")
      .load()

    dbDF.registerTempTable("xx")
    val priorityDF = sqlContext.sql("select * from xx order by priority")
    val policyCollect = priorityDF.select("retailerid", "priority", "validity").map(row => {
      val retailerid = row.getString(0)
      val priority = row.getString(1)
      val validity = row.getString(2)
      val result = retailerid + connectStr + priority + connectStr + validity
      result
    }).collect()

//    priorityDF.collect()(0).getString(1)

    import sqlContext.implicits._
    // 测试数据
    val seq = List(
      ("a", "value1", "", "2017-11-05", "X0100013", "value8"),
      ("a", "value2", "value3", "2017-11-07", "X0100059", "value9"),
      ("b", "value4", "", "2017-11-19", "Z0100030", "value10"),
      ("b", "value5", "", "2017-11-20", "Z0100030", "value11"),
      ("b", "value6", "value7", "2017-11-10", "X0100059", "value12"))

    val dataFrame = seq.toDF("column1", "column2", "column3", "createTime", "retailerid", "column4")
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")

    // 将数据表数据映射成(key, columns(通过connectStr连接) + 时间字段索引 + 字段主键ID索引)
    val mappedRdd = dataFrame.rdd.map(item => {
      val createTimeIndex = item.fieldIndex("createTime")
      val retaileridIndex = item.fieldIndex("retailerid")
      val splitsed = item.mkString(connectStr).split(connectStr)
      (splitsed(0), item.mkString(connectStr) + connectStr + createTimeIndex + connectStr + retaileridIndex)
    })

    val keyStringRDD = mappedRdd.reduceByKey((item1, item2) => {

      val itemSplit1 = item1.split(connectStr)
      val itemSplit2 = item2.split(connectStr)

      val item1Length = itemSplit1.length
      val item2Length = itemSplit2.length
      // 获取最大字段数
      val maxd = math.max(item1Length, item2Length)

      // 获取最后一个split索引值
      val lastIndexStr1 = itemSplit1.last
      val lastIndexStr2 = itemSplit2.last

      // 获取倒2个记录索引
      val dateIndexStr1 = itemSplit1(itemSplit1.length - 2)
      val dateIndexStr2 = itemSplit2(itemSplit2.length - 2)

      // 获取倒1个记录索引值
      val date1Str = itemSplit1(dateIndexStr1.toInt)
      val date2Str = itemSplit2(dateIndexStr2.toInt)

      // 获取倒2个记录索引值
      val date1 = dateFormat.parse(date1Str)
      val date2 = dateFormat.parse(date2Str)

      val item1Retailerid = itemSplit1(lastIndexStr1.toInt) // 站点id
      val item2Retailerid = itemSplit2(lastIndexStr2.toInt)

      var conditionStr1 = ""
      var conditionStr2 = ""

      for (collectItem <- policyCollect) {
        if (collectItem.contains(item1Retailerid)) {
          conditionStr1 = collectItem
        }
        if (collectItem.contains(item2Retailerid)) {
          conditionStr2 = collectItem
        }
      }

      val condition1Splits = conditionStr1.split(connectStr)
      val condition2Splits = conditionStr2.split(connectStr)

      val priority1 = condition1Splits(1).toInt
      val validity1 = condition1Splits(2).toInt
      val priority2 = condition2Splits(1).toInt
      val validity2 = condition2Splits(2).toInt

      val calendar = Calendar.getInstance()
      val nowTime = calendar.getTime
      calendar.setTime(date1)
      calendar.add(Calendar.DATE, validity1)
      val item1ExpireTime = calendar.getTime
      calendar.setTime(date2)
      calendar.add(Calendar.DATE, validity2)
      val item2ExpireTime = calendar.getTime

      var result = ""
      for (i <- 0 until maxd) {
        val itemValue1 = itemSplit1(i)
        val itemValue2 = itemSplit2(i)
        if (!itemValue1.equalsIgnoreCase(itemValue2)) {
          if (itemValue1 == "" || itemValue2 == "" || itemValue1.equalsIgnoreCase("NULL") || itemValue2.equalsIgnoreCase("NULL")) {
            if (itemValue1.equalsIgnoreCase("NULL")) {
              result += itemValue2 + connectStr
            } else if (itemValue2.equalsIgnoreCase("NULL")) {
              result += itemValue1 + connectStr
            } else {
              result += (itemValue1 + itemValue2) + connectStr
            }

          } else {

            if (priority1 <= priority2) { // priority1优先级高
              if (item1ExpireTime.getTime < nowTime.getTime && item2ExpireTime.getTime >= nowTime.getTime) {
                result += itemValue2 + connectStr
              } else {
                if (item1ExpireTime.getTime >= nowTime.getTime && item2ExpireTime.getTime >= nowTime.getTime && item1ExpireTime.getTime < item2ExpireTime.getTime) {
                  result += itemValue2 + connectStr
                } else {
                  result += itemValue1 + connectStr
                }
              }
            } else {
              if (item2ExpireTime.getTime < nowTime.getTime && item1ExpireTime.getTime >= nowTime.getTime) {
                result += itemValue1 + connectStr
              } else {
                if (item2ExpireTime.getTime >= nowTime.getTime && item1ExpireTime.getTime >= nowTime.getTime && item2ExpireTime.getTime < item1ExpireTime.getTime) {
                  result += itemValue1 + connectStr
                } else {
                  result += itemValue2 + connectStr
                }
              }
            }

          }
        } else {
          if (priority1 <= priority2) { // priority1优先级高
            if (item1ExpireTime.getTime < nowTime.getTime && item2ExpireTime.getTime >= nowTime.getTime) {
              result += itemValue2 + connectStr
            } else {
              if (item1ExpireTime.getTime >= nowTime.getTime && item2ExpireTime.getTime >= nowTime.getTime && item1ExpireTime.getTime < item2ExpireTime.getTime) {
                result += itemValue2 + connectStr
              } else {
                result += itemValue1 + connectStr
              }
            }
          } else {
            if (item2ExpireTime.getTime < nowTime.getTime && item1ExpireTime.getTime >= nowTime.getTime) {
              result += itemValue1 + connectStr
            } else {
              if (item2ExpireTime.getTime >= nowTime.getTime && item1ExpireTime.getTime >= nowTime.getTime && item2ExpireTime.getTime < item1ExpireTime.getTime) {
                result += itemValue1 + connectStr
              } else {
                result += itemValue2 + connectStr
              }
            }
          }
        }
      }
      if (result.endsWith(connectStr)) {
        result = result.substring(0, result.length - 3)
      }
      result
    })

    val valueRDDs = keyStringRDD.map(item => {
      val splits = item._2.split(connectStr)
      val columnBean = createTableBean(splits)
      columnBean
    })

    sqlContext.createDataFrame(valueRDDs, dataFrame.schema).show()

    sc.stop()

  }

  def createTableBean(splits: Array[String]): Row = {
    val columnBean = RowFactory.create(splits(0), splits(1), splits(2), splits(3), splits(4), splits(5))
    columnBean
  }

}

运行结果:
+——-+——-+——-+———-+———-+——-+
|column1|column2|column3|createTime|retailerid|column4|
+——-+——-+——-+———-+———-+——-+
| b| value5| value7|2017-11-19| Z0100030|value10|
| a| value1| value3|2017-11-05| X0100013| value8|
+——-+——-+——-+———-+———-+——-+

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

向往的生活Life

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值