Scala: reduceLeft type mismatch

Today I used Scala to solve a small problem: for each row of a DataFrame, look back over a sliding window of `windows` preceding rows and compute the sum of that window.

Here is the final code first; the debugging that led to it follows. The final version:

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._

object test {
    // Sum of the `windows` values immediately preceding row i (exclusive);
    // rows with fewer than `windows` predecessors get 0.0.
    def rollingDataFrame(ss: SparkSession, dataFrame: DataFrame, windows: Int): List[Double] = {
        val len = dataFrame.count().toInt
        val listValue = dataFrame
            .select("int_column")
            .rdd
            .collect()             // pulls everything to the driver
            .map(_(0))             // Row(0) is typed Any
            .toList
            .map(_.toString.toInt) // convert Any -> Int so reduceLeft(_ + _) compiles
        val result =
            (for (i <- 1 to len)
            yield
                if (i <= windows) 0.00
                else listValue.slice(i - 1 - windows, i - 1).reduceLeft(_ + _)).toList // sliding-window sum ending just before row i; 0 when i <= windows
        result
    }

    def main(args: Array[String]): Unit = {
        val spark = SparkSession
            .builder()
            .appName("test")
            .master("local")
            .getOrCreate()

        import spark.implicits._
        val df = Seq(
            (1, "First Value", java.sql.Date.valueOf("2010-01-01")),
            (2, "Second Value", java.sql.Date.valueOf("2010-02-01")),
            (3, "First Value", java.sql.Date.valueOf("2010-01-02")),
            (4, "Second Value", java.sql.Date.valueOf("2010-02-02")),
            (5, "First Value", java.sql.Date.valueOf("2010-01-03")),
            (6, "Second Value", java.sql.Date.valueOf("2010-02-03"))
        ).toDF("int_column", "string_column", "date_column")

        df.show()
        val result = rollingDataFrame(spark, df, 2)
        println(result)
        // Join the rolling sums back via a synthetic id; the ids line up here
        // only because everything sits in a single local partition.
        val result_addID = result.toDF("rollingData").withColumn("id", monotonically_increasing_id())
        val df_addID = df.withColumn("id", monotonically_increasing_id())
        val results_DF = df_addID.join(result_addID, "id")
        result_addID.show()
        df_addID.show()
        results_DF.show()
    }
}

The output:
+---+----------+-------------+-----------+-----------+
| id|int_column|string_column|date_column|rollingData|
+---+----------+-------------+-----------+-----------+
|  0|         1|  First Value| 2010-01-01|        0.0|
|  1|         2| Second Value| 2010-02-01|        0.0|
|  2|         3|  First Value| 2010-01-02|        3.0|
|  3|         4| Second Value| 2010-02-02|        5.0|
|  4|         5|  First Value| 2010-01-03|        7.0|
|  5|         6| Second Value| 2010-02-03|        9.0|
+---+----------+-------------+-----------+-----------+
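As an aside, the same rolling sum can be computed inside Spark with a window function, avoiding the collect() round-trip through the driver. This is only a minimal sketch, not the original solution: ordering by int_column and the rowsBetween(-windows, -1) frame are my assumptions, and orderBy without partitionBy moves all rows into a single partition (harmless for a toy example like this one):

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

val windows = 2
// Frame covering the `windows` rows immediately before the current row.
val w = Window.orderBy("int_column").rowsBetween(-windows, -1)

val rolled = df.withColumn(
    "rollingData",
    // Emit 0.0 until the frame is full, matching the loop above.
    when(count("int_column").over(w) < windows, lit(0.0))
        .otherwise(sum("int_column").over(w).cast("double"))
)
rolled.show()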

Now for the process. I first sanity-checked the slice-and-reduce logic in the Scala REPL:

scala> values
res17: List[Int] = List(1, 2, 3, 4, 5, 6, 7)
scala> values.slice(7-4, 7).reduceLeft(_+_)
res16: Int = 22

No problem there, right? But once I built a DataFrame in the IDE and pulled a column out of it, things broke. The code:

import org.apache.spark.sql.{DataFrame, SparkSession}

import scala.collection.JavaConverters._

object test {
//    def rollingDataFrame(ss:SparkSession, dataFrame: DataFrame, windows:Int):DataFrame={
//        val len = dataFrame.count().toInt
//        val listValue = dataFrame
//            .select("int_column")
//            .collect()
//            .map(_(0))
//            .toList
//
//    }

    def main(args: Array[String]): Unit = {
        val spark = SparkSession
            .builder()
            .appName("test")
            .master("local")
            .getOrCreate()

        import spark.implicits._
        val df = Seq(
            (1, "First Value", java.sql.Date.valueOf("2010-01-01")),
            (2, "Second Value", java.sql.Date.valueOf("2010-02-01")),
            (3, "First Value", java.sql.Date.valueOf("2010-01-02")),
            (4, "Second Value", java.sql.Date.valueOf("2010-02-02")),
            (5, "First Value", java.sql.Date.valueOf("2010-01-03")),
            (6, "Second Value", java.sql.Date.valueOf("2010-02-03"))
        ).toDF("int_column", "string_column", "date_column")

        df.show()

//        val rollingDataFrame = rollingDataFrame(spark, df, 2)
        var listValue = df
            .select("int_column")
            .rdd
            .collect()
            .map(_(0)) // returns Array[Any]
//        val listValue = df.select("int_column").as("String").collect().toList
        println("listValue:")
        print(listValue)
        var fillListVale: List[Int] = Nil
        val len = df.count().toInt
        val windows = 2
        val i = 4
        var lv = List(3, 4)
        val ll = listValue.slice(i - windows, i).toList.reduceLeft(_ + _) // does not compile; see the error below
        print(ll)

    }
}

It fails to compile with:

Error:(51, 80) type mismatch;
 found   : Any
 required: String
        val ll:List[Int] = listValue.slice(i-windows, i).toList.reduceLeft(_ + _) //.map(_.toString.toInt)

Adding .map(_.toString.toInt) makes it compile. The cause: df.select("int_column").rdd.collect().map(_(0)) returns Array[Any], because indexing a Row with _(0) is typed Any. Any defines no + method, so in reduceLeft(_ + _) (the same holds for fold, foldLeft, and foldRight) the compiler falls back to the implicit string-concatenation conversion, which requires a String on the right-hand side; hence the misleading "found: Any, required: String". The elements have to be converted to a concrete numeric type before they can be summed.
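The error can be reproduced without Spark at all. A minimal sketch with a plain List[Any] (Fix 2 with asInstanceOf is just an alternative I am noting; it assumes the elements really are Ints):

val xs: List[Any] = List(1, 2, 3)

// xs.reduceLeft(_ + _)
// error: type mismatch; found: Any, required: String
// Any has no `+`, so the compiler reaches for Predef's string-concatenation
// implicit, which expects a String on the right-hand side.

// Fix 1: go through a concrete numeric type, as done above
val sum1 = xs.map(_.toString.toInt).reduceLeft(_ + _) // 6

// Fix 2 (assumes the elements are genuinely Ints): downcast
val sum2 = xs.map(_.asInstanceOf[Int]).reduceLeft(_ + _) // 6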
The corrected version:

import org.apache.spark.sql.{DataFrame, SparkSession}

import scala.collection.JavaConverters._

object test {
//    def rollingDataFrame(ss:SparkSession, dataFrame: DataFrame, windows:Int):DataFrame={
//        val len = dataFrame.count().toInt
//        val listValue = dataFrame
//            .select("int_column")
//            .collect()
//            .map(_(0))
//            .toList
//
//    }

    def main(args: Array[String]): Unit = {
        val spark = SparkSession
            .builder()
            .appName("test")
            .master("local")
            .getOrCreate()

        import spark.implicits._
        val df = Seq(
            (1, "First Value", java.sql.Date.valueOf("2010-01-01")),
            (2, "Second Value", java.sql.Date.valueOf("2010-02-01")),
            (3, "First Value", java.sql.Date.valueOf("2010-01-02")),
            (4, "Second Value", java.sql.Date.valueOf("2010-02-02")),
            (5, "First Value", java.sql.Date.valueOf("2010-01-03")),
            (6, "Second Value", java.sql.Date.valueOf("2010-02-03"))
        ).toDF("int_column", "string_column", "date_column")

        df.show()

//        val rollingDataFrame = rollingDataFrame(spark, df, 2)
        var listValue = df
            .select("int_column")
            .rdd
            .collect()
            .map(_(0)) // returns Array[Any]
//        val listValue = df.select("int_column").as("String").collect().toList
        println("listValue:")
        print(listValue.toList)
        var fillListVale: List[Int] = Nil
        val len = df.count().toInt
        val windows = 2
        val i = 4
        var lv = List(3, 4)
        val ll: List[Int] = listValue.slice(i - windows, i).toList.map(_.toString.toInt) // convert Any -> Int first
        val result = ll.reduceLeft(_ + _)
        print(result)

    }
}
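For completeness, the Array[Any] can be avoided entirely by asking Spark for typed values up front. A minimal sketch, assuming import spark.implicits._ is in scope as in the code above (it supplies the Encoder[Int] that as[Int] needs):

// as[Int] turns the single-column DataFrame into a Dataset[Int],
// so collect() already yields Array[Int]; no toString.toInt needed.
val listValue: List[Int] = df.select("int_column").as[Int].collect().toList

// Alternatively, keep Rows but read the column with its static type.
val listValue2: List[Int] = df.select("int_column")
    .collect()
    .map(_.getInt(0))
    .toList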
