Scala遇到的奇怪问题

 请大神帮忙看一下 

插上电源和不插上电源的运行效果不一样

插上电源之后就报错

不插上电源就不会报错

package L02

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._

/**
 * One parsed record of the house CSV.
 *
 * Fields are listed in the same order as the comma-separated columns they are
 * populated from. The three `String` fields are kept as raw text; `sale_data`
 * is later parsed as a date with the pattern `yyyyMMdd`.
 */
case class P2(
  selling_price: Double,
  bedrooms_num: Double,
  bathroom_num: Double,
  housing_area: Double,
  parking_area: Double,
  floor_num: Double,
  housing_rating: Double,
  built_area: Double,
  basement_area: Double,
  year_built: String,
  year_repair: String,
  latitude: Double,
  longitude: Double,
  sale_data: String
)
/**
 * Exploratory analysis of a house-sales CSV with Spark running in local mode.
 *
 * The program loads the file as text, parses every line into a [[P2]], prints
 * per-column summary statistics and missing-value counts, and finally prints
 * a few aggregations grouped by sale quarter and by housing rating.
 */
object L001 {

  /** Columns stored as text; they only get a missing-value count, no numeric statistics. */
  private val TextColumns: Set[String] = Set("year_built", "year_repair", "sale_data")

  /**
   * Prints how many rows have a missing value (null, or NaN for numeric columns)
   * in the given column.
   *
   * @param data       frame to inspect
   * @param columnName column whose missing values are counted
   */
  private def null_count(data: DataFrame, columnName: String): Unit = {
    val total = data.count()
    // Restrict the drop to the requested column; an unrestricted na.drop()
    // would count rows that are incomplete in *any* column.
    val present = data.na.drop(Seq(columnName)).count()
    println(columnName + ":缺失值数量" + (total - present))
  }

  /**
   * Prints max, min, mean and standard deviation of a numeric column, followed
   * by its missing-value count and a separator line.
   *
   * @param data       frame to inspect
   * @param columnName numeric column to summarise
   */
  private def mmas(data: DataFrame, columnName: String): Unit = {
    println(columnName + ":")
    val c = col(columnName)
    // One aggregation job instead of four, and the result is brought back to
    // the driver so the output does not depend on where executors print.
    val stats = data.agg(max(c), min(c), mean(c), stddev(c)).first()
    // The surrounding brackets keep the text identical to printing a one-column Row.
    println("max:[" + stats.get(0) + "]")
    println("min:[" + stats.get(1) + "]")
    println("mean:[" + stats.get(2) + "]")
    println("stddev:[" + stats.get(3) + "]")
    null_count(data, columnName)
    println("*" * 30)
  }

  /**
   * Entry point: runs the whole analysis and always stops the Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("test")
      .getOrCreate()

    try {
      import spark.implicits._

      val path = "src/main/scala/L02/house.csv"
      val rdd = spark.sparkContext.textFile(path, 1)
      // Debug output: echoes the whole file on the driver.
      rdd.collect().foreach(println)

      val df = rdd.map { line =>
        // limit = -1 keeps trailing empty fields, so a row whose last column
        // is empty still has 14 entries.
        val y = line.split(",", -1)
        P2(y(0).toDouble, y(1).toDouble, y(2).toDouble, y(3).toDouble, y(4).toDouble, y(5).toDouble,
          y(6).toDouble, y(7).toDouble, y(8).toDouble, y(9), y(10), y(11).toDouble, y(12).toDouble, y(13))
      }.toDF()
      df.show(5)

      for (name <- df.columns) {
        if (TextColumns.contains(name)) {
          println(name + ":")
          null_count(df, name)
          println("-" * 20)
        } else {
          mmas(df, name)
        }
      }
      null_count(df, "selling_price")
      mmas(df, "selling_price")

      val houseDate = df.na.drop().withColumn("date", to_date(col("sale_data"), "yyyyMMdd"))
      val houseQuarter = houseDate.withColumn("quarter", quarter(col("date")))
      // Total sales amount per quarter.
      houseQuarter.groupBy("quarter").sum("selling_price").sort("quarter").show()
      // Number of houses per rating, most common rating first.
      houseQuarter.groupBy("housing_rating").count().sort(desc("count")).show()
      // Average price per unit of housing area for each rating.
      houseQuarter.groupBy("housing_rating").agg(avg(col("selling_price") / col("housing_area"))).sort("housing_rating").show()
    } finally {
      // Release the local Spark resources even when parsing or a job fails.
      spark.stop()
    }
  }
}

请大家帮忙看看是什么问题，好久都没解决了。

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值