Spark SQL:使用类似 SQL 的命令操作文件与数据库表

数据

4,2024-02-08 11:56:00,154.56
2,2023-01-11 01:30:59,148.42
4,2023-03-28 08:01:19,105.36
4,2021-09-14 08:05:16,108.42
4,2024-01-03 01:22:22,206.46
5,2022-07-03 02:24:01,197.90
1,2021-12-03 06:42:50,107.83
4,2023-10-21 03:55:32,171.98
4,2023-06-13 05:02:08,204.10

示例代码

package cn.kgc.sql

import java.util.Properties

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
/*//这种设计模式怎么做的?  SparkSession.builder().master("local[*]").appName("spark_sql_01").getOrCreate()
class MySpark{
  private var _master:String = "local"
  private var _appName:String = ""

  private def master(master:String)={
    _master=master
  }

  private def appName(appName:String)={
    _appName=appName
  }
}

object MySpark{

  class Session{
    var singleton:MySpark = new MySpark();

    def master(master:String)={
      singleton.master(master)
      singleton
    }

    def appName(appName:String)={
      singleton.appName(appName)
      singleton
    }
    def getOrCreate()=singleton

  }

  private var session:Session = null;

  def builder()={
    session = new Session
    session
  }
}*/

//下面这个包存放的都是sql的函数
import org.apache.spark.sql.functions._

object Task04sql {

  /**
   * One raw sales record as read from the text file.
   *
   * @param shopId shop identifier (numeric string in the input file)
   * @param date   timestamp string, expected format "yyyy-MM-dd HH:mm:ss"
   * @param volume sales amount (decimal string); cast to Float downstream
   */
  case class Record(shopId: String, date: String, volume: String)

  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("spark_sql_01")
      .getOrCreate()

    // JDBC connection settings for the MySQL demo table.
    // NOTE(review): credentials are hard-coded in source; move them to
    // configuration or environment variables in real code.
    val URL = "jdbc:mysql://192.168.75.245:3306/schooldb?useSSL=false"
    val TABLE = "classinfo"
    val PRO = new Properties()
    PRO.setProperty("driver", "com.mysql.jdbc.Driver")
    PRO.setProperty("user", "root")
    PRO.setProperty("password", "Fang@123")

    // spark.read.jdbc loads one table as a DataFrame; register it as a
    // temp view so it can be queried with spark.sql(...).
    spark.read.jdbc(URL, TABLE, PRO).createTempView("classinfo")

    // SparkSession's implicit conversions ($"col" column syntax, toDF, ...).
    import spark.implicits._

    val frame: DataFrame = spark.read.jdbc(URL, TABLE, PRO)

    // Case class + RDD -> DataFrame.
    // SparkSession wraps a SparkContext; reuse it for the RDD API.
    val sc = spark.sparkContext
    val rdd: RDD[Record] = sc
      .textFile("file:///D:\\d\\tools\\idea\\myprojects\\spark\\spark02\\file\\sales5.txt", 5)
      .mapPartitions(_.map { line =>
        val ps = line.split(",")
        Record(ps(0), ps(1), ps(2))
      })

    // Regex pulling year/month/day out of "yyyy-MM-dd HH:mm:ss".
    val rx = "(.*?)-(.*?)-(.*?) .*"
    spark.createDataFrame(rdd)
      .select(
        $"shopId".cast("Int"),
        regexp_extract($"date", rx, 1).as("year"),
        regexp_extract($"date", rx, 2).as("month"),
        regexp_extract($"date", rx, 3).as("day"),
        $"volume".cast("Float"))
      // Daily aggregation: per shop/day total volume and transaction count.
      .groupBy($"shopId", $"year", $"month", $"day")
      .agg(
        sum($"volume").as("sumVolume"),
        count($"volume").as("cntVolume"))
      // Monthly aggregation: roll the daily figures up to the month.
      .groupBy($"shopId", $"year", $"month")
      .agg(
        sum($"sumVolume").cast("decimal(10,2)").as("sumVolume"),
        // BUG FIX: the original used count($"cntVolume"), which counts the
        // number of DAY groups in the month, not the number of transactions.
        // Summing the per-day counts yields the true monthly transaction count.
        sum($"cntVolume").as("cntVolume"))
      .filter($"sumVolume".geq(100000))
      .sort($"sumVolume".desc, $"cntVolume".asc)
      .limit(20)
      .show()

    // close() stops the session and its underlying SparkContext, so a
    // separate sc.stop() beforehand is unnecessary.
    spark.close()
  }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值