计算2代码如下:
import java.util.Properties
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
// Computes, per province, the average order consumption and compares it with
// the overall (all-province) average using Spark SQL window functions.
// NOTE(review): this block is truncated in the current view — the SQL string,
// the CASE expression, and the rest of main() continue past the visible lines.
object cal02 {
// Entry point. Builds a local SparkSession with Hive support, switches to the
// `dwd` database, and runs the province-average comparison query.
def main(args: Array[String]): Unit = {
// Connect to the MySQL database.
// NOTE(review): credentials are hard-coded in source — consider externalizing.
// These properties are prepared here but the JDBC write-back call is not
// visible in this chunk; presumably the query result is written to MySQL
// further down — confirm against the full file.
val mysqlUrl = "jdbc:mysql://master:3306/shtd_store?useUnicode=true&characterEncoding=utf8"
val mysqlProperties = new Properties()
mysqlProperties.setProperty("user", "root")
mysqlProperties.setProperty("password", "1234")
// Connect to Hive.
// NOTE(review): master is hard-coded to "local" — fine for testing, but a
// cluster deployment would normally pass the master via spark-submit.
val conf = new SparkConf().setMaster("local").setAppName("cal02")
val spark = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()
spark.sql("use dwd")
println("Connected to Hive database: dwd")
// Run the SQL query.
println("----- provinceavgcmp -----")
// Window functions: AVG(...) OVER (PARTITION BY b.id) gives each province's
// average, AVG(...) OVER () gives the global average across all rows; the
// DISTINCT collapses the per-order rows to one row per province.
val provinceavgcmp = spark.sql(
"""
|SELECT
| DISTINCT b.id AS province_id,
| b.name AS province_name,
| AVG(a.final_total_amount) OVER (PARTITION BY b.id) AS every_province_avgconsumption,
| AVG(a.final_total_amount) OVER () AS all_province_avgconsumption,
| CASE
| WHEN AVG(a.final_total_amount) OVER (PARTITION BY b.id) > AVG(a.final_total_amount) OVER () THEN '大于'