Scala Example 6

import org.apache.spark.sql.SparkSession


object base_day_analyse {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("base_day_analyse")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()
    import spark.implicits._
    val df1 = spark.read.json("/spark_data/visit_records.json")
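    // assumed record shape (implied by the queries below): each JSON object has at least
    // `mac` (device id), `in_time` (entry time, epoch seconds) and `stay_time` (seconds)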
    df1.createOrReplaceTempView("visit")
    // register the visit records as a temporary view so they can be queried with Spark SQL
    var result_string = ""


    var sql = "SELECT  in_time from visit  order by `in_time`  limit 1"
    var min_time = (spark.sql(sql).collect()) (0).asInstanceOf[Row].getInt(0)
    sql = "SELECT  in_time from visit  order by `in_time` desc  limit 1 "
    var max_time = (spark.sql(sql).collect()) (0).asInstanceOf[Row].getInt(0)
    var now_time = 0;

    var outer = new Breaks

    now_time = min_time;
    var last_customer_num = 0;
    var now_customer_num = 0;
    var new_customer_num = 0;
    var old_customer_num = 0;
    var interval_customer_num = 0;

    while (now_time <= max_time) {
      var jump_num = 0L
      var visit_num = 0L
      var deep_in_num = 0L
      var avg_stay_time = 0.0
      val time1 = now_time
      val time2 = now_time + 86400

      // distinct visitors within this day
      var sql2 = "SELECT COUNT(DISTINCT mac) num FROM visit WHERE `in_time` >= " + time1 + " AND `in_time` <= " + time2 + " AND stay_time > 0"
      interval_customer_num = spark.sql(sql2).collect()(0).getLong(0)

      // cumulative distinct visitors from the first day up to the end of this day
      sql2 = "SELECT COUNT(DISTINCT mac) num FROM visit WHERE `in_time` >= " + min_time + " AND `in_time` <= " + time2 + " AND stay_time > 0"
      now_customer_num = spark.sql(sql2).collect()(0).getLong(0)

      // visitors not seen before this day are new; the rest are returning
      new_customer_num = now_customer_num - last_customer_num
      old_customer_num = interval_customer_num - new_customer_num

      // visits that bounced within 180 s
      sql2 = "SELECT count(*) jump_num FROM visit WHERE `in_time` >= " + time1 + " AND `in_time` <= " + time2 + " AND stay_time <= 180"
      jump_num = spark.sql(sql2).collect()(0).getLong(0)

      // visits that stayed at least 1200 s ("deep" visits)
      sql2 = "SELECT count(*) deep_in_num FROM visit WHERE `in_time` >= " + time1 + " AND `in_time` <= " + time2 + " AND stay_time >= 1200"
      deep_in_num = spark.sql(sql2).collect()(0).getLong(0)

      // total visits and average stay time for this day
      sql2 = "SELECT count(*) visit_num, AVG(stay_time) avg_stay_time FROM visit WHERE `in_time` >= " + time1 + " AND `in_time` <= " + time2
      val row = spark.sql(sql2).collect()(0)
      visit_num = row.getLong(0)
      // AVG is NULL when there are no visits in the window
      avg_stay_time = if (row.isNullAt(1)) 0.0 else row.getDouble(1)

      // guard against division by zero on days without visits
      val jump_rate = if (visit_num > 0) jump_num.toFloat / visit_num else 0f
      val deep_in_rate = if (visit_num > 0) deep_in_num.toFloat / visit_num else 0f
      val format_deep_in_rate = f"$deep_in_rate%1.2f"
      val format_jump_rate = f"$jump_rate%1.2f"

      // append this day's metrics to the result set as one JSON object per line
      val day_string = s"""{"time":$time1,"jump_out_rate":$format_jump_rate,"deep_in_rate":$format_deep_in_rate,"avg_stay_time":$avg_stay_time,"new_num":$new_customer_num,"old_num":$old_customer_num,"customer_num":$visit_num}""" + "\n"
      result_string = result_string + day_string

      now_time = now_time + 86400
      last_customer_num = now_customer_num
    }



    // write the accumulated result set to a local file
    import java.io._
    val writer = new PrintWriter(new File("\\sparkdata\\base_day_analyse.json"))

    writer.write(result_string)
    writer.close()

    spark.stop()
  }
}
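
The loop above fires several SQL queries per day, so Spark scans the visit table many times. As a rough alternative sketch (it assumes the same `mac` / `in_time` / `stay_time` fields; the object name `base_day_analyse_grouped` and the helper column `day_index` are illustrative, not part of the example above), the per-day visit count, distinct visitors, bounce and deep-visit counts and average stay time can be computed in one aggregation by bucketing `in_time` into day indices. The new/returning split still needs the cumulative distinct count, so this only covers the per-day metrics.

// A minimal sketch, assuming the same input schema as the example above.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

object base_day_analyse_grouped {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("base_day_analyse_grouped").getOrCreate()
    val df = spark.read.json("/spark_data/visit_records.json")

    // first entry time in the data set, used as the origin for the day buckets
    val minTime = df.agg(min("in_time")).collect()(0).getLong(0)

    val daily = df
      .withColumn("day_index", floor((col("in_time") - lit(minTime)) / 86400))
      .groupBy("day_index")
      .agg(
        count(lit(1)).as("visit_num"),                                  // all visits that day
        countDistinct("mac").as("customer_num"),                        // distinct visitors that day
        avg("stay_time").as("avg_stay_time"),                           // average stay in seconds
        sum(when(col("stay_time") <= 180, 1).otherwise(0)).as("jump_num"),    // bounced within 180 s
        sum(when(col("stay_time") >= 1200, 1).otherwise(0)).as("deep_in_num") // stayed at least 1200 s
      )
      .orderBy("day_index")

    daily.show(false)
    spark.stop()
  }
}

Grouping once keeps the per-day arithmetic inside Spark instead of driving it from a while loop on the driver.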