import org.apache.spark.sql.functions.{col, desc}
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import java.text.SimpleDateFormat
import java.util.{Calendar, Properties}
object SmartTV {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("111")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    // JDBC connection properties for the source/target MySQL database
    val prop = new java.util.Properties()
    prop.put("user", "root")
    prop.put("password", "2!!")
    val url = "jdbc:mysql://14.14:3306/ms"
    val colName = "create_time"
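    // The seven date cutoffs below come from a single Calendar instance that is
    // mutated in place, so each Calendar.add(DATE, -n) call is cumulative: every
    // cutoff is measured back from the previous one, not from today.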
    val calendar1 = Calendar.getInstance
    val sdf1 = new SimpleDateFormat("yyyy-MM-dd")
    val offset = 852
    val interval = 0 + offset
    val interval2 = 7
    val interval3 = 16
    val interval4 = 60
    val interval5 = 90
    val interval6 = 185
    calendar1.add(Calendar.DATE, -interval)
    val N_days_ago: String = sdf1.format(calendar1.getTime)
    println("Date " + interval + " days before today: " + N_days_ago)
    calendar1.add(Calendar.DATE, -interval2)
    val N_days_ago2: String = sdf1.format(calendar1.getTime)
    println("Date " + (interval + interval2) + " days before today: " + N_days_ago2)
    calendar1.add(Calendar.DATE, -interval2)
    val N_days_ago3: String = sdf1.format(calendar1.getTime)
    println("Date " + (interval + interval2 + interval2) + " days before today: " + N_days_ago3)
    calendar1.add(Calendar.DATE, -interval3)
    val N_days_ago4: String = sdf1.format(calendar1.getTime)
    println("Date " + (interval + interval2 + interval2 + interval3) + " days before today: " + N_days_ago4)
    calendar1.add(Calendar.DATE, -interval4)
    val N_days_ago5: String = sdf1.format(calendar1.getTime)
    println("Date " + (interval + interval2 + interval2 + interval3 + interval4) + " days before today: " + N_days_ago5)
    calendar1.add(Calendar.DATE, -interval5)
    val N_days_ago6: String = sdf1.format(calendar1.getTime)
    println("Date " + (interval + interval2 + interval2 + interval3 + interval4 + interval5) + " days before today: " + N_days_ago6)
    calendar1.add(Calendar.DATE, -interval6)
    val N_days_ago7: String = sdf1.format(calendar1.getTime)
    println("Date " + (interval + interval2 + interval2 + interval3 + interval4 + interval5 + interval6) + " days before today: " + N_days_ago7)
    import spark.implicits._
    val d_c_m: Dataset[Row] = spark.read.jdbc(url, "d_c_m", prop).filter(col("member_id") > 0)
    d_c_m.createOrReplaceTempView("d_c_m")
    //val menu_data: DataFrame = spark.sql(s"select name,type,mac,member_id,member_name,device_type,device_version,user_no,menu_id,to_date($colName, 'yyyy-MM-dd HH:mm:ss') as dates from d_c_m")
    //menu_data.show()
    val menu_data: DataFrame = spark.sql(s"select name,type,mac,member_id,member_name,device_type,device_version,user_no,menu_id,substring($colName, 0,10) as dates from d_c_m")
    //menu_data.show()
    println("---------- Active user count: 1-day window ----------")
    //menu_data.filter("dates=='" + N_days_ago + "'").show()
    menu_data.filter("dates=='" + N_days_ago + "'").createOrReplaceTempView("data_ago1")
    //spark.sql("select count(distinct member_id) uv from data_ago1").show()
    val day1: Long = spark.sql("select count(distinct member_id) uv from data_ago1").take(1)(0).getLong(0)
println("----------最近7日日活分组统计结果------------")
//menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago2 + "'").show()
menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago2 + "'").createOrReplaceTempView("data_ago7")
//spark.sql("select count(distinct member_id) uv from data_ago7").show()
val day7: Long = spark.sql("select count(distinct member_id) uv from data_ago7").take(1)(0).getLong(0)
println("----------最近14日日活分组统计结果------------")
//menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago3 + "'").show()
menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago3 + "'").createOrReplaceTempView("data_ago14")
//spark.sql("select count(distinct member_id) uv from data_ago14").show()
val day14: Long = spark.sql("select count(distinct member_id) uv from data_ago14").take(1)(0).getLong(0)
println("----------最近30日日活分组统计结果------------")
//menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago4 + "'").show()
menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago4 + "'").createOrReplaceTempView("data_ago30")
//spark.sql("select count(distinct member_id) uv from data_ago30").show()
val day30: Long = spark.sql("select count(distinct member_id) uv from data_ago30").take(1)(0).getLong(0)
println("----------最近90日日活分组统计结果------------")
//menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago5 + "'").show()
menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago5 + "'").createOrReplaceTempView("data_ago90")
//spark.sql("select count(distinct member_id) uv from data_ago30").show()
val day90: Long = spark.sql("select count(distinct member_id) uv from data_ago90").take(1)(0).getLong(0)
println("----------最近180日日活分组统计结果------------")
//menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago6 + "'").show()
menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago6 + "'").createOrReplaceTempView("data_ago180")
//spark.sql("select count(distinct member_id) uv from data_ago30").show()
val day180: Long = spark.sql("select count(distinct member_id) uv from data_ago180").take(1)(0).getLong(0)
println("----------最近365日日活分组统计结果------------")
//menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago7 + "'").show()
menu_data.filter("dates<='" + N_days_ago + "' and dates>='" + N_days_ago7 + "'").createOrReplaceTempView("data_ago365")
//spark.sql("select count(distinct member_id) uv from data_ago30").show()
val day365: Long = spark.sql("select count(distinct member_id) uv from data_ago365").take(1)(0).getLong(0)
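    // Sketch only (not used below, assumes the menu_data schema above): the seven
    // blocks above could be collapsed into one helper that counts distinct members
    // in an inclusive [from, to] date range, e.g. distinctMembers(N_days_ago2, N_days_ago)
    // reproduces day7.
    def distinctMembers(from: String, to: String): Long =
      menu_data.filter((col("dates") >= from) && (col("dates") <= to))
        .select("member_id").distinct().count()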
    val d_c_v: DataFrame = spark.read.jdbc(url, "d_c_v", prop)
    val frame_vod: DataFrame = d_c_v.groupBy("mac").sum("play_duration").toDF("mac", "sum").sort(desc("sum"))
    //df.show()
    //frame_vod.show()
    val d_c_l: DataFrame = spark.read.jdbc(url, "d_c_l", prop)
    val frame_live: DataFrame = d_c_l.groupBy("mac").sum("play_duration").toDF("mac", "sum").sort(desc("sum"))
    //df.show()
    //frame_live.show()
    val vod_live_tmp: DataFrame = frame_vod.join(frame_live, Seq("mac"), joinType = "outer").toDF("mac", "vod", "live").na.fill(0)
    val vod_live: DataFrame = vod_live_tmp
      .rdd.map(row => {
        val mac: String = row.getString(0)
        val vod: Long = row.getLong(1)
        val live: Long = row.getLong(2)
        val total: Long = row.getLong(1) + row.getLong(2)
        (mac, vod, live, total)
      })
      .toDF("mac", "vod", "live", "total")
    //vod_live.show(20)
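    // Equivalent sketch using the DataFrame API instead of the RDD round trip
    // (illustrative only, not used below):
    val vodLiveAlt: DataFrame = vod_live_tmp.withColumn("total", col("vod") + col("live"))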
    val smart_tv: DataFrame = spark.read.jdbc(url, "smarttv", prop)
    println("---------- SmartTV VOD + live playback duration merged per mac ----------")
    val smart_tv_result: DataFrame = smart_tv.select("mac").join(vod_live, Seq("mac"), "left").na.fill(0)
    //smart_tv_result.show(20)
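    // The left join keeps every mac from the smarttv table; devices with no VOD or
    // live playback records end up with vod/live/total filled to 0 by na.fill(0).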
    // Target MySQL table names
    val smart_tv_result_table = "smart_tv_result"
    val vod_live_table = "vod_live_result"
    val active_user = "active_user_result"
    loadDataToMysql(smart_tv_result, url, prop, smart_tv_result_table)
    loadDataToMysql(vod_live, url, prop, vod_live_table)
    val seq: DataFrame = Seq((N_days_ago, day1, day7, day14, day30, day90, day180, day365))
      .toDF("date", "day1", "day7", "day14", "day30", "day90", "day180", "day365")
    //loadDataToMysql(seq,url,prop,active_user)
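    // Unlike the two tables above, which loadDataToMysql overwrites on every run,
    // the active-user snapshot is appended so that one row accumulates per run.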
    seq.write.mode("append").jdbc(url, active_user, prop)
    println("program ended")
  }
  def loadDataToMysql(data: DataFrame, url: String, prop: Properties, table_name: String): Unit = {
    // Write (overwrite) the DataFrame to the given MySQL table
    data.write.mode("overwrite").jdbc(url, table_name, prop)
  }
}
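The corresponding Maven dependency and property fragments from pom.xml: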
<dependencies>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>${mysql-version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
</dependencies>
<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <mysql-version>5.1.49</mysql-version>
    <scala.version>2.11.12</scala.version>
    <spark.version>2.4.0</spark.version>
</properties>