[root@master ~]# cd /usr/local/soft/hive-1.2.1/
[root@master hive-1.2.1]# ls
bin examples lib NOTICE RELEASE_NOTES.txt tmp
conf hcatalog LICENSE README.txt scripts
[root@master hive-1.2.1]# rm -rf tmp
[dwi@master jars]$ spark-submit --master local[6] --class com.ctyun.dwi.DwiResRegnMergelocationMskDay --jars common-1.0.jar dwi-1.0.jar
[dwi@master jars]$ hdfs dfs -ls /
Found 9 items
drwxr-xr-x - lenovo supergroup 0 2022-03-23 10:04 /TestAPI
drwxr-xr-x - root supergroup 0 2022-05-31 14:58 /daas
drwxr-xr-x - root supergroup 0 2022-06-08 17:27 /data
drwxr-xr-x - root supergroup 0 2022-04-16 09:43 /hbase
drwxr-xr-x - root supergroup 0 2022-04-18 19:47 /sqoop
drwxrw-r-x - root supergroup 0 2022-04-06 19:33 /student
drwxrwxrwx - root supergroup 0 2022-04-02 16:34 /tmp
drwxrwxrwx+ - root supergroup 0 2022-05-15 20:25 /user
drwxr-xr-x - root supergroup 0 2022-04-28 17:53 /zzj
[dwi@master jars]$ hdfs dfs -du -h /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/
0 /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/_SUCCESS
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00000-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.6 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00001-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.2 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00002-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.5 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00003-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00004-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00005-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00006-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
44.0 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00007-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
42.8 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00008-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00009-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00010-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00011-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00012-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00013-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.5 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00014-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00015-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.8 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00016-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.5 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00017-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00018-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.8 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00019-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
spark-sql> use dwi;
Time taken: 1.031 seconds
22/07/04 14:49:33 INFO thriftserver.SparkSQLCLIDriver: Time taken: 1.031 seconds
spark-sql> show tables;
22/07/04 14:49:35 INFO codegen.CodeGenerator: Code generated in 150.069267 ms
dwi dwi_res_regn_mergelocation_msk_d false
Time taken: 0.286 seconds, Fetched 1 row(s)
22/07/04 14:49:35 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.286 seconds, Fetched 1 row(s)
spark-sql> desc dwi_res_regn_mergelocation_msk_d;
22/07/04 14:49:56 INFO spark.ContextCleaner: Cleaned accumulator 0
22/07/04 14:49:56 INFO spark.ContextCleaner: Cleaned accumulator 1
22/07/04 14:49:56 INFO codegen.CodeGenerator: Code generated in 28.723829 ms
mdn string 手机号码
start_date string 开始时间
end_date string 结束时间
county_id string 区县编码
longi string 经度
lati string 纬度
bsid string 基站标识
grid_id string 网格号
day_id string 天分区
# Partition Information
# col_name data_type comment
day_id string 天分区
Time taken: 0.139 seconds, Fetched 12 row(s)
22/07/04 14:49:56 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.139 seconds, Fetched 12 row(s)
spark-sql> use ods;
Time taken: 0.019 seconds
22/07/04 15:08:36 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.019 seconds
spark-sql> show tables;
ods ods_admincode false
ods ods_oidd false
ods ods_scenic_boundary false
ods ods_usertag_d false
Time taken: 0.026 seconds, Fetched 4 row(s)
22/07/04 15:08:47 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.026 seconds, Fetched 4 row(s)
spark-sql> show partitions ods_oidd;
22/07/04 15:09:08 INFO codegen.CodeGenerator: Code generated in 7.03737 ms
day_id=20220531
Time taken: 0.108 seconds, Fetched 1 row(s)
22/07/04 15:09:08 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.108 seconds, Fetched 1 row(s)
package com.ctyun.dwi
import org.apache.spark.sql.expressions.{UserDefinedFunction, Window}
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import com.shujia.utils.Geography
object DwiResRegnMergelocationMskDay {
def main(args: Array[String]): Unit = {
//通过参数传入day_id
if(args.length==0){
println("请指定分区日期day_id!")
return
}
val day_id:String=args(0)
//基于ods的数据构建 位置数据融合表
val spark: SparkSession = SparkSession
.builder()
.appName("DwiResRegnMergelocationMskDay")
.enableHiveSupport() //开启Hive的支持
.config("spark.sql.shuffle.partitions","20")
.getOrCreate()
//导入隐式转换及函数
import spark.implicits._
import org.apache.spark.sql.functions._
//在Spark SQL中使用自定义函数
/**
* 传入两个点的经纬度 计算距离
*/
val calculateLength: UserDefinedFunction = udf((longi1: Double, lati1: Double, longi2: Double, lati2: Double) => {
Geography.calculateLength(longi1, lati1, longi2, lati2)
})
//读取hive中ods层中的oidd的数据
val oidd: DataFrame = spark.table("ods.ods_oidd")
//1、将开始时间、结束时间分成两列
oidd
//取对应分区的数据
.where($"day_id"===day_id)
//withColumn可以给数据增加一列
.withColumn("start_t",split($"start_time",",")(1))//提取业务的开始时间
.withColumn("end_t",split($"start_time",",")(0))//提取业务的结束时间
// 在时间轴上进行聚类 主要解决同一个网格内部相邻两条数据有时间交叠的问题
//按照用户”分组“ 再按 开始时间 排序 获取上一条数据的网络编号
.withColumn("last_grid",lag("grid_id",1)over Window.partitionBy("mdn").orderBy("start_t"))
// 用当前数据的grid_id 进行对比 上一条数据的grid_id 如果相同则置0,不同则置1
.withColumn("flag", when($"grid_id" === $"last_grid", 0).otherwise(1))
// 对flag列进行累计求和
.withColumn("grp", sum($"flag") over Window.partitionBy("mdn").orderBy("start_t"))
// 按照grp进行分组 取每个组内的 时间的最小值及最大值
.groupBy("mdn", "county_id", "longi", "lati", "bsid", "grid_id", "grp")
.agg(min("start_t") as "start_t", max("end_t") as "end_t")
//2、基于开始时间排序,取每一条数据的前一条数据 作为新的一列 lag
.withColumn("last_lg",lag($"longi",1)over Window.partitionBy($"mdn").orderBy($"start_t"))//取上一条数据的经度
.withColumn("last_lat",lag($"lati",1)over Window.partitionBy($"mdn").orderBy($"start_t"))//取上一条数据的纬度
// 由取上一条数据的结束时间end_t 变为 取上一条数据的 start_t 主要为了解决不同网格之间相邻两条数据有时间交叠的问题
.withColumn("last_end_time",lag($"start_t",1)over Window.partitionBy($"mdn").orderBy($"start_t"))//取上一条数据的结束时间
//3、计算相邻两条位置记录之间的时间间隔
.withColumn("diff_time",unix_timestamp($"start_t","yyyyMMddHHmmss")-unix_timestamp($"last_end_time","yyyyMMddHHmmss"))
//4、基于经纬度计算距离
.withColumn("distance",when($"last_lg".isNull,1).otherwise(calculateLength($"longi",$"lati",$"last_lg",$"last_lat")))
//5、根据距离及时间间隔计算速度
.withColumn("speed",round($"distance"/$"diff_time",3))
//将”超速“的数据进行过滤
.where($"speed"<=340)
//整理数据 将需要的字段取出来
.select($"mdn"
,$"start_t"as "start_date"
,$"end_t"as "end_date"
,$"county_id"
,$"longi"
,$"lati"
,$"bsid"
,$"grid_id"
)
//将结果保存到文件
.write
.format("csv")
.option("sep","\t")
.mode(SaveMode.Overwrite)
.save(s"/daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/day_id=$day_id")
}
/**
* 1、使用maven将代码打成jar包并上传
* 2、如果开启了Spark的historySever服务需要给/user/spark/applicationHistory目录通过acl设置权限
* hdfs dfs -setfacl -R -m user:dwi:rwx /user/spark/applicationHistory
* 3、使用spark-submit提交代码:
* spark-submit --master yarn-client --class com.ctyun.dwi.DwiResRegnMergelocationMskDay --jars common-1.0.jar dwi-1.0.jar 20220531
* 4、查看结果目录的文件大小
* hdfs dfs -du -h /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/
*/
}
[dwi@master jars]$ spark-submit --master local[6] --class com.ctyun.dwi.DwiResRegnMergelocationMskDay --jars common-1.0.jar dwi-1.0.jar 20220531
[dwi@master jars]$ hdfs dfs -ls /
Found 9 items
drwxr-xr-x - lenovo supergroup 0 2022-03-23 10:04 /TestAPI
drwxr-xr-x - root supergroup 0 2022-05-31 14:58 /daas
drwxr-xr-x - root supergroup 0 2022-06-08 17:27 /data
drwxr-xr-x - root supergroup 0 2022-04-16 09:43 /hbase
drwxr-xr-x - root supergroup 0 2022-04-18 19:47 /sqoop
drwxrw-r-x - root supergroup 0 2022-04-06 19:33 /student
drwxrwxrwx - root supergroup 0 2022-04-02 16:34 /tmp
drwxrwxrwx+ - root supergroup 0 2022-05-15 20:25 /user
drwxr-xr-x - root supergroup 0 2022-04-28 17:53 /zzj
[dwi@master jars]$ hdfs dfs -du -h /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/
0 /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/_SUCCESS
483.1 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/day_id=20220531
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00000-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.6 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00001-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.2 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00002-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.5 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00003-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00004-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00005-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00006-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
44.0 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00007-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
42.8 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00008-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00009-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00010-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00011-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00012-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00013-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.5 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00014-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00015-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.8 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00016-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.5 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00017-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00018-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.8 M /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00019-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv