位置数据融合表3

[root@master ~]# cd /usr/local/soft/hive-1.2.1/
[root@master hive-1.2.1]# ls
bin   examples  lib      NOTICE      RELEASE_NOTES.txt  tmp
conf  hcatalog  LICENSE  README.txt  scripts
[root@master hive-1.2.1]# rm -rf tmp
[dwi@master jars]$ spark-submit --master local[6] --class com.ctyun.dwi.DwiResRegnMergelocationMskDay --jars common-1.0.jar dwi-1.0.jar
[dwi@master jars]$ hdfs dfs -ls /
Found 9 items
drwxr-xr-x   - lenovo supergroup          0 2022-03-23 10:04 /TestAPI
drwxr-xr-x   - root   supergroup          0 2022-05-31 14:58 /daas
drwxr-xr-x   - root   supergroup          0 2022-06-08 17:27 /data
drwxr-xr-x   - root   supergroup          0 2022-04-16 09:43 /hbase
drwxr-xr-x   - root   supergroup          0 2022-04-18 19:47 /sqoop
drwxrw-r-x   - root   supergroup          0 2022-04-06 19:33 /student
drwxrwxrwx   - root   supergroup          0 2022-04-02 16:34 /tmp
drwxrwxrwx+  - root   supergroup          0 2022-05-15 20:25 /user
drwxr-xr-x   - root   supergroup          0 2022-04-28 17:53 /zzj
[dwi@master jars]$  hdfs dfs -du -h /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/
0       /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/_SUCCESS
43.3 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00000-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.6 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00001-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.2 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00002-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.5 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00003-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00004-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00005-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00006-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
44.0 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00007-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
42.8 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00008-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00009-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00010-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00011-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00012-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.4 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00013-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.5 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00014-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00015-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.8 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00016-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.5 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00017-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.3 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00018-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
43.8 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00019-ab692606-1667-439c-8c18-bd1f3f209ff1-c000.csv
spark-sql> use dwi;
Time taken: 1.031 seconds
22/07/04 14:49:33 INFO thriftserver.SparkSQLCLIDriver: Time taken: 1.031 seconds
spark-sql> show tables;
22/07/04 14:49:35 INFO codegen.CodeGenerator: Code generated in 150.069267 ms
dwi	dwi_res_regn_mergelocation_msk_d	false
Time taken: 0.286 seconds, Fetched 1 row(s)
22/07/04 14:49:35 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.286 seconds, Fetched 1 row(s)
spark-sql> desc dwi_res_regn_mergelocation_msk_d;
22/07/04 14:49:56 INFO spark.ContextCleaner: Cleaned accumulator 0
22/07/04 14:49:56 INFO spark.ContextCleaner: Cleaned accumulator 1
22/07/04 14:49:56 INFO codegen.CodeGenerator: Code generated in 28.723829 ms
mdn	string	手机号码
start_date	string	开始时间
end_date	string	结束时间
county_id	string	区县编码
longi	string	经度
lati	string	纬度
bsid	string	基站标识
grid_id	string	网格号
day_id	string	天分区
# Partition Information		
# col_name	data_type	comment
day_id	string	天分区
Time taken: 0.139 seconds, Fetched 12 row(s)
22/07/04 14:49:56 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.139 seconds, Fetched 12 row(s)
spark-sql> use ods;
Time taken: 0.019 seconds
22/07/04 15:08:36 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.019 seconds
spark-sql> show tables;
ods	ods_admincode	false
ods	ods_oidd	false
ods	ods_scenic_boundary	false
ods	ods_usertag_d	false
Time taken: 0.026 seconds, Fetched 4 row(s)
22/07/04 15:08:47 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.026 seconds, Fetched 4 row(s)
spark-sql> show partitions ods_oidd;
22/07/04 15:09:08 INFO codegen.CodeGenerator: Code generated in 7.03737 ms
day_id=20220531
Time taken: 0.108 seconds, Fetched 1 row(s)
22/07/04 15:09:08 INFO thriftserver.SparkSQLCLIDriver: Time taken: 0.108 seconds, Fetched 1 row(s)
package com.ctyun.dwi

import org.apache.spark.sql.expressions.{UserDefinedFunction, Window}
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import com.shujia.utils.Geography

object DwiResRegnMergelocationMskDay {

  /**
   * Builds the DWI merged-location table (dwi_res_regn_mergelocation_msk_d)
   * for a single day partition from the ODS-layer oidd data, then writes the
   * result as tab-separated CSV under
   * /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/day_id=&lt;day_id&gt;.
   *
   * Usage: spark-submit ... --class com.ctyun.dwi.DwiResRegnMergelocationMskDay
   *        --jars common-1.0.jar dwi-1.0.jar &lt;day_id&gt;
   */
  def main(args: Array[String]): Unit = {
    // The partition date day_id (format yyyyMMdd) must be supplied as the
    // first command-line argument.
    if (args.length == 0) {
      println("请指定分区日期day_id!")
      return
    }

    val day_id: String = args(0)

    // Build the location-fusion table on top of the ODS layer.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("DwiResRegnMergelocationMskDay")
      .enableHiveSupport() // enable Hive metastore support
      .config("spark.sql.shuffle.partitions", "20")
      .getOrCreate()

    // Import implicit conversions ($"col" syntax) and SQL functions.
    import spark.implicits._
    import org.apache.spark.sql.functions._

    /**
     * UDF: distance between two points given as (longitude, latitude) pairs,
     * delegating to Geography.calculateLength (units per that helper —
     * presumably metres; confirm against com.shujia.utils.Geography).
     */
    val calculateLength: UserDefinedFunction = udf((longi1: Double, lati1: Double, longi2: Double, lati2: Double) => {
      Geography.calculateLength(longi1, lati1, longi2, lati2)
    })

    // Read the oidd data from the Hive ODS layer.
    val oidd: DataFrame = spark.table("ods.ods_oidd")

    oidd
      // Keep only the requested day partition.
      .where($"day_id" === day_id)
      // 1. Split the comma-separated start_time field into two columns.
      // NOTE(review): both columns are derived from start_time, with index (1)
      // as the start and (0) as the end — this assumes the raw field holds
      // "end,start". Confirm the source schema before changing these indices.
      .withColumn("start_t", split($"start_time", ",")(1)) // business start time
      .withColumn("end_t", split($"start_time", ",")(0))   // business end time
      // Cluster on the time axis: collapse consecutive records of the same
      // user within the same grid whose time ranges overlap.
      // Per user (mdn), ordered by start time, fetch the previous grid id.
      .withColumn("last_grid", lag("grid_id", 1) over Window.partitionBy("mdn").orderBy("start_t"))
      // 0 if the grid is unchanged from the previous record, else 1.
      .withColumn("flag", when($"grid_id" === $"last_grid", 0).otherwise(1))
      // Running sum of flag => a group id that increments on every grid change.
      .withColumn("grp", sum($"flag") over Window.partitionBy("mdn").orderBy("start_t"))
      // Per group, keep the earliest start time and the latest end time.
      .groupBy("mdn", "county_id", "longi", "lati", "bsid", "grid_id", "grp")
      .agg(min("start_t") as "start_t", max("end_t") as "end_t")
      // 2. Bring the previous record's position and time alongside each record.
      .withColumn("last_lg", lag($"longi", 1) over Window.partitionBy($"mdn").orderBy($"start_t"))  // previous longitude
      .withColumn("last_lat", lag($"lati", 1) over Window.partitionBy($"mdn").orderBy($"start_t")) // previous latitude
      // Deliberately lag start_t (not end_t): this avoids negative gaps when
      // adjacent records in different grids have overlapping time ranges.
      .withColumn("last_end_time", lag($"start_t", 1) over Window.partitionBy($"mdn").orderBy($"start_t"))
      // 3. Seconds elapsed between the previous and the current record.
      .withColumn("diff_time", unix_timestamp($"start_t", "yyyyMMddHHmmss") - unix_timestamp($"last_end_time", "yyyyMMddHHmmss"))
      // 4. Distance from the previous position (1 for the first record, which
      // has no predecessor).
      .withColumn("distance", when($"last_lg".isNull, 1).otherwise(calculateLength($"longi", $"lati", $"last_lg", $"last_lat")))
      // 5. Speed = distance / elapsed time.
      .withColumn("speed", round($"distance" / $"diff_time", 3))
      // Filter out "over-speed" (drifting) records.
      // BUG FIX: each user's first record has no predecessor, so diff_time —
      // and therefore speed — is null; under SQL three-valued logic
      // `null <= 340` is null, and the original predicate silently dropped
      // one record per user. Keep null-speed rows explicitly.
      .where($"speed".isNull || $"speed" <= 340)
      // Project the output columns.
      .select($"mdn"
        , $"start_t" as "start_date"
        , $"end_t" as "end_date"
        , $"county_id"
        , $"longi"
        , $"lati"
        , $"bsid"
        , $"grid_id"
      )
      // Write the result as tab-separated CSV into the day partition directory.
      .write
      .format("csv")
      .option("sep", "\t")
      .mode(SaveMode.Overwrite)
      .save(s"/daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/day_id=$day_id")
  }
  /**
   * Deployment notes:
   * 1. Package the code into a jar with maven and upload it.
   * 2. If the Spark history server is enabled, grant ACL permissions on the
   *    /user/spark/applicationHistory directory:
   *    hdfs dfs -setfacl -R -m user:dwi:rwx /user/spark/applicationHistory
   * 3. Submit the job with spark-submit:
   *    spark-submit --master yarn-client --class com.ctyun.dwi.DwiResRegnMergelocationMskDay --jars common-1.0.jar dwi-1.0.jar 20220531
   * 4. Check the size of the result files:
   *    hdfs dfs -du -h /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/
   */
}
[dwi@master jars]$ spark-submit --master local[6] --class com.ctyun.dwi.DwiResRegnMergelocationMskDay --jars common-1.0.jar dwi-1.0.jar 20220531
[dwi@master jars]$ hdfs dfs -ls /
Found 9 items
drwxr-xr-x   - lenovo supergroup          0 2022-03-23 10:04 /TestAPI
drwxr-xr-x   - root   supergroup          0 2022-05-31 14:58 /daas
drwxr-xr-x   - root   supergroup          0 2022-06-08 17:27 /data
drwxr-xr-x   - root   supergroup          0 2022-04-16 09:43 /hbase
drwxr-xr-x   - root   supergroup          0 2022-04-18 19:47 /sqoop
drwxrw-r-x   - root   supergroup          0 2022-04-06 19:33 /student
drwxrwxrwx   - root   supergroup          0 2022-04-02 16:34 /tmp
drwxrwxrwx+  - root   supergroup          0 2022-05-15 20:25 /user
drwxr-xr-x   - root   supergroup          0 2022-04-28 17:53 /zzj
[dwi@master jars]$ hdfs dfs -du -h /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/
0        /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/_SUCCESS
483.1 M  /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/day_id=20220531
43.3 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00000-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.6 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00001-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.2 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00002-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.5 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00003-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00004-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00005-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00006-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
44.0 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00007-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
42.8 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00008-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00009-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00010-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00011-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00012-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.4 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00013-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.5 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00014-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00015-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.8 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00016-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.5 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00017-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.3 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00018-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv
43.8 M   /daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/part-00019-64160c6a-c22e-4602-b2ea-5a8bac5fe212-c000.csv

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
提供的源码资源涵盖了安卓应用、小程序、Python应用和Java应用等多个领域,每个领域都包含了丰富的实例和项目。这些源码都是基于各自平台的最新技术和标准编写,确保了在对应环境下能够无缝运行。同时,源码中配备了详细的注释和文档,帮助用户快速理解代码结构和实现逻辑。 适用人群: 这些源码资源特别适合大学生群体。无论你是计算机相关专业的学生,还是对其他领域编程感兴趣的学生,这些资源都能为你提供宝贵的学习和实践机会。通过学习和运行这些源码,你可以掌握各平台开发的基础知识,提升编程能力和项目实战经验。 使用场景及目标: 在学习阶段,你可以利用这些源码资源进行课程实践、课外项目或毕业设计。通过分析和运行源码,你将深入了解各平台开发的技术细节和最佳实践,逐步培养起自己的项目开发和问题解决能力。此外,在求职或创业过程中,具备跨平台开发能力的大学生将更具竞争力。 其他说明: 为了确保源码资源的可运行性和易用性,特别注意了以下几点:首先,每份源码都提供了详细的运行环境和依赖说明,确保用户能够轻松搭建起开发环境;其次,源码中的注释和文档都非常完善,方便用户快速上手和理解代码;最后,我会定期更新这些源码资源,以适应各平台技术的最新发展和市场需求。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值