[C:\~]$ ssh dwi@master
Host 'master' resolved to 192.168.49.110.
Connecting to 192.168.49.110:22...
Connection established.
To escape to local shell, press Ctrl+Alt+].
Last login: Tue May 31 15:42:33 2022 from fe80::e938:d89b:966:b6a8%ens33
[dwi@master ~]$ hive
Logging initialized using configuration in jar:file:/usr/local/soft/hive-1.2.1/lib/hive-common-1.2.1.jar!/hive-log4j.properties
hive> use dwi;
OK
Time taken: 0.465 seconds
hive> CREATE EXTERNAL TABLE IF NOT EXISTS dwi.dwi_res_regn_mergelocation_msk_d (
> mdn string comment '手机号码'
> ,start_date string comment '开始时间'
> ,end_date string comment '结束时间'
> ,county_id string comment '区县编码'
> ,longi string comment '经度'
> ,lati string comment '纬度'
> ,bsid string comment '基站标识'
> ,grid_id string comment '网格号'
> )
> comment '位置数据融合表'
> PARTITIONED BY (
> day_id string comment '天分区'
> )
> ROW FORMAT DELIMITED
> FIELDS TERMINATED BY '\t'
> STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
> location '/daas/motl/dwi/dwi_res_regn_mergelocation_msk_d';
OK
Time taken: 0.842 seconds
hive> use ods;
OK
Time taken: 0.013 seconds
hive> show tables;
OK
ods_admincode
ods_oidd
ods_scenic_boundary
ods_usertag_d
Time taken: 0.013 seconds, Fetched: 4 row(s)
[dwi@master ~]$ hdfs dfs -getfacl /daas/motl/ods
# file: /daas/motl/ods
# owner: ods
# group: ods
user::rwx
user:dwi:r-x
group::r-x
mask::r-x
other::---
hive> desc ods_oidd;
OK
mdn string 手机号码
start_time string 业务时间
county_id string 区县编码
longi string 经度
lati string 纬度
bsid string 基站标识
grid_id string 网格号
biz_type string 业务类型
event_type string 事件类型
data_source string 数据源
day_id string 天分区
# Partition Information
# col_name data_type comment
day_id string 天分区
Time taken: 0.065 seconds, Fetched: 16 row(s)
package com.ctyun.dwi

import org.apache.spark.sql.expressions.{UserDefinedFunction, Window, WindowSpec}
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import com.shujia.utils.Geography

/**
 * Builds the DWI merged-location table (dwi_res_regn_mergelocation_msk_d)
 * from the ODS-layer oidd data (ods.ods_oidd):
 *   1. splits the combined start_time column into start/end timestamps,
 *   2. pairs each record with the previous record's position per phone number (mdn),
 *   3. computes the distance between consecutive positions via Geography.calculateLength,
 * then writes the result as tab-separated CSV to the external table's HDFS location.
 */
object DwiResRegnMergelocationMskDay {

  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .appName("DwiResRegnMergelocationMskDay")
      .enableHiveSupport() // required so spark.table(...) can read Hive tables
      .getOrCreate()

    // Implicit conversions ($"col") and built-in SQL functions (split, lag, ...)
    import spark.implicits._
    import org.apache.spark.sql.functions._

    /**
     * UDF: distance between two lon/lat points.
     * Unit is whatever Geography.calculateLength returns — presumably metres; confirm in com.shujia.utils.Geography.
     */
    val calculateLength: UserDefinedFunction = udf((longi1: Double, lati1: Double, longi2: Double, lati2: Double) => {
      Geography.calculateLength(longi1, lati1, longi2, lati2)
    })

    // Read the ODS-layer oidd data from Hive.
    val oidd: DataFrame = spark.table("ods.ods_oidd")

    // Window over each phone number ordered by business start time;
    // hoisted so both lag() columns share one definition.
    val byMdnOrderedByStart: WindowSpec = Window.partitionBy($"mdn").orderBy($"start_t")

    oidd
      // 1. Split start_time into separate start/end columns.
      // NOTE(review): index 1 is labelled "start" and index 0 "end" — that is the reverse of the
      // usual "start,end" layout; behavior kept as-is, but verify against the raw ods_oidd data.
      .withColumn("start_t", split($"start_time", ",")(1)) // business start time
      .withColumn("end_t", split($"start_time", ",")(0))   // business end time
      // 2. Previous record's position per mdn (null on the first row of each partition,
      //    which makes the UDF yield null for that row's distance).
      .withColumn("last_lg", lag($"longi", 1) over byMdnOrderedByStart)  // previous longitude
      .withColumn("last_lat", lag($"lati", 1) over byMdnOrderedByStart)  // previous latitude
      // 3. Distance from the previous point.
      .withColumn("distance", calculateLength($"longi", $"lati", $"last_lg", $"last_lat"))
      // Write to the external table's HDFS location as tab-separated text,
      // matching the DDL's FIELDS TERMINATED BY '\t'.
      .write
      .format("csv")
      .option("sep", "\t")
      .mode(SaveMode.Overwrite)
      .save("/daas/motl/dwi/dwi_res_regn_mergelocation_msk_d/")

    // Release the Spark session's resources before the driver exits.
    spark.stop()
  }
}
[root@master ~]# hdfs dfs -setfacl -R -m user:dwi:r-x /user
[root@master ~]# hdfs dfs -chmod -R 777 /user
[root@master ~]# cd /usr/local/soft/spark-2.4.5/
[root@master spark-2.4.5]# ls
bin examples LICENSE NOTICE README.md yarn
conf jars licenses python RELEASE
data kubernetes logs R sbin
[root@master spark-2.4.5]# cd conf/
[root@master conf]# ls
docker.properties.template slaves.template
fairscheduler.xml.template spark-defaults.conf
hive-site.xml spark-defaults.conf.template
log4j.properties.template spark-env.sh
metrics.properties.template spark-env.sh.template
slaves
[root@master conf]# vim spark-defaults.conf
#spark.eventLog.enabled true
#spark.eventLog.dir hdfs://master:9000/user/spark/applicationHistory
#spark.yarn.historyServer.address master:18080
#spark.eventLog.compress true
#spark.history.fs.logDirectory hdfs://master:9000/user/spark/applicationHistory
#spark.history.retainedApplications 15
[root@master conf]# hdfs dfs -setfacl -R -m user:dwi:rwx /user/spark/applicationHistory
[root@master conf]# hdfs dfs -getfacl /user/spark/applicationHistory
# file: /user/spark/applicationHistory
# owner: root
# group: supergroup
user::rwx
user:dwi:rwx
group::r-x
mask::rwx
other::rwx
[root@master conf]# hdfs dfs -setfacl -R -m user:dwi:r-x /daas/motl/ods
[dwi@master jars]$ spark-submit --master local --class com.ctyun.dwi.DwiResRegnMergelocationMskDay --jars common-1.0.jar dwi-1.0.jar