一、需求介绍
- 目前需要对客户提供的数据坐标串进行数据的入库和计算中心点。
- 文件是以txt格式提供,数据内容各字段是通过分号来分割,坐标串每个经纬度是逗号隔开,点与点之间是空格来分割。
二、数据入库操作
- 数据内容如下所示:
123;北京;xxx19号;116.393069920046 39.8794358298622,116.392284281065 39.8794240227275,116.392278945594 39.8799218737309,116.393069989912 39.8799368255341,116.393069920046 39.8794358298622
123;北京;xxx20号;116.389060369857 39.8805385985325,116.389149003366 39.8793549988737,116.389226226988 39.8793093165453,116.390195405884 39.8793281494735,116.390262646471 39.8793685904133,116.390294516148 39.8794955959993,116.390218462185 39.8805449250846,116.390083322732 39.8805946022267,116.389060369857 39.8805385985325
- hive建表语句
create external table if not exists tmp_lbs.cell_data_info_0117
(
province_code string comment '省份编码',
province_name string comment '省份简称',
cell_name string comment '小区名称',
lon_lat_strs string comment '经纬度串'
)
comment '小区数据清单'
row format delimited fields terminated by '\073'
stored as textfile
location 'hdfs://xxx/domain/ns3/wznlxt_yx/tmp_lbs.db/lbs/tmp/cell_data_info_0117';
特别提示:建表时候针对于数据字段中分隔符为";",需要转义为相应的二进制073来代替。
- 加载数据到hive
hive > load data local inpath '/data06/lbs/cell_data_info.txt' into table tmp_lbs.cell_data_info_0117;
三、数据转换操作
- 打包地理空间UDF代码
- 加载到hive库
add jar /data06/lbs/udf/spatial-sdk-hive-2.2.1-SNAPSHOT-jar-with-dependencies.jar;
add jar /data06/lbs/udf/hive-udf-1.0-SNAPSHOT-jar-with-dependencies.jar;
- 创建临时函数
###来源于jar包1
CREATE FUNCTION ST_AsText01 AS 'com.esri.hadoop.hive.ST_AsText';
CREATE FUNCTION ST_Buffer01 AS 'com.esri.hadoop.hive.ST_Buffer';
CREATE FUNCTION ST_Centroid01 AS 'com.esri.hadoop.hive.ST_Centroid';
CREATE FUNCTION ST_GeomFromText01 AS 'com.esri.hadoop.hive.ST_GeomFromText';
CREATE FUNCTION ST_Point01 AS 'com.esri.hadoop.hive.ST_Point';
###来源于jar包2
CREATE FUNCTION Gcj02ToWgs84 AS 'com.oidd.hive.Gcj02ToWgs84';
- 第一次转换(此时坐标系为GCJ02火星坐标系)
select province_code,
province_name,
cell_name,
split(regexp_replace(regexp_replace(ST_AsText01(ST_Centroid01(
ST_GeomFromText01(concat('polygon ((', lon_lat_strs, '))')))), "POINT \\(", ""), "\\)", ""),
" ")[0] as lon,
split(regexp_replace(regexp_replace(ST_AsText01(ST_Centroid01(
ST_GeomFromText01(concat('polygon ((', lon_lat_strs, '))')))), "POINT \\(", ""), "\\)", ""),
" ")[1] as lat
from tmp_lbs.cell_data_info_0117;
- 第二次转换(GCJ02->WGS84)
insert overwrite table tmp_lbs.cell_data_info_wgs84
select province_code,
province_name,
cell_name,
split(Gcj02ToWgs84(lon, lat), ',')[0] as lon,
split(Gcj02ToWgs84(lon, lat), ',')[1] as lat
from tmp_lbs.cell_data_info_gcj02;