json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar 可在github上下载
json数据解析jar 可以上传到hdfs上:
使用是在hive中先add 如:
add jar hdfs://tmp/jsonser/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
或
add jar /bd/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
CREATE TABLE json_test (
> country string,
> languages array<string>,
> religions map<string,array<int>>)
> ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
> STORED AS TEXTFILE;
desc formatted json_test;
LOAD DATA LOCAL INPATH '/bd/a.json' OVERWRITE INTO TABLE json_test;
select languages[0] from json_test;
select religions['catholic'][0] from json_test;
create table if not exists login(
app_key string comment '',
pro_type string,
token string,
source_type string,
session_id string,
message_id string,
user_id string,
id_no string,
name string,
loan_number string,
mobile string,
app_version string,
os_version string,
sdk_version string,
time string,
event string,
body struct<app_list:array<map<string,string>>,
device:map<string,string>,
device_call_logs:array<map<string,string>>,
device_contacts:array<
struct<company:string,
display_name:string,
insert_time:string,
numbers:array<map<string,string>>
>
>,
location:map<string,string>
>
) PARTITIONED BY (dt string) ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/bd/login.json' OVERWRITE INTO TABLE login partition (dt='2017-11-01’);
add jar hdfs://finai/tmp/jsonser/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
select body.app_list[1]["app_name"] from login;
select dt,body.device['carrier'],body.device['brand'],count(1) from login
group by dt,body.device['carrier'],body.device['brand']
order by dt
add jar hdfs://data/tmp/jsonser/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
set mapred.reduce.tasks=10;
select dt,count(1) from login
group by dt
hive 时间格式化和处理
// 日期相差 分钟 结果精确小数点后3位
select t1.a1,t1.a2,cast((t1.a1-t1.a2)/60 as decimal(10,3)) from
(select unix_timestamp('2017-02-08 11:15:51.343') a1,unix_timestamp('2017-01-31 11:15:50.123') a2) t1;
如果有科学计数法: decimal 不满足
select cast(1.8111111E7 as decimal(10,3));
1、转换函数:
select cast(substring('2016-06-05 00:00:00.0',1,10) as int);
结果---》20160605
2、替换函数:
select cast(substring(regexp_replace('2016-06-05 00:00:00.0', '-', ''),1,8) as int);
结果—》20160605
select dt,body.device['brand'],count(distinct body.device['mac']) from ods_wd_apply_submit
group by dt,body.device['brand']
select dt,body.location['province'],body.device['brand'],count(distinct body.device['mac'])
from ods_wd_apply_submit
group by dt,body.location['province'],body.device['brand']
select from_unixtime(cast(substring(unix_timestamp(event_time),1,10) as bigint),'yyyy-MM-dd HH:mm:ss')
FROM ods_wd_apply_submit LIMIT 100
select month(from_unixtime(cast(substring(unix_timestamp(event_time),1,10) as bigint),'yyyy-MM-dd HH:mm:ss'))
FROM ods_wd_apply_submit LIMIT 100
select to_date(split(event_time,'\\.')[0]),split(event_time,'\\.')[0],
split(event_time,' ')[0],split(event_time,' ')[1]
FROM ods_wd_apply_submit LIMIT 100