hive json数据处理

json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar 可在github上下载
 
json数据解析jar 可以上传到hdfs上:
 
使用是在hive中先add 如:
 
 add jar hdfs://tmp/jsonser/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
 
 

add jar /bd/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;

 
 CREATE TABLE json_test (
    >   country string,
    >   languages array<string>,
    >   religions map<string,array<int>>)
    >   ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    >   STORED AS TEXTFILE;
 
 
desc formatted json_test;
 
 
 
 LOAD DATA LOCAL INPATH '/bd/a.json' OVERWRITE INTO TABLE json_test;
 
 
 
select  languages[0] from json_test;
 
select religions['catholic'][0] from json_test;
 
 
 
create table if not exists login(
   app_key string comment '',
   pro_type string,
   token string,
   source_type string,
   session_id string,
   message_id string,
   user_id string,
   id_no string,
   name string,
  loan_number string,
  mobile string,
  app_version string,
  os_version string,
  sdk_version string,
  time string,
  event string,
  body struct<app_list:array<map<string,string>>,
              device:map<string,string>,
              device_call_logs:array<map<string,string>>,
              device_contacts:array<
                                    struct<company:string,
                                           display_name:string,
                                            insert_time:string,
                                            numbers:array<map<string,string>>
                                        >
                                    >,
              location:map<string,string>
            >
) PARTITIONED BY (dt string)  ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
 STORED AS TEXTFILE;
 
 LOAD DATA LOCAL INPATH '/bd/login.json' OVERWRITE INTO TABLE login partition (dt='2017-11-01’);
 
 add jar hdfs://finai/tmp/jsonser/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
 
select body.app_list[1]["app_name"] from login;
 
 
 
select dt,body.device['carrier'],body.device['brand'],count(1) from login
group by dt,body.device['carrier'],body.device['brand']
order by dt
 
 
 
add jar hdfs://data/tmp/jsonser/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
set mapred.reduce.tasks=10;
select dt,count(1) from login
group by dt
 
 
hive 时间格式化和处理
 
// 日期相差 分钟  结果精确小数点后3位
select t1.a1,t1.a2,cast((t1.a1-t1.a2)/60 as decimal(10,3)) from
(select unix_timestamp('2017-02-08 11:15:51.343') a1,unix_timestamp('2017-01-31 11:15:50.123') a2) t1;
 
如果有科学计数法: decimal 不满足
 
select cast(1.8111111E7 as decimal(10,3));
 
 

1、转换函数:

select cast(substring('2016-06-05 00:00:00.0',1,10) as int);

结果---》20160605

 

2、替换函数:

select  cast(substring(regexp_replace('2016-06-05 00:00:00.0', '-', ''),1,8) as int);

结果—》20160605

 

 

select dt,body.device['brand'],count(distinct body.device['mac']) from ods_wd_apply_submit

group by  dt,body.device['brand']

 

select dt,body.location['province'],body.device['brand'],count(distinct body.device['mac']) 

from ods_wd_apply_submit

group by dt,body.location['province'],body.device['brand']

 

 

select from_unixtime(cast(substring(unix_timestamp(event_time),1,10) as bigint),'yyyy-MM-dd HH:mm:ss')

FROM ods_wd_apply_submit LIMIT 100

 

select month(from_unixtime(cast(substring(unix_timestamp(event_time),1,10) as bigint),'yyyy-MM-dd HH:mm:ss'))

FROM ods_wd_apply_submit LIMIT 100

 

select to_date(split(event_time,'\\.')[0]),split(event_time,'\\.')[0],

split(event_time,' ')[0],split(event_time,' ')[1]

FROM ods_wd_apply_submit LIMIT 100

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值