hive常用语句

1、创建一张外部分区表(分区需要另外添加)

-- 创建方式1(hive不能用多个字符作为列分隔符)
drop table if exists stg.stg_dev_location_info_d;
CREATE TABLE `stg.stg_dev_location_info_d`
(`device_id` string COMMENT '用户唯一识别ID',`device_brand` string COMMENT '设备品牌',`device_model` string COMMENT '设备型号',`system_version` string COMMENT '系统版本',`system_name` string COMMENT '操作系统名称')
COMMENT 'stg-设备基站信息' 
PARTITIONED BY(day string,source string) 
row format delimited 
fields terminated by '\t' collection items terminated by ',' map keys terminated by ':' lines terminated by '\n'
stored as orc;
-- 创建方式2
CREATE EXTERNAL TABLE `ext_base_click`(
  `reqid` string,
  `adslotid` string,
  `ip` string,
  `timestamp` string,
  `adgroupid` string,
  `lbs` string)
PARTITIONED BY (
  `date` string,
  `hour` int)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
  'field.delim'='|',
  'line.delim'='\n',
  'serialization.format'='|')
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'

2、创建一张外表(需写好分区位置)

CREATE EXTERNAL TABLE `b_base_adx`(
  `id` string,
  `enname` string,
  `zhname` string,
  `description` string,
  `supportdeal` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
  'field.delim'='|',
  'line.delim'='\n',
  'serialization.format'='|')
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://tercel/user/mapred2/hive_basetable/b_base_adx'

3、表字段的修改(注意锁表问题、ORC文件表字段的修改、矢量查询)

--hive 新增一列
ALTER TABLE dsp.int_optimize_bidrequest ADD COLUMNS (app_id String);
--hive 修改列名
ALTER TABLE ext_preprocess_click CHANGE col_old_name col_new_name String AFTER column_name;
-- 修改列类型
Alter table stg.xh_temp_netinfo_d change column st st BIGINT
--hive 修改列注释
alter table stg.stg_dev_xxx_info_d CHANGE COLUMN is_root is_root string comment '是否获取root权限: 1表示获取, 0表示没获取';
--hive 删除替换全部列(禁止删除列会有问题)
ALTER TABLE ext_base_conversion REPLACE COLUMNS (reqid string, adslotid string, adx string, deviceid string);  
--解锁hive表
show locks your_table
unlock table your_table

4、hive常用语句

--新增外部表分区(数据不会移动)
alter table media_stats add partition (date ="2018-02-02",hour=5) location 'hdfs://tercel/user/DAILY/$day/media-stats';
--外部分区表load数据(数据会移动到表下面)
load data local inpath '/tmp/pck_num.txt' OVERWRITE into table stg.stg_app_net_info_d partition (day='20200916');
--判断字符串是否都为数字
select * from base_media_baidu where '123456' rlike '^\\d+$';
--删除表分区
ALTER TABLE dsp.ext_preprocess_bidrequest DROP IF EXISTS PARTITION (date ="1018-01-05",hour=5);
--添加hive_udf
add jar hdfs://tercel//user/mapred2/myudf/hive/hiveudf.jar;
create temporary function md5 as  'com.Md5';

5、分割字符串’,’ (行转列 | 列转行)

-- 行转列
select adslotallowedsize_  
from dsp_media.b_report_flow_daily  lateral  view explode(split(adslotallowedsize,'_'))  b AS adslotallowedsize_
where date='2018-03-07'
-- 列转行
select
    device_id,
    concat_ws(',',collect_set(package_name)) AS package_name
from tmp.stg_dev_user_bhv_d_old
where day=20170301
group by device_id

6、select除了某些字段之外的剩余所有字段

set hive.support.quoted.identifiers=None;
select `(name|id|pwd)?+.+` from tableName;

7、不同的adx中的曝光top3的机型

select * 
from
(
  select adx,brandmodelid,c,row_number() over (partition by adx order by adx,c desc) ord 
  from
  (
    select adx,brandmodelid,count(1) c from dsp.ext_base_impression WHERE date='2017-07-03' AND hour=4 
    group by adx,brandmodelid
  ) t1
) t2
where ord<=3;

8、设备信息的组内占比

select a.source AS source,
    a.sdk_version AS sdk_version,
    a.device_info_count AS device_info_count,
    sum(a.device_info_count) over(partition by a.source) as group_device_info_count  
from
(select source, split(sdk_version,'\\|')[0] AS sdk_version, count(1) AS device_info_count
from stg.stg_dev_device_info_d
  where day=20180426
group by source, split(sdk_version,'\\|')[0]
order by cast(source AS int),sdk_version) AS a

9、动态分区插入数据

INSERT OVERWRITE TABLE tmp.tmp_stg_dev_user_bhv_d
PARTITION (day = '20181115', source = '200')
SELECT  device_id,package_name,app_key,open_time,close_time,sample_package_name,device_brand,device_model,system_version,system_name,sdk_version,ip,sample_app_version,app_channel,tmp_id,count(1) AS cou
FROM stg.stg_dev_user_bhv_d 
where day=20181115 and source=200 
group by device_id,package_name,app_key,open_time,close_time,sample_package_name,device_brand,device_model,system_version,system_name,sdk_version,ip,sample_app_version,app_channel,tmp_id
order by cou desc

10、hive常用函数

-- json 查询
select get_json_object("{\"imei\":\"368173410356449\",\"mac\":\"008B2F377231\"}",'$.mac')
-- map查询
select map('k1','v1')['k1']

11、创建视图

-- 创建分区
create view test_view as select * from test

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值