Hive错误记录与语句记录

一、错误

1.运行到一半任务断掉:空指针

 

2.资源分配与map数(任务较大时建议考虑其他方法)

 

3.row_number_old时行出现空值(udf函数)

 

4.暂不支持update和delete操作

5.注意hive锁表

SET mapreduce.job.queuename=highlevel;
SET hive.support.concurrency=false;--此功能(这两句不用语句解释,否则无法运行

二、主要语句

--建表
create table hue_10086_url_output(url string, Login_UserNumber string ,mailKey string ,Max_Age string ,provCode string ,areaCode string ,time_consume string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
location '/user/hue_10086_url_output'

                    --其中若有中文需用notepad++转化为utf8无bom编码格式

 --更新数据(--hbase是key/value类型数据)insert overwrite 覆盖功能,insert into简单插入功能

insert into table hbase_package_programname
select apk_name as package_name,program_name from hue_zouyanmei_packageprogram_buchong0915

--建list表(collect_set(named_struct))

 create table hue_apt_attack0206
location '/user/hue_apt_attack0206'
as
select time , source , downloadurl ,APT , title ,
collect_set(named_struct("hash", hash, "hashtag", hashtag)) hash_info, 
collect_set(named_struct("ip", ip, "iptag", iptag)) ip_info,
collect_set(named_struct("domain", domain, "domaintag", domaintag)) domain_info,
collect_set(named_struct("url", url, "urltag", urltag)) url_info,
collect_set(named_struct("reference", reference)) reference_info
from hue_apt_attack
group by time , source , downloadurl ,APT , title

--udf函数使用

create table hue_10086_url_output_city as 
select * ,callerlocation(substr(login_usernumber,3,11),1) city ,callerlocation(substr(login_usernumber,3,11),2) province,
callerlocation(substr(login_usernumber,3,11),3) card_type from hue_10086_url_output where login_usernumber!='err'

--is not NULL

select max(url),imei_p,login_usernumber from hue_url1207 where login_usernumber !='err' and login_usernumber is not NULL
group by imei_p,login_usernumber

--string转化为数组array
creat table hue_imei1208
location '/user/hue_imei1208'
select imei_p, collect_set(login_usernumber) from hue_url1207 where imei_p != 'ERR_N' and login_usernumber !='err' and login_usernumber is not NULL group by imei_p

--array转化成多个string
select * from (select *,adid from ods_url_dns lateral view explode(us) ad as adid where dt='20171211') a where adid like '1%' limit 10

--在一个表中不在另一表中(类似于不等值连接)

select a.url,a.dt from 
(select a.url,a.dt,b.url url1 from 
(select a.* from mds_engine_url_dt a where a.dt>='20171201' and a.dt<='20171231')a
left outer join 
(select url from ods_url_dwz group by url) b
on a.url=b.url)a
where a.url1 is null

--udf函数(目的挑选出数目最多的一个)

select a.domain,a.url,a.a from
(
select row_number_old(b.domain) seq_no,b.domain,b.url,b.a from
(
select * from
(
select case when instr(url,'www.')>0 and instr(url,'http://')<=0 and instr(url,'https://')<=0 then split(split(url,'\\/')[0],'www.')[1]
when (instr(url,'http://')>0 or instr(url,'https://')>0) and instr(url,'www.')<=0 then split(url,'/')[2] 
when (instr(url,'http://')>0 or instr(url,'https://')>0) and instr(url,'www.')>0 then split(split(url,'\\/')[2],'www.')[1]
when instr(url,'http://')<=0 and instr(url,'https://')<=0 and instr(url,'www.')<=0 then split(url,'/')[0] end as
domain,url,count(*) as a 
from hue_domain_url0105 where dt>='20171201' and dt<='20171231'
group by case when instr(url,'www.')>0 and instr(url,'http://')<=0 and instr(url,'https://')<=0 then split(split(url,'\\/')[0],'www.')[1]
when (instr(url,'http://')>0 or instr(url,'https://')>0) and instr(url,'www.')<=0 then split(url,'/')[2] 
when (instr(url,'http://')>0 or instr(url,'https://')>0) and instr(url,'www.')>0 then split(split(url,'\\/')[2],'www.')[1]
when instr(url,'http://')<=0 and instr(url,'https://')<=0 and instr(url,'www.')<=0 then split(url,'/')[0] end
,url
)a distribute by a.domain sort by a.domain,a.a desc)b
)a where seq_no=1 limit 500

-修改某列数据,并增加一列dt(一般先备份一个原表)

 create table  tds_package_systembk11 as 
select package_name,mau,(case when package_name='com.aliyun.pwmob.h_izz_cc' 

then '微语录'  
when   package_name='com.gionee.livewallpaper' then 'Ami天气动态壁纸-随变'  
when   package_name='com.gionee.livewallpaper.v2' then '天气动态壁纸'  
when   package_name='com.bbk.stickdefence' then 'Stickman Warriors: Cartoon Wars'  
when   package_name='com.letv.kttvControl' then '万能遥控器'  
else program_name end) program_name,tag,
'' as dt
from tds_package_system 

--改表名

alter table tds_package_systembk11 rename to tds_package_system

--转换成日期格式

from_unixtime(cast(receive_time as int ),'yyyyMMdd')

 

SQL语法可参考网址:http://www.w3school.com.cn/

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值