HiveQL:查询(五)

select ...(field)from....(table)

 

select  * from sogou_20111230 limit 3;

select * from sogou.sogou_20111230 limit 3;

 

可以在别的数据库下,select * from sogou.sogou_20111230 limit 3;

 

select uid from sogou.sogou_20111230 limit 3;

 

select ts,uid from sogou.sogou_20111230 limit 3;

 

select rank+1 from sogou_20111230 limit 3;

 

select uid+1 from sogou_20111230 limit 3;  //null

 

select pow(rank,2) from sogou_20111230 limit 3;//rank的2次方

 

select pmod(rank+1,2) from sogou_20111230 limit 3;//取模

 

select count(*) from sogou_20111230;//求总行数

 

select count(uid) from sogou_20111230;//求uid行数

 

select sum(order) from sogou_20111230; //计算order的和

 

select max(order),min(order) from sogou_20111230;//计算order的最大小值

 

select count(distinct uid) from sogou_20111230;//独立uid  uuid,distinct去重后的uid

 

select cast(rank as double) from sogou_20111230 limit 3 ;//将rank转换为double型的

select concat(uid,url) from sougou_20111230 limit 3;//将uid与url拼在一起

select get_json_object('{"name":"xiaoming","age":"15"}','$.age') from sogou_20111230 limit 3;//取age元素

select get_json_object('{channel,'$.age') from sogou_20111230 limit 3;//在channel数据中取age元素

select format_number(rank,2) from sogou_20111230 limit 3;//1 ,3 ,1 -> 1.00,3.00,1.00

select length(uid) from sogou_20111230 limit 3;//计算uid的长度

select locate('a',uid,2) from sogou_20111230 limit 3;//查找在字符串uid中的2位置后字符串a第一次出现的位置

select lower(uid) from sogou_20111230 limit 3;//将uid字符串中的字母全部转换成小写

select ltrim(uid) from sogou_20111230 limit 3;//将uid字符串前面出现的空格全部去除掉

select regexp_extract(uid,'5',0) from sogou_20111230 limit 3 ;//抽取字符串uid中‘5’的第0个部分的子字符串

select regexp_replace(uid,'5','6') from sogou_20111230 limit 3;//将uid字符串中‘5’替换成‘6’

select reverse(uid) from sogou_20111230 limit 3;//将字符串uid反转

select split(uid,'2') from sogou_20111230 limit 3;//按照正则表达式‘2’分割字符串uid,并将分割后的部分以字符串数组的方式返回

select substr(uid,3,3) from sogou_20111230 limit 3;//对于字符串uid,从3位置开始截取3长度的字符串,作为子字符串

select upper(uid) from sogou_20111230 limit 3;//将字符串uid中所有的字母转换成大写字母

select datediff(string enddate,string startdate) from sogou_20111230 limit 3;//计算开始时间startdate到结束时间enddate相差的天数

select count(distinct e.uid) as cnt from (select * from sogou_20111230 where rank <= 3 and order=1) e;//别名

select count(distinct e.uid) from (select * from sogou_20111230 where rank <= 3 and order=1) e;//嵌套select,(...)是一个临时表e

select * from sogou_20111230  where rank <= 3 and order=1 limit 3;//where 语句

select * from sogou_20111230  where url like '%http%' limit 3;//like    ,'http%'是以http开头的,'%http%'包含http的

select * from sogou_20111230 where uid like '5%' limit 3; //uid以5开头的

select * from sogou_20111230 where url rlike '.*http.*' limit 3;//正则表达式

-------------------------------group by--------------------------

select rank,count(*) from sogou_20111230 group by rank;//rank=1(1 2071720)出现的次数

1 2071720

2 905769

3 554258

4 375813

5 283848

6 218351

7 179380

8 151384

9 128344

10 131002

select rank,count(*) from sogou_20111230 group by rank,order;

1 1942685

1 100623

1 19395

1 5606

1 2088

1 779

1 321

1 158

1 42

1 23

2 641207

2 232966

2 22748

2 5666

2 1950

select rank,order,count(*) from sogou_20111230 group by rank,order;

1 1 1942685

1 2 100623

1 3 19395

1 4 5606

1 5 2088

1 6 779

1 7 321

1 8 158

1 9 42

1 10 23

2 1 641207

select rank,order,count(*) from sogou_20111230 group by rank,order having

    rank > 3;            //having

select rank,order,count(*) from sogou_20111230 group by rank,order having rank > 3 and order > 3;

------------------------------join------------------------------- ---

hive> create external table sogou.sogou_20111230_limit3(ts string,uid string,keyword string,rank int,order int,url string)

    > row format delimited

    > fields terminated by '\t'

    > location '/sogou/20111230-limit3';

insert overwrite table sogou_20111230_limit3 select * from sogou_20111230 limit 3;

select * from sogou_20111230 m join sogou_20111230_limit3 n on m.uid=n.uid;//n表中的3个uid在m表中搜索过的所有记录

select m.uid,m.keyword from sogou_20111230 m join sogou_20111230_limit3 n on m.uid=n.uid;//n表中的3个uid在m表中搜索过的所有记录,把uid与keyword两个字段取出来

搜索过仙剑奇侠传的用户还搜索过什么内容

(1)搜索过仙剑奇侠传的用户

select distinct uid from sogou_20111230 where keyword like '%仙剑奇侠传%'

(2)还搜索过什么内容

select m.uid,m.keyword from sogou_20111230 m join(select distinct uid from sogou_20111230 where keyword like '%仙剑奇侠传%') n on m.uid=n.uid where m.keyword rlike '~(.*仙剑奇侠传.*)';//未能实现

select m.uid,m.keyword from sogou_20111230 m join(select distinct uid from sogou_20111230 where keyword like '%仙剑奇侠传%') n on m.uid=n.uid where m.keyword not like '%仙剑奇侠传%';

 

-------------------------------外连接------------------------------

waibubiao1:                waibubiao2:

1 xiaoming                    1 dongdong

2 xiaodong                    2 dongxixi

3 xiaohong                    3 dongnann

4 xiaoxiao                      4 dongbeii

5 xiaojing                       5 dongmmmn

8 xiaolili                         6 dongnnnn

 

左外连接:

select m.id,m.name,n.id,n.name from waibubiao1 m left outer join waibubiao2 n on m.id=n.id;

1 xiaoming 1 dongdong

2 xiaodong 2 dongxixi

3 xiaohong 3 dongnann

4 xiaoxiao 4 dongbeii

5 xiaojing 5 dongmmmn

8 xiaolili NULL NULL

右外连接:

select m.id,m.name,n.id,n.name from waibubiao1 m right outer join waibubiao2 n on m.id=n.id;

1 xiaoming 1 dongdong

2 xiaodong 2 dongxixi

3 xiaohong 3 dongnann

4 xiaoxiao 4 dongbeii

5 xiaojing 5 dongmmmn

NULL NULL 6 dongnnnn

全外连接

select m.id,m.name,n.id,n.name from waibubiao1 m full outer join waibubiao2 n on m.id=n.id;

1 xiaoming 1 dongdong

2 xiaodong 2 dongxixi

3 xiaohong 3 dongnann

4 xiaoxiao 4 dongbeii

5 xiaojing 5 dongmmmn

NULL NULL 6 dongnnnn

8 xiaolili NULL NULL

left semi join:

select * from waibubiao1 m left semi join waibubiao2 n on m.id=n.id;//select n.name from waibubiao1......,n.name是不行的,select和where中不能引用右边表中的字段

1 xiaoming

2 xiaodong

3 xiaohong

4 xiaoxiao

5 xiaojing

mapjoin: 优化  减少了reduce过程,有时还减少了map过程的执行步骤 ,提升执行速度,25M以下自动启动mapjoin

select  m.uid,m.keyword,n.keyword

from sogou_20111230 m join sogou_20111230_limit3 n on m.uid=n.uid; 

 

 

 

select * from sogou_20111230 where rank in (1,2,5);//sql语法,hive里不支持

select * from sogou_20111230 m left semi join sogou_20111230_limit3 n on m.rank=n.rank;//只能带到左边:m.uid不能得到n.uid

select /*+mapjoin(n)*/ m.uid,m.keyword,n.keyword from sogou_20111230 m join sogou_20111230_limit3 n on m.uid=n.uid;

select  m.uid,m.keyword,n.keyword from sogou_20111230 m join sogou_20111230_limit3 n on m.uid=n.uid;

select * from sogou_20111230_limit3 order by rank desc;

select uid ,count(*) m from sogou_20111230 group by uid order by m desc;// uid  seach times 降序排序

{select uid,count(*) from sogou_20111230 group by uid order by count(*) desc;

FAILED: SemanticException [Error 10128]: Line 1:62 Not yet supported place for UDAF 'count'}

select cast(ts as bigint) from sogou_20111230_limit3;//强转类型

select cast(ts as bigint) m from sogou_20111230 group by ts order by m;

select ts m from sogou_20111230 group by ts order by m;

select count(distinct e.uid)

from

(select * from sogou_20111230 where rank <11

union all

select * from sogou_20111230_limit3 where rank <11

) e ;

 

 

 

 

 

 

 

 

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值