Hive sql基本实操

#启动hdfs

#启动mysql

 

#启动hive

#操作基本和sql一样(这是在服务端操作)

 

#在server01上启动客户端

#在server02上用客户端连接server01上的客户端

 #创建,t_a,t_b表

#在server01上/root/dataDir创建文件a.txt、b.txt;内容如下

#

#导入数据

#表里已经有数据了

#join操作

#sql语句其实是转换为map,reduce

#左半连接,只显示左表数据

#分区表,以dt(日期分区)

有如下数据
access.log.0804

192.168.33.3,http://www.edu360.cn/stu,2017-08-04 15:30:20
192.168.33.3,http://www.edu360.cn/teach,2017-08-04 15:35:20
192.168.33.4,http://www.edu360.cn/stu,2017-08-04 15:30:20
192.168.33.4,http://www.edu360.cn/job,2017-08-04 16:30:20
192.168.33.5,http://www.edu360.cn/job,2017-08-04 15:40:20

access.log.0805

192.168.33.3,http://www.edu360.cn/stu,2017-08-05 15:30:20
192.168.44.3,http://www.edu360.cn/teach,2017-08-05 15:35:20
192.168.33.44,http://www.edu360.cn/stu,2017-08-05 15:30:20
192.168.33.46,http://www.edu360.cn/job,2017-08-05 16:30:20
192.168.33.55,http://www.edu360.cn/job,2017-08-05 15:40:20

access.log.0806

192.168.133.3,http://www.edu360.cn/register,2017-08-06 15:30:20
192.168.111.3,http://www.edu360.cn/register,2017-08-06 15:35:20
192.168.34.44,http://www.edu360.cn/pay,2017-08-06 15:30:20
192.168.33.46,http://www.edu360.cn/excersize,2017-08-06 16:30:20
192.168.33.55,http://www.edu360.cn/job,2017-08-06 15:40:20
192.168.33.46,http://www.edu360.cn/excersize,2017-08-06 16:30:20
192.168.33.25,http://www.edu360.cn/job,2017-08-06 15:40:20
192.168.33.36,http://www.edu360.cn/excersize,2017-08-06 16:30:20
192.168.33.55,http://www.edu360.cn/job,2017-08-06 15:40:20

二,

建表映射上述数据
create table t_access(ip string,url string,access_time string)
partitioned by (dt string)
row format delimited fields terminated by ',';

三,导入数据
load data local inpath '/root/hivetest/access.log.0804' into table t_access partition(dt='2017-08-04');
load data local inpath '/root/hivetest/access.log.0805' into table t_access partition(dt='2017-08-05');
load data local inpath '/root/hivetest/access.log.0806' into table t_access partition(dt='2017-08-06');

四,sql练习

-- 查看表的分区
show partitions t_access;

--删除分区
alter table t_access drop partition (dt='2017-08-04');

--删除表数据

truncate table t_access;

-- 求8月4号以后,每天http://www.edu360.cn/job的总访问次数,及访问者中ip地址中最大的
select dt,'http://www.edu360.cn/job',count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt having dt>'2017-08-04';


select dt,max(url),count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt having dt>'2017-08-04';


select dt,url,count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt,url having dt>'2017-08-04';

select dt,url,count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job' and dt>'2017-08-04'
group by dt,url;


-- 求8月4号以后,每天每个页面的总访问次数,及访问者中ip地址中最大的

select dt,url,count(1),max(ip)
from t_access
where dt>'2017-08-04'
group by dt,url;

-- 求8月4号以后,每天每个页面的总访问次数,及访问者中ip地址中最大的,且,只查询出总访问次数>2 的记录
-- 方式1:
select dt,url,count(1) as cnts,max(ip)
from t_access
where dt>'2017-08-04'
group by dt,url having cnts>2;


-- 方式2:用子查询
select dt,url,cnts,max_ip
from
(select dt,url,count(1) as cnts,max(ip) as max_ip
from t_access
where dt>'2017-08-04'
group by dt,url) tmp
where cnts>2;

#HIVE 中的复合数据类型

数组


有如下数据:
zhanlang2,wujing:wugang:longmu,2017-08-16
sanshengsanshi,liuyifei:yangyang,2017-08-20

 

建表映射:
create table t_movie(movie_name string,actors array<string>,first_show date)
row format delimited fields terminated by ','
collection items terminated by ':';

导入数据
load data local inpath '/root/dataDir/movie-actor.txt' into table t_movie;(汉字乱码,暂时没解决)

查看导入结果

select movie_name,actors[0],first_show from t_movie;

select movie_name,actors,first_show
from t_movie where array_contains(actors,'yangyang');

select movie_name
,size(actors) as actor_number
,first_show
from t_movie;

MAP类型

有如下数据:fm.dat
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26

 

建表映射上述数据

create table t_family(id int,name string,family_members map<string,string>,age int)
row format delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';

导入数据
load data local inpath '/root/hivetest/fm.dat' into table t_family;

 导入结果:

查出每个人的 爸爸、姐妹
select id,name,family_members["father"] as father,family_members["sister"] as sister,age
from t_family;

以下自己练习:

-- 查出每个人有哪些亲属关系
select id,name,map_keys(family_members) as relations,age
from  t_family;

-- 查出每个人的亲人名字
select id,name,map_values(family_members) as relations,age
from  t_family;

-- 查出每个人的亲人数量
select id,name,size(family_members) as relations,age
from  t_family;

-- 查出所有拥有兄弟的人及他的兄弟是谁
-- 方案1:一句话写完
select id,name,age,family_members['brother']
from t_family  where array_contains(map_keys(family_members),'brother');


-- 方案2:子查询
select id,name,age,family_members['brother']
from
(select id,name,age,map_keys(family_members) as relations,family_members 
from t_family) tmp 
where array_contains(relations,'brother');
 

struct类型

假如有以下数据:
1,zhangsan,18:male:深圳
2,lisi,28:female:北京
3,wangwu,38:male:广州
4,赵六,26:female:上海
5,钱琪,35:male:杭州
6,王八,48:female:南京
 

-- 建表映射上述数据

drop table if exists t_user;
create table t_user(id int,name string,info struct<age:int,sex:string,addr:string>)
row format delimited fields terminated by ','
collection items terminated by ':';

-- 导入数据
load data local inpath '/root/hivetest/user.dat' into table t_user;

-- 查询每个人的id name和地址
select id,name,info.addr
from t_user;

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值