**基础篇**
1. 使用服务的形式启动hive(启动之后可以选择一下两种连接方式)
a. nohup bin/hiveserver2 1>/dev/null 2>&1 &
2. 连接方式
a. beenline(bin/beeline)
i. !connect jdbc:hive2://hadoop02:10000
ii. root
iii. 没密码不用输入密码
b. beenline -u jdbc:hive2://hadoop02:10000 -n root
3. 基本操作语句
a. 建表并且制定分隔符
b. create table t_user(id string,name string)
row format delimited
fields terminated by ',';
查看表结构 desc tanlename
4. 创建外部表
a. create external table t_access(ip string,url string,access_time string)
row format delimited
fields terminated by ','
location "/data/acc"
注:外部表跟内部表的区别,内部表放在warehouse下面,删除表的时候会把数据删除掉,外部表示需要自己制定目录,删除表的时候,不会删除数据
5. 分区表:更精细的控制数据,在跑mr程序的时候,不需要把所有数据都加载,可以指定分区数据进行计算
a. create table t_access(id string,url string,access_time string)
partitioned by(dt string)
row format delimited
fields terminated by ',';
加载数据: load data local inpath "/root/acc.txt" into table t_access partition(dt="2018-8-28");
b. 注意:分区字段里面不能有表字段
c. 不同的分区数据存放在不同的目录下面
6. load数据
a. 通过本地(linux上main加载数据)
i. load data local inpath "/root/userdata.txt" into table t_user;
b. 从hdfs上加载数据
i. load data inpath "/user.data2.txt" into table t_user;
7. 导出
a. 导出到hdfs上
i. insert overwrite directory "/access-data"
row format delimited
fields terminated by ','
select * from t_user;
b. 导出到本地
i. insert overwrite local directory "/root/access-data"
row format delimited fields terminated by ":"
select * from t_user;
8. 存储格式
a. stored as textfile; //默认的存储格式
b. stored as sequencefile;
c. stored as parquetfile;
create table t_user(id string,name string)
row format delimited
fields terminated by ','
stored as sequencefile;
9. 复合数据类型
-- array
create table t_movie(movie_name string,actors array<string>,first_data
string)
row format delimited
fields terminated by ","
collection items terminated by ":";
select movie_name from t_movie where array_contains(actors,"吴刚");
10. map 类型
a.create table t_person(id int,name string,family_members map<string,string>,age int)
row format delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';
b.
## 取map字段的指定key的值
select id,name,family_members['father'] as father from t_person;
## 取map字段的所有key
select id,name,map_keys(family_members) as relation from t_person;
## 取map字段的所有value
select id,name,map_values(family_members) from t_person;
select id,name,map_values(family_members)[0] from t_person;
11. struct类型
a. 想用一个字段描述整个用户的信息,可以采用struct
b. 建表:
create table t_person_struct(id int,name string,info struct<age:int,sex:string,addr:string>)
row format delimited fields terminated by ','
collection items terminated by ':';
c. 查询
select * from t_person_struct;
**查询篇**
基本查询实例
select * from t_access;
select count(*) from t_access;
select max(ip) from t_access;- 条件查询
select * from t_access where access_time<’2017-08-06 15:30:20’
select * from t_access where access_time<’2017-08-06 16:30:20’ and ip>’192.168.33.3’; - join关联查询
- 数据:a.txt
a,1
b,2
c,3
d,4
b.txt
a,xx
b,yy
d,zz
e,pp
- 数据:a.txt
- inner join
select
a.name as aname,
a.numb as anumb,
b.name as bname,
b.nick as bnick
from t_a a
join t_b b
on a.name=b.name - left outer join(left join)
select
a.name as aname,
a.numb as anumb,
b.name as bname,
b.nick as bnick
from t_a a
left outer join t_b b
on a.name=b.name - right outer join(right join)
select
a.name as aname,
a.numb as anumb,
b.name as bname,
b.nick as bnick
from t_a a
right outer join t_b b
on a.name=b.name full outer join(full join)
select
a.name as aname,
a.numb as anumb,
b.name as bname,
b.nick as bnick
from t_a a
full join t_b b
on a.name=b.name;左半查询
select
a.name as aname,
a.numb as anumb
from t_a a
left semi join t_b b
on a.name=b.name;分组聚合
select dt,count(*),max(ip) as cnt from t_access group by dt;select dt,count(*),max(ip) as cnt from t_access group by dt having dt>’20170804’;
select
dt,count(*),max(ip) as cnt
from t_access
where url=’http://www.edu360.cn/job’
group by dt having dt>’20170804’;
注意: 一旦有group by子句,那么,在select子句中就不能有 (分组字段,聚合函数) 以外的字段
为什么where必须写在group by的前面,为什么group by后面的条件只能用having
因为,where是用于在真正执行查询逻辑之前过滤数据用的
having是对group by聚合之后的结果进行再过滤;
上述语句的执行逻辑:
where过滤不满足条件的数据
用聚合函数和group by进行数据运算聚合,得到聚合结果
用having条件过滤掉聚合结果中不满足条件的数据
10.子查询
数据:
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
查询有兄弟的人
select id,name,brother
from
(select id,name,family_members[‘brother’] as brother from t_person) tmp
where brother is not null;