数据导入
loaddata[local] inpath '数据的path'[overwrite]intotable student [partition(partcol1=val1,…)];createtable stu1(id int, name string)row format delimited fieldsterminatedby'\t
create table stu2(id int , name string) row format delimited fields terminated by '\t'
load data local inpath '/opt/module/hive/datas/stu1.txt' into table stu1;
load data inpath '/datas/stu2.txt' into table stu2;
--如果是从本地 local 那么是复制进去 如果不是本地 那么是剪切进去的
load data local inpath '/opt/module/hive/datas/stu1.txt' overwrite into table stu1;
------------------------------------------------------------
insert
insert into stu1 values(1,'yizuzhang'),(2,'erzuzhang');
-- 通过查询插入
insert overwrite table stu1 select * from stu2;
insert overwrite table student select * from stu2;
----------------------------------------------
location
create table teacher (
id int,
name string
)
row format delimited fields terminated by '\t'
location '/teacher';
数据导出
----导出到本地insert overwrite local directory '/opt/module/hive/datas/export1'select*from stu1;insert overwrite local directory '/opt/module/hive/datas/export1'row format delimited fieldsterminatedby'\t'select*from stu1;insert overwrite directory '/export1'row format delimited fieldsterminatedby'\t'select*from stu1;----export 导出
export table teacher to'/teacher';-- import 导入importtable teacher2 from'/teacher';import 导入必须是export 导出
第六章 查询
SELECT[ALL|DISTINCT] select_expr, select_expr,...FROM table_reference
[WHERE where_condition][GROUPBY col_list][HAVING col_list][ORDERBY col_list][CLUSTER BY col_list
|[DISTRIBUTE BY col_list][SORT BY col_list]][LIMIT number]createtableifnotexists dept(
deptno int,--部门编号
dname string,--部门名称
loc int--部门地址标号)row format delimited fieldsterminatedby'\t';createtableifnotexists emp(
empno int,--员工编号
ename string,--员工名称
job string,--员工职位
mgr int,--员工的领导
hiredate string,--员工的入职日期
sal double,--员工的薪资
comm double,--员工的奖金
deptno int)--员工的部门idrow format delimited fieldsterminatedby'\t'------名字开头待sselect*from emp where ename like'S%';select*from emp where ename rlike'^S';-----名字末尾带sselect*from emp where ename like'%S';select*from emp where ename rlike'S$';-----名字中带sselect*from emp where ename like'%S%';select*from emp where ename rlike'[S]';
join--员工表和部门表中的部门编号相等,查询员工编号、员工名称和部门名称select
e.empno,
e.ename,
d.deptno,
d.dname
from emp e join dept d
on e.deptno !=d.deptno;--合并员工表select e.*,d.dname,d.loc from emp e join dept d on e.deptno=d.deptno;--左连接 将emp作为主表select
e.empno,
e.ename,
e.deptno,
d.deptno,
d.dname
from emp e leftjoin dept d
on e.deptno=d.deptno;--右连接 将emp作为主表select
e.empno,
e.ename,
d.deptno,
d.dname
from dept d rightjoin emp e
on e.deptno=d.deptno;--满外连接 select
e.empno,
e.ename,
d.deptno,
d.dname
from emp e fulljoin dept d
on e.deptno =d.deptno
--在mysql如果想要实现满外连接的方式select
e.empno,
e.ename,
d.deptno,
d.dname
from dept d leftjoin emp e
on e.deptno=d.deptno
unionallselect
e.empno,
e.ename,
d.deptno,
d.dname
from dept d rightjoin emp e
on e.deptno=d.deptno;--多表联查---查到 员工名称,员工部门名称,员工部门位置名称--第一种写法select
e.ename,
d.dname,
l.loc_name
from emp e join dept d
on e.deptno=d.deptno
join location l
on d.loc=l.loc
--第二种写法select
e.ename,
d.dname,
l.loc_name
from emp e join dept d join location l
on e.deptno=d.deptno and d.loc=l.loc
--笛卡尔积
连接条件失效
连接条件衡成立
select*from dept,emp;select*from dept join emp;select*from dept join emp on1=1;
全局排序 order-- 按照工资给emp所有的人做一个全局排序select empno,ename,sal,deptno from emp orderby sal asc--升序排序select empno,ename,sal,deptno from emp orderby sal desc--倒序排序--按照不同部门里所有人工资排序select empno,ename,sal,deptno from emp orderby deptno,sal;select empno,ename,sal,deptno from emp orderby deptno desc,sal desc;
sort byset mapreduce.job.reduces=3;--其实设置当前的 分区数 =》3
sort by 对于每个reduce而言是 有序,但是在设置分区数过后 他是随机进入分区的 --没有人会单独使用
distribute by 结合着 sort by 使用
select
empno,
ename,
sal,
deptno
from emp
distribute by deptno sort by sal;
distribute by 以什么分区 sort by 分区内区内排序 需要手动这是map的分区数
cluster by 分区排序
select*from emp cluster by deptno asc;
以下的sql等同于上面的sqlselect*from emp distribute by deptno sort by sal;
--按照4月份过滤select*from business
wheremonth(orderdate)=4;select*from business
where substring(orderdate,1,7)='2017-04'select*from business
where date_format(orderdate,'yyyy-MM')='2017-04';--求总人数窗口函数1select
name,count(*)over(rowsbetweenUNBOUNDEDPRECEDINGandUNBOUNDEDFOLLOWING)from business
wheremonth(orderdate)=4groupby name;+-------+-----------------+| name | count_window_0 |+-------+-----------------+| mart |2|| jack |2|+-------+-----------------+--求总人数窗口函数2select name,count(*)over()from business
where substring(orderdate,1,7)='2017-04'groupby name;--需求1变种1 累加统计4月份有多少人消费select
name,count(*)over(rowsbetweenUNBOUNDEDPRECEDINGandcurrentrow)from business
wheremonth(orderdate)=4groupby name;| name | count_window_0 |+-------+-----------------+| mart |1|| jack |2|--需求1变种2 累加统计有多少人消费 select
name,count(*)over(rowsbetweenUNBOUNDEDPRECEDINGandcurrentrow)from business;+-------+-----------------+| name | count_window_0 |+-------+-----------------+| mart |1|| neil |2|| mart |3|| neil |4|| mart |5|| mart |6|| jack |7|| tony |8|| jack |9|| jack |10|| tony |11|| jack |12|| tony |13|| jack |14|+-------+-----------------+--需求1变种3 统计一共有多少人消费select
name,count(*)over(rowsbetweenUNBOUNDEDPRECEDINGandUNBOUNDEDFOLLOWING)from business;+-------+-----------------+| name | count_window_0 |+-------+-----------------+| mart |14|| neil |14|| mart |14|| neil |14|| mart |14|| mart |14|| jack |14|| tony |14|| jack |14|| jack |14|| tony |14|| jack |14|| tony |14|| jack |14|+-------+-----------------+--用一般函数怎么做(求所有不同月份的总人数和人)select
name,
date_format(orderdate,'yyyy-MM') orderdate,
cost
from business t1
select
collect_set(name) c_s_n,
t1.orderdate
from(select
name,
date_format(orderdate,'yyyy-MM') orderdate,
cost
from business)t1
groupby t1.orderdate t2
select
t2.c_s_n,
size(t2.c_s_n) p_n,
t2.orderdate
from(select
collect_set(name) c_s_n,
t1.orderdate
from(select
name,
date_format(orderdate,'yyyy-MM') orderdate,
cost
from business)t1
groupby t1.orderdate)t2
+------------------+------+---------------+| t2.c_s_n | p_n | t2.orderdate |+------------------+------+---------------+|["jack","tony"]|2|2017-01||["jack"]|1|2017-02||["jack","mart"]|2|2017-04||["neil"]|1|2017-05||["neil"]|1|2017-06|+------------------+------+---------------+
8.5.2询顾客的购买明细及月购买总额
select
name,
orderdate,
cost,sum(cost)over(partitionby name,month(orderdate)rowsbetweenUNBOUNDEDPRECEDINGandUNBOUNDEDFOLLOWING)from business;+-------+-------------+-------+---------------+| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-05|46|111|| jack |2017-01-08|55|111|| jack |2017-01-01|10|111|| jack |2017-02-03|23|23|| jack |2017-04-06|42|42|| mart |2017-04-13|94|299|| mart |2017-04-11|75|299|| mart |2017-04-09|68|299|| mart |2017-04-08|62|299|| neil |2017-05-10|12|12|| neil |2017-06-12|80|80|| tony |2017-01-04|29|94|| tony |2017-01-02|15|94|| tony |2017-01-07|50|94|+-------+-------------+-------+---------------+
需求2的变种
select
name,
orderdate,
cost,sum(cost)over(partitionby name,month(orderdate))--满足order 子句的第二种情况 默认是上无边界到当前行from business;+-------+-------------+-------+---------------+| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-05|46|111|| jack |2017-01-08|55|111|| jack |2017-01-01|10|111|| jack |2017-02-03|23|23|| jack |2017-04-06|42|42|| mart |2017-04-13|94|299|| mart |2017-04-11|75|299|| mart |2017-04-09|68|299|| mart |2017-04-08|62|299|| neil |2017-05-10|12|12|| neil |2017-06-12|80|80|| tony |2017-01-04|29|94|| tony |2017-01-02|15|94|| tony |2017-01-07|50|94|+-------+-------------+-------+---------------+
需求2的变种 统计每个人一共消费了多少和明细
select
name,
orderdate,
cost,sum(cost)over(partitionby name)from business;
name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-05|46|176|| jack |2017-01-08|55|176|| jack |2017-01-01|10|176|| jack |2017-04-06|42|176|| jack |2017-02-03|23|176|| mart |2017-04-13|94|299|| mart |2017-04-11|75|299|| mart |2017-04-09|68|299|| mart |2017-04-08|62|299|| neil |2017-05-10|12|92|| neil |2017-06-12|80|92|| tony |2017-01-04|29|94|| tony |2017-01-02|15|94|| tony |2017-01-07|50|94|+-------+-------------+-------+---------------+
需求2的变种 累计每个人一共消费了多少和明细
select
name,
orderdate,
cost,sum(cost)over(partitionby name rowsbetweenUNBOUNDEDPRECEDINGandcurrentrow)from business;+-------+-------------+-------+---------------+| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-05|46|46|| jack |2017-01-08|55|101|| jack |2017-01-01|10|111|| jack |2017-04-06|42|153|| jack |2017-02-03|23|176|| mart |2017-04-13|94|94|| mart |2017-04-11|75|169|| mart |2017-04-09|68|237|| mart |2017-04-08|62|299|| neil |2017-05-10|12|12|| neil |2017-06-12|80|92|| tony |2017-01-04|29|29|| tony |2017-01-02|15|44|| tony |2017-01-07|50|94|+-------+-------------+-------+---------------+
8.5.3上述的场景, 将每个顾客的cost按照日期进行累加
select
name,
orderdate,
cost,sum(cost)over(partitionby name orderby orderdate)from business;+-------+-------------+-------+---------------+| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-01|10|10|| jack |2017-01-05|46|56|| jack |2017-01-08|55|111|| jack |2017-02-03|23|134|| jack |2017-04-06|42|176|| mart |2017-04-08|62|62|| mart |2017-04-09|68|130|| mart |2017-04-11|75|205|| mart |2017-04-13|94|299|| neil |2017-05-10|12|12|| neil |2017-06-12|80|92|| tony |2017-01-02|15|15|| tony |2017-01-04|29|44|| tony |2017-01-07|50|94|
需求变种1 统计当天消费和上一次消费的和
select
name,
orderdate,
cost,sum(cost)over(partitionby name orderby orderdate rowsbetween1PRECEDINGandcurrentrow)from business;+-------+-------------+-------+---------------+| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-01|10|10|| jack |2017-01-05|46|56|| jack |2017-01-08|55|101|| jack |2017-02-03|23|78|| jack |2017-04-06|42|65|| mart |2017-04-08|62|62|| mart |2017-04-09|68|130|| mart |2017-04-11|75|143|| mart |2017-04-13|94|169|| neil |2017-05-10|12|12|| neil |2017-06-12|80|92|| tony |2017-01-02|15|15|| tony |2017-01-04|29|44|| tony |2017-01-07|50|79|
需求变种2 统计当天消费和下一次消费的和
select
name,
orderdate,
cost,sum(cost)over(partitionby name orderby orderdate rowsbetweencurrentrowand1following)from business;+-------+-------------+-------+---------------+| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-01|10|56|| jack |2017-01-05|46|101|| jack |2017-01-08|55|78|| jack |2017-02-03|23|65|| jack |2017-04-06|42|42|| mart |2017-04-08|62|130|| mart |2017-04-09|68|143|| mart |2017-04-11|75|169|| mart |2017-04-13|94|94|| neil |2017-05-10|12|92|| neil |2017-06-12|80|80|| tony |2017-01-02|15|44|| tony |2017-01-04|29|79|| tony |2017-01-07|50|50|+-------+-------------+-------+---------------+
需求变种3 统计上一次消费到下一次消费的和
select
name,
orderdate,
cost,sum(cost)over(partitionby name orderby orderdate rowsbetween1PRECEDINGand1following)from business;+-------+-------------+-------+---------------+| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-01|10|56|| jack |2017-01-05|46|111|| jack |2017-01-08|55|124|| jack |2017-02-03|23|120|| jack |2017-04-06|42|65|| mart |2017-04-08|62|130|| mart |2017-04-09|68|205|| mart |2017-04-11|75|237|| mart |2017-04-13|94|169|| neil |2017-05-10|12|92|| neil |2017-06-12|80|92|| tony |2017-01-02|15|44|| tony |2017-01-04|29|94|| tony |2017-01-07|50|79|
需求变种4 直接按日期累加消费并且需要明细
select
name,
orderdate,
cost,sum(cost)over(orderby orderdate)from business;| name | orderdate | cost | sum_window_0 |+-------+-------------+-------+---------------+| jack |2017-01-01|10|10|| tony |2017-01-02|15|25|| tony |2017-01-04|29|54|| jack |2017-01-05|46|100|| tony |2017-01-07|50|150|| jack |2017-01-08|55|205|| jack |2017-02-03|23|228|| jack |2017-04-06|42|270|| mart |2017-04-08|62|332|| mart |2017-04-09|68|400|| mart |2017-04-11|75|475|| mart |2017-04-13|94|569|| neil |2017-05-10|12|581|| neil |2017-06-12|80|661|
8.5.4查询顾客购买明细以及上次的购买时间和下次购买时间
select
name,
orderdate,
cost,
lag(orderdate)over(partitionby name orderby orderdate) prev_time,
lead(orderdate)over(partitionby name orderby orderdate) next_time
from business;+-------+-------------+-------+-------------+-------------+| name | orderdate | cost | prev_time | next_time |+-------+-------------+-------+-------------+-------------+| jack |2017-01-01|10|NULL|2017-01-05|| jack |2017-01-05|46|2017-01-01|2017-01-08|| jack |2017-01-08|55|2017-01-05|2017-02-03|| jack |2017-02-03|23|2017-01-08|2017-04-06|| jack |2017-04-06|42|2017-02-03|NULL|| mart |2017-04-08|62|NULL|2017-04-09|| mart |2017-04-09|68|2017-04-08|2017-04-11|| mart |2017-04-11|75|2017-04-09|2017-04-13|| mart |2017-04-13|94|2017-04-11|NULL|| neil |2017-05-10|12|NULL|2017-06-12|| neil |2017-06-12|80|2017-05-10|NULL|| tony |2017-01-02|15|NULL|2017-01-04|| tony |2017-01-04|29|2017-01-02|2017-01-07|| tony |2017-01-07|50|2017-01-04|NULL|+-------+-------------+-------+-------------+-------------+select
name,
orderdate,
cost,
lag(orderdate,1,'0000-00-00')over(partitionby name orderby orderdate) prev_time,
lead(orderdate,1,'9999-99-99')over(partitionby name orderby orderdate) next_time
from business;+-------+-------------+-------+-------------+-------------+| name | orderdate | cost | prev_time | next_time |+-------+-------------+-------+-------------+-------------+| jack |2017-01-01|10|0000-00-00|2017-01-05|| jack |2017-01-05|46|2017-01-01|2017-01-08|| jack |2017-01-08|55|2017-01-05|2017-02-03|| jack |2017-02-03|23|2017-01-08|2017-04-06|| jack |2017-04-06|42|2017-02-03|9999-99-99|| mart |2017-04-08|62|0000-00-00|2017-04-09|| mart |2017-04-09|68|2017-04-08|2017-04-11|| mart |2017-04-11|75|2017-04-09|2017-04-13|| mart |2017-04-13|94|2017-04-11|9999-99-99|| neil |2017-05-10|12|0000-00-00|2017-06-12|| neil |2017-06-12|80|2017-05-10|9999-99-99|| tony |2017-01-02|15|0000-00-00|2017-01-04|| tony |2017-01-04|29|2017-01-02|2017-01-07|| tony |2017-01-07|50|2017-01-04|9999-99-99|+-------+-------------+-------+-------------+-------------+
8.5.5查询顾客每个月第一次的购买时间 和 每个月的最后一次购买时间
select
name,
orderdate,
cost,
first_value(orderdate)over(partitionby name,month(orderdate)orderby orderdate
rowsbetweenUNBOUNDEDPRECEDINGandUNBOUNDEDfollowing) first_date,
last_value(orderdate)over(partitionby name,month(orderdate)orderby orderdate
rowsbetweenUNBOUNDEDPRECEDINGandUNBOUNDEDfollowing) last_date
from business
+-------+-------------+-------+-------------+-------------+| name | orderdate | cost | first_date | last_date |+-------+-------------+-------+-------------+-------------+| jack |2017-01-01|10|2017-01-01|2017-01-08|| jack |2017-01-05|46|2017-01-01|2017-01-08|| jack |2017-01-08|55|2017-01-01|2017-01-08|| jack |2017-02-03|23|2017-02-03|2017-02-03|| jack |2017-04-06|42|2017-04-06|2017-04-06|| mart |2017-04-08|62|2017-04-08|2017-04-13|| mart |2017-04-09|68|2017-04-08|2017-04-13|| mart |2017-04-11|75|2017-04-08|2017-04-13|| mart |2017-04-13|94|2017-04-08|2017-04-13|| neil |2017-05-10|12|2017-05-10|2017-05-10|| neil |2017-06-12|80|2017-06-12|2017-06-12|| tony |2017-01-02|15|2017-01-02|2017-01-07|| tony |2017-01-04|29|2017-01-02|2017-01-07|| tony |2017-01-07|50|2017-01-02|2017-01-07|+-------+-------------+-------+-------------+-------------+
需求变种1 查询顾客第一次的购买时间 和 最后一次购买时间
select
name,
orderdate,
cost,
first_value(orderdate)over(partitionby name orderby orderdate
rowsbetweenUNBOUNDEDPRECEDINGandUNBOUNDEDfollowing) first_date,
last_value(orderdate)over(partitionby name orderby orderdate
rowsbetweenUNBOUNDEDPRECEDINGandUNBOUNDEDfollowing) last_date
from business
+-------+-------------+-------+-------------+-------------+| name | orderdate | cost | first_date | last_date |+-------+-------------+-------+-------------+-------------+| jack |2017-01-01|10|2017-01-01|2017-04-06|| jack |2017-01-05|46|2017-01-01|2017-04-06|| jack |2017-01-08|55|2017-01-01|2017-04-06|| jack |2017-02-03|23|2017-01-01|2017-04-06|| jack |2017-04-06|42|2017-01-01|2017-04-06|| mart |2017-04-08|62|2017-04-08|2017-04-13|| mart |2017-04-09|68|2017-04-08|2017-04-13|| mart |2017-04-11|75|2017-04-08|2017-04-13|| mart |2017-04-13|94|2017-04-08|2017-04-13|| neil |2017-05-10|12|2017-05-10|2017-06-12|| neil |2017-06-12|80|2017-05-10|2017-06-12|| tony |2017-01-02|15|2017-01-02|2017-01-07|| tony |2017-01-04|29|2017-01-02|2017-01-07|| tony |2017-01-07|50|2017-01-02|2017-01-07|