Hive操作演示
在usr下,mkdir /wang
hadoop fs -mkdir /upload
hadoop fs -chmod g+w /upload
上传数据
[root@master wang]# hadoop fs -put emp.csv /upload
[root@master wang]# hadoop fs -put dept.csv /upload
服务端:hiveserver2 &
客户端:
使用anonymous登录
beeline -u jdbc:hive2://master:10000/default
****使用root登录,我们用root用户登录,不然没有写的权限
beeline -u jdbc:hive2://master:10000/default -n root
创建员工表,使用创建表的语句来指明输入文件间隔符,然后load data到这个表
create table emp001(empno int,ename string,job string,mgr int,hiredate string,sal int,comm int,deptno int) row format delimited fields terminated by ','; # 字段分割用逗号
创建部门表
create table dept001(deptno int,dname string,loc string) row format delimited fields terminated by ',';
导入数据
load data inpath '/upload/emp.csv' into table emp001;
load data inpath '/upload/dept.csv' into table dept001;
根据员工的部门号创建分区
create table emp_part001(empno int,ename string,job string,mgr int,hiredate string,sal int,comm int) partitioned by (deptno int) row format delimited fields terminated by ',';
往分区表中插入数据:指明导入的数据的分区(通过子查询导入数据)
insert into table emp_part001 partition(deptno=10) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=10;
insert into table emp_part001 partition(deptno=20) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=20;
insert into table emp_part001 partition(deptno=30) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=30;
创建一个桶表,根据员工的职位 (job)进行分桶
create table emp_bucket001 (empno int,ename string,job string,mgr int,hiredate string,sal int,comm int,deptno int) clustered by (job) into 4 buckets row format delimited fields terminated by ',';
通过子查询插入数据
insert into emp_bucket001 select * from emp001;
查询员工信息:员工号 姓名 薪水
select empno,ename,sal from emp001;
多表连接查询
select dept001.dname,emp001.ename from emp001,dept001 where emp001.deptno=dept001.deptno;
员工总数
select count(empno) as emp_num from emp001;
去除重复值
select distinct deptno from emp001;
查询公司共有多少种职位
select count(distinct job) from emp001;
统计雇佣时间为1981年的总人数
select count(hiredate) as result from emp001 where hiredate like '%1981%';
统计各部门的薪水总和
select deptno,sum(sal) from emp001 group by deptno;
统计每个职位有多少个员工
select job, count(*) as emp_num from emp001 group by job order by emp_num asc;
查询入职时间最早的员工
select ename,hiredate from emp001
join
(select min(hiredate) as min_hiredate from emp001) t1
where hiredate=t1.min_hiredate;
判断薪水级别
select ename,empno,sal,
case when sal<2000 then 'low' when sal >=2000
and sal <3000 then 'middle'
else 'high'
end as level
from emp001
order by sal desc;
根据职位给员工涨工资,把涨前、涨后的薪水显示出来
select empno,ename,job,sal,
case job when 'PRESIDENT' then sal+1000
when 'MANAGER' then sal+800
else sal+400
end
from emp001;
统计上半年入职员工最多的地区
# cast用于转换数据类型
# substr用于截取字符串
select t1.loc,count(*)as emp_count
from
(select dept001.loc,emp001.ename,
cast(substr(emp001.hiredate,6,2) as int) as hire_month
from dept001 join emp001
on dept001.deptno=emp001.deptno) t1
where t1.hire_month<=6
group by t1.loc
order by emp_count desc
limit 1;