(1) 把上面两张表拷贝到hdfs某个目录下,如/001/hive,001表示学号,注意修改。
hadoop fs -mkdir -p /2019443719/hive
hadoop fs -put emp.csv dept.csv /2019443719/hive
hadoop fs -ls -R /2019443719
(2) 创建员工表(emp+学号,如:emp001)
show databases;
use financial;
set hive.cli.print.current.db=true;
show tables;
create table emp2019443719 (empno int,ename string,job string,mgr int,hiredate date,sal int,comm int,deptno int)row format delimited fields terminated by ‘,’;
(3) 创建部门表(dept+学号,如:dept001)
create table dept2019443719(deptno int,dname string,loc string)row format delimited fields terminated by ‘,’;
show tables;
(4) 导入数据
load data local inpath ‘./emp.csv’ overwrite into table emp2019443719;
select * from emp2019443719 limit 3;
load data local inpath ‘./dept.csv’ overwrite into table dept2019443719;
select * from dept2019443719 limit 3;
(5)根据员工的部门号创建分区,表名emp_part+学号,如:emp_part001往分区表中插入数据:指明导入的数据的分区(通过子查询导入数据)。
create table emp_part2019443719(empno int,ename string,job string,mgr int,hiredate string,sal int,comm int) partitioned by (deptno int)row format delimited fields terminated by ‘,’;
insert into table emp_part2019443719 partition(deptno=10) select empno,ename,job,mgr,hiredate,sal,comm,deptno from emp_part2019443719 where deptno=10;
insert into table emp_part2019443719 partition(deptno=20) select empno,ename,job,mgr,hiredate,sal,comm from emp_part2019443719 where deptno=20;
insert into table emp_part2019443719 partition(deptno=30) select empno,ename,job,mgr,hiredate,sal,comm from emp_part2019443719 where deptno=30;
(5) 创建一个桶表,表名emp_bucket+学号,如:emp_bucket001,根据员工的职位(job)进行分桶:
create table emp_bucket2019443719 (empno int,ename string,job string,mgr int,hiredate date,sal int,comm int,deptno int) clustered by (job) into 3 buckets row format delimited fields terminated by ‘,’;
show tables;
通过子查询插入数据:
insert into emp_bucket2019443719 select * from emp2019443719;
(7)查询所有的员工信息
select * from emp2019443719;
(8)查询员工信息:员工号 姓名 薪水
select empno,ename,sal from emp2019443719;
(9)多表查询,查询员工部门名称
select e.ename,d.dname from emp2019443719 as e,dept2019443719 as d where e.deptno=d.deptno;
(10)做报表,根据职位给员工涨工资,把涨前、涨后的薪水显示出来
select ename,case job when “PRESIDENT” then sal+1000 when “MANAGER” then sal+800 else sal+400 end from emp2019443719;