1108课后作业

一、hive创建表并加载数据

1、在 hive 里创建表 dept

在数据库default下,创建表dept

hive > create table dept(deptno string, dname string, loc string)
row format delimited fields terminated by '\t';

2、加载数据到表 dept

在linux的目录/root/下,创建dept.txt

10	ACCOUNTING	NEW YORK
20	RESEARCH	DALLAS
30	SALES	CHICAGO
40	OPERATIONS	BOSTON

方式一:

用 local inpath 本地路径的方法加载

hive> load data local inpath '/root/dept.txt' into table default.dept;

方式二:

再用 hdfs 路径的方法加载数据覆盖到 dept

上传文件到hdfs

hive > dfs -put /root/dept.txt /user/hive;

加载hdfs上的数据覆盖到hive

hive > load data inpath '/user/hive/dept.txt' overwrite into table default.dept;

二、编写sql语句

1、customer-deposit-dept

客户信息表customer

字段数据类型描述
c_idvarchar2主键,客户号
c_namevarchar2客户姓名

存款交易表deposit

字段数据类型描述
trade_datedate存款日期
deposit_idvarchar2流水号
c_idvarchar2客户号
amountnumber存款金额
dept_idvarchar2营业网点代码

营业网点信息表dept

字段数据类型描述
dept_idvarchar2营业网点代码
dept_namevarchar2营业网点名称

存款日期的数据类型默认为 date 类型,
若存款日期的数据类型为 varchar2类型的,对应的过滤数据的方法不一样

1)查找客户号“88888888”的2021全年的存款流水,按日期升序排列

select t1.trade_date
      ,t1.deposit_id
      ,t1.c_id
      ,t1.amount
      ,t1.dept_id
from deposit t1
where t1.c_id = '88888888' 
      and to_char(t1.trade_date, 'yyyy') = '2021'
order by t1.trade_date asc;

2)针对上面的查询要求,为deposit表创建一个索引idx_deposit

create unique index index_c_id on deposit(c_id);

3)输出整个银行2021年全年各网点名称及对应的存款总额。

select t1.dept_name
      ,sum(t2.amount) as sum_amount
from dept t1
left join deposit t2
       on t1.dept_id = t2.dept_id  
      and to_char(t2.trade_date, 'yyyy') = '2021'
group by t1.dept_id, t1.dept_name; 

4)列出曾经有多笔(2笔或者大于2笔)存款的客户号。

select t1.c_id
from deposit t1
group by t1.c_id
having count(1) > 1;
  1. 列出客户信息表中没有存款交易的客户,输出客户号和客户姓名。
--表关联
select t1.c_id 
      ,t1.c_name
from customer t1
left join deposit  t2
       on t1.c_id  = t2.c_id 
where t2.c_id is null;
   
--not in
select t1.c_id 
      ,t1.c_name
from customer t1
where t1.c_id not in (
                      select distinct t2.c_id 
                      from deposit t2
                      );
                                         
--not exists 
select t1.c_id 
      ,t1.c_name
from customer t1
where not exists(
                 select t.c_id 
                 from deposit t 
                 where t.c_id  = t1.c_id 
                 );                      

2、t_tran

表t_tran

card_nbrtrans_typedesc_narinp_datebill_amt
62231840111040批量代扣202204264850
62231840111040批量代扣202204264850
62228282351022网联支付2022042612100
62228282351022网联支付20220426899400
62228282341022网联支付20220426400
62228282341022网联支付20220426400

查询出发生重复(每个字段的内容都一样)记录的数据

--理解1:查询出重复数据中的一条
select t1.card_nbr
      ,t1.trans_type
      ,t1.desc_nar
      ,t1.inp_date
      ,t1.billamt
from t_tran t1
group by t1.card_nbr
        ,t1.trans_type
        ,t1.desc_nar
        ,t1.inp_date
        ,t1.billamt
having count(1) > 1;

--理解2:查出所有重复数据
select t1.card_nbr
	  ,t1.trans_type
	  ,t1.desc_nar
	  ,t1.inp_date
	  ,t1.bill_amt
from (
      select t.card_nbr
            ,t.trans_type
            ,t.desc_nar
            ,t.inp_date
            ,t.bill_amt
            ,count(1)over(partition by t.card_nbr
                                       ,t.trans_type
                                       ,t.desc_nar
                                       ,t.inp_date
                                       ,t.bill_amt
					      ) as count_row
      from t_tran t
     ) t1
where t1.count_row > 1;

3、a-b

表a

idproducttype
100010000120160705000000011
100010000120140801000000012
100010000120170330000000011

表b

pc_ididlevel
10011000100001a
10021000100001a
10031000100001c
10041000100002g
10051000100002b
10061000100003d

1、请写出运行结果:select count(*) from a left join b on a.id=b.id;

9

a.idproducttypepc_idb.idlevel
10001000012016070500000001110011000100001a
10001000012016070500000001110021000100001a
10001000012016070500000001110031000100001c
10001000012014080100000001210011000100001a
10001000012014080100000001210021000100001a
10001000012014080100000001210031000100001c
10001000012017033000000001110011000100001a
10001000012017033000000001110021000100001a
10001000012017033000000001110031000100001c

2、请写出运行结果:select count(*) from a right join b on a.id=b.id;

12

pc_idb.idlevela.idproducttype
10011000100001a100010000120160705000000011
10011000100001a100010000120140801000000012
10011000100001a100010000120170330000000011
10021000100001a100010000120160705000000011
10021000100001a100010000120140801000000012
10021000100001a100010000120170330000000011
10031000100001c100010000120160705000000011
10031000100001c100010000120140801000000012
10031000100001c100010000120170330000000011
10041000100002g
10051000100002b
10061000100003d

3、请写出运行结果:select count(*) from a inner join b on a.id=b.id

9

a.idproducttypepc_idb.idlevel
10001000012016070500000001110011000100001a
10001000012016070500000001110021000100001a
10001000012016070500000001110031000100001c
10001000012014080100000001210011000100001a
10001000012014080100000001210021000100001a
10001000012014080100000001210031000100001c
10001000012017033000000001110011000100001a
10001000012017033000000001110021000100001a
10001000012017033000000001110031000100001c

4、请写出运行结果:select count(*) from a full join b on a.id=b.id

12

pc_idb.idlevela.idproducttype
10011000100001a100010000120160705000000011
10011000100001a100010000120140801000000012
10011000100001a100010000120170330000000011
10021000100001a100010000120160705000000011
10021000100001a100010000120140801000000012
10021000100001a100010000120170330000000011
10031000100001c100010000120160705000000011
10031000100001c100010000120140801000000012
10031000100001c100010000120170330000000011
10041000100002g
10051000100002b
10061000100003d

参考建表sql语句

create table a (id number ,product number,type number);

insert into a values(1000100001,  2016070500000001, 1);
insert into a values(1000100001,  2014080100000001, 2);
insert into a values(1000100001,  2017033000000001, 1);

commit;

create table b (pc_id number, id  number, level_1  varchar2 (20));

insert into b values (1001, 1000100001, 'a');
insert into b values (1002, 1000100001, 'a');
insert into b values (1003, 1000100001, 'c');
insert into b values (1004, 1000100002, 'g');
insert into b values (1005, 1000100002, 'b');
insert into b values (1006, 1000100003, 'd');

commit;

4、t_prod

t_prod

cust_idprodamtcnt
客户号产品金额个数
1000100001a1002
1000100001b2001
1000100002b2004
1000100003a1001
1000100003b2002

1、查询出同时持有a产品和b产品的客户

--表关联
select t1.cust_id
from t_prod t1 --'a'
inner join t_prod t2 --'b' 
        on t1.cust_id = t2.cust_id
       and t1.prod = 'a'
       and t2.prod = 'b';

--exists 
select t1.cust_id
from t_prod t1 --'a'
where t1.prod = 'a'
and exists(select 1
           from t_prod t2 --'b'
           where t2.prod = 'b'
           and t1.cust_id = t2.cust_id
          );
--in
select t1.cust_id
from t_prod t1
where t1.prod = 'a'
      and t1.cust_id in (
                          select t2.cust_id
                          from t_prod t2
                          where t2.prod = 'b'
                        );
--count()
select t1.cust_id
from t_prod t1 
where t1.prod in('a','b')
group by t1.cust_id
having count(1) =2;                        

2、查询出只持有b产品的客户

--not in
select t1.cust_id
from t_prod t1
where t1.prod = 'b'
      and t1.cust_id not in (
		                     select t2.cust_id
                             from t_prod t2
                             where t2.prod = 'a'	
                            );
--not exists
select *
from t_prod t 
where t.prod = 'b'
and not exists(select t1.cust_id
               from t_prod t1 
               where t1.prod <> 'b'
               and t.cust_id = t1.cust_id
              );    
              
--先把只有一个产品的客户找到
--再看看他有没有 b 产品
select t1.cust_id
from (
      select t.cust_id,
             t.prod,
             t.amt,
             t.cnt
             ,count(t.prod)over(partition by t.cust_id) as c_p
      from t_prod t 
     ) t1 
where t1.prod = 'b'
and t1.c_p = 1;                                      

3、通过sql将表1转化为如下的表

cust_idab
1000100001200200
1000100002800
1000100003100400
-- sum-case-when
select t.cust_id
      ,sum(case when t.prod='a' then t.amt*t.cnt end) as a
      ,sum(case when t.prod='b' then t.amt*t.cnt end) as b
from t_prod t
group by t.cust_id;

-- pivot
select *
from (
	  select t1.cust_id
		    ,t1.prod
		    ,t1.amt * t1.cnt as spend
	  from t_prod t1
     ) t2
pivot(sum(spend)
      for prod in(
                  'a' as "a"
                 ,'b' as "b"
                 )
     );

参考建表语句

create table t_prod(cust_id number(20)
                    ,prod varchar2(50)
                    ,amt number(20)
                    ,cnt number(20)
                    );
                    
insert into t_prod values (1000100001 , 'a' , 100 , 2 ) ;
insert into t_prod values (1000100001 , 'b' , 200 , 1 ) ;
insert into t_prod values (1000100002 , 'b' , 200 , 4 ) ;
insert into t_prod values (1000100003 , 'a' , 100 , 1 ) ;
insert into t_prod values (1000100003 , 'b' , 200 , 2 ) ;
insert into t_prod values (1000100004 , 'c' , 100 , 1 ) ;
insert into t_prod values (1000100004 , 'b' , 200 , 2 ) ;
insert into t_prod values (1000100005 , 'c' , 200 , 2 ) ;

commit;
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

只会HelloWorld的华娃

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值