IT面试真题详解SQL(领取SQL详解一份)

80 篇文章 2 订阅
33 篇文章 0 订阅

【课程简介】

SQL不仅在工作中使用频繁,在IT面试中SQL题出现频率也很高,而且题目占比越来越大,经常有同学面试挂在SQL题不会做痛失offer。本次课程将从技术使用跟面试真题两方面展开讲解,分享高效的SQL代码写法,告别拐弯抹角长篇大论轻松满足工作需求;并且将40多家大厂面试真题整理出SQL题集,把常见专题总结出简易解法,套上公式就会做。助力大家轻松掌握SQL!

【教程推荐】IT面试真题详解SQL(一)

【主讲内容】

1.怎样套上公式就会做

2.40+真实大厂原题解法

3.快如闪电高效SQL写法

【主讲教师】

金牌讲师:陈老师

前中国平安高级技术专家,有9年大数据研发经验,精通SQL授课通俗易懂。

 

1. SQL执行顺序

上面的SQL语句的执行顺序是:from(加载 table1 和 table2 这两个表)-> join -> on -> where -> group by -> select 后面的普通字段与聚合函数 count、sum -> having -> distinct -> order by -> limit

--举例:

-- Example: illustrates the logical execution order
-- (from -> join/on -> where -> group by -> aggregates -> having -> order by -> limit).
select
    a.sex,
    b.city,
    count(1)    as cnt,
    sum(salary) as sum1
from table1 a
join table2 b
    on a.id = b.id
where a.name = b.name
group by a.sex, b.city
having cnt >= 2
order by a.sex, b.city
limit 10

--或者是

-- Example with DISTINCT: deduplication happens after select,
-- before order by / limit.
select distinct
    a.sex,
    b.city,
    a.age
from table1 a
join table2 b
    on a.id = b.id
where a.name = b.name
order by a.sex, b.city
limit 10

--on 和where的先后顺序讨论

--下面用left join 各得到结果,结果不一样。

--下面可知,先执行on,再执行where

-- LEFT JOIN with the extra predicate in WHERE: ON is applied first, then
-- WHERE filters the joined rows. Rows with NULL b.name (unmatched left rows)
-- fail the WHERE test, so this behaves like an inner join.
select *
from table1 a
left join table2 b
    on a.id = b.id
where a.name = b.name;

--下面的条数可能会比上面多。

-- This variant may return MORE rows than the previous one: with a LEFT JOIN,
-- an extra condition in ON only restricts which right-side rows match;
-- unmatched left rows are still kept (with NULLs on the right).
select *
from table1 a
left join table2 b
    on a.id = b.id
    and a.name = b.name;

-- NOTE(review): the article text below was interleaved into the middle of the
-- statement above by the page extraction; preserved here as comments.
-- 2. Hive: 10 exercises
-- Environment setup: configure a Hive data source in PyCharm / DataGrip / IDEA
-- (a Spark SQL data source may also be configured, for speed). For a Hive data
-- source, HDFS, YARN, the Hive metastore and hiveserver2 must be started first.

--下面用inner join 各得到结果,结果是一样的

-- With an INNER JOIN the two forms below produce identical results:
-- a non-join predicate behaves the same whether it sits in ON or in WHERE.
select *
from table1 a
join table2 b
    on a.id = b.id
where a.name = b.name;

select *
from table1 a
join table2 b
    on a.id = b.id
    and a.name = b.name;

# Start HDFS and YARN
start-all.sh

# Hive metastore service.
# Redirection fixed: '> file 2>&1' sends BOTH stdout and stderr to the log.
# The original '2>&1 > file' pointed stderr at the terminal (or nohup.out),
# so errors never reached the log file.
nohup /export/server/hive/bin/hive --service metastore > /tmp/hivemetastore.log 2>&1 &

# hiveserver2 service.
# NOTE: hiveserver2 needs roughly 2 minutes after startup before it accepts connections.
nohup /export/server/hive/bin/hive --service hiveserver2 > /tmp/hivehiveserver2.log 2>&1 &

如果遇到下面的问题,可参考对应的解决办法。如果配置 SparkSQL 数据源,需要提前启动 hdfs、hive 的 metastore、Spark 的 Thriftserver 服务。下面是 spark3 集成 hive3 需要的 jar 包;如果是 spark2 集成 hive2,则 jar 包不一样。

hive/conf/hive-env.sh中加入

# For hive/conf/hive-env.sh: cap the Hive client JVM at 512 MB and give the
# Hadoop heap 1024 MB.
# Fix: the original had both exports fused onto one line
# ('export A="..." export B=...'), which passes the literal word 'export' as
# an argument of the first command instead of starting a second statement.
export HADOOP_CLIENT_OPTS=" -Xmx512m"
export HADOOP_HEAPSIZE=1024

改完重启hiveserver2

# Start HDFS and YARN
start-all.sh

# Hive metastore service ('> file 2>&1' so stderr is captured in the log too;
# the original '2>&1 > file' left stderr pointing at the terminal).
nohup /export/server/hive/bin/hive --service metastore > /tmp/hivemetastore.log 2>&1 &

# Spark Thrift server (drop-in, faster alternative to hiveserver2).
# Fix: the line continuations were fused onto one line by the page extraction
# ('\  --hiveconf' on a single line escapes a space instead of continuing the
# command); restored as a proper multi-line invocation.
/export/server/spark/sbin/start-thriftserver.sh \
  --hiveconf hive.server2.thrift.port=10001 \
  --hiveconf hive.server2.thrift.bind.host=node1 \
  --master local[*]

show databases;

-- Create the working database and make it current.
create database if not exists test_sql;
use test_sql;

-- Some statements launch MapReduce jobs and are slow; enable local-mode
-- execution (default: false) to speed small queries up.
set hive.exec.mode.local.auto=true;

-- Question 1: visit-count statistics
CREATE TABLE test_sql.test1
(
    userId     string,
    visitDate  string,
    visitCount INT
) ROW format delimited FIELDS TERMINATED BY "\t";

INSERT overwrite TABLE test_sql.test1
VALUES ('u01', '2017/1/21', 5),
       ('u02', '2017/1/23', 6),
       ('u03', '2017/1/22', 8),
       ('u04', '2017/1/20', 3),
       ('u01', '2017/1/23', 6),
       ('u01', '2017/2/21', 8),
       ('u02', '2017/1/23', 6),
       ('u01', '2017/2/22', 4);

-- Per user and month: the month's visit total plus a running (cumulative)
-- total over that user's months. visitDate uses '/' separators, so it is
-- normalized to '-' before date_format can parse it.
select *,
       sum(sum1) over (partition by userid order by month1
           /*rows between unbounded preceding and current row*/ ) as `累积`
from (select userid,
             date_format(replace(visitdate, '/', '-'), 'yyyy-MM') as month1,
             sum(visitcount)                                      as sum1
      from test_sql.test1
      group by userid,
               date_format(replace(visitdate, '/', '-'), 'yyyy-MM')) as t;

-- Question 2: e-commerce top-K statistics
CREATE TABLE test_sql.test2
(
    user_id string,
    shop    string
) ROW format delimited FIELDS TERMINATED BY '\t';

INSERT INTO TABLE test_sql.test2
VALUES ('u1', 'a'), ('u2', 'b'), ('u1', 'b'), ('u1', 'a'), ('u3', 'c'),
       ('u4', 'b'), ('u1', 'a'), ('u2', 'c'), ('u5', 'b'), ('u4', 'b'),
       ('u6', 'c'), ('u2', 'c'), ('u1', 'b'), ('u2', 'a'), ('u2', 'a'),
       ('u3', 'a'), ('u5', 'a'), ('u5', 'a'), ('u5', 'a');

-- (1) UV (distinct-visitor count) per shop.
-- PV = total number of visits; UV = number of distinct visiting users.
select shop,
       count(distinct user_id) as uv
from test_sql.test2
group by shop;

-- Equivalent formulation: deduplicate first, then count.
-- (distinct may cover several columns; the column combination is deduplicated)
select shop,
       count(user_id) as uv
from (select distinct shop, user_id
      from test_sql.test2) as t
group by shop;

-- Also equivalent: group by performs the deduplication step.
select shop,
       count(user_id) as uv
from (select shop, user_id
      from test_sql.test2
      group by shop, user_id) as t
group by shop;

-- (2) Per shop, the top-3 visitors by visit count:
-- shop name, visitor id, visit count.
select *
from (select *,
             row_number() over (partition by shop order by cnt desc) as rn
      from (select shop, user_id, count(1) as cnt
            from test_sql.test2
            group by shop, user_id) as t) t2
-- Fix: the original read "where t2.rn3" -- the comparison operator was lost;
-- keeping the top-3 per shop requires rn <= 3.
where t2.rn <= 3;

-- Question 3: order-volume statistics
CREATE TABLE test_sql.test3
(
    dt       string,
    order_id string,
    user_id  string,
    amount   DECIMAL(10, 2)
) ROW format delimited FIELDS TERMINATED BY '\t';

INSERT overwrite TABLE test_sql.test3
VALUES ('2017-01-01', '10029028', '1000003251', 33.57),
       ('2017-01-01', '10029029', '1000003251', 33.57),
       ('2017-01-01', '100290288', '1000003252', 33.57),
       ('2017-02-02', '10029088', '1000003251', 33.57),
       ('2017-02-02', '100290281', '1000003251', 33.57),
       ('2017-02-02', '100290282', '1000003253', 33.57),
       ('2017-11-02', '10290282', '100003253', 234),
       ('2018-11-02', '10290284', '100003243', 234);

-- (1) For each month of 2017: order count, user count, total amount.
-- (2) New customers of 2017-11 (users whose first order falls in that month)
--     -- answered by the following queries.
select date_format(dt, 'yyyy-MM') as month1,
       count(distinct order_id)   as cnt1,
       count(distinct user_id)    as cnt2,
       sum(amount)                as amt
from test_sql.test3
where year(dt) = 2017
group by date_format(dt, 'yyyy-MM');

-- New customers of 2017-11: users whose earliest order month is 2017-11.
-- Fix: qualify the table as test_sql.test3 so the query does not depend on
-- the session's current database (every other query in this file qualifies
-- its tables).
select count(user_id) as cnt
from (select user_id,
             min(date_format(dt, 'yyyy-MM')) as min_month
      from test_sql.test3
      group by user_id) as t
where min_month = '2017-11';

-- New-customer count per month: compute each user's first-order month,
-- then count users per such month.
-- Fix: table qualified as test_sql.test3, consistent with the rest of the file.
select min_month,
       count(user_id) as cnt
from (select user_id,
             min(date_format(dt, 'yyyy-MM')) as min_month
      from test_sql.test3
      group by user_id) as t
group by min_month;

-- Question 4: ordering statistics over large data sets
CREATE TABLE test_sql.test4user
(
    user_id string,
    name    string,
    age     int
);
CREATE TABLE test_sql.test4log
(
    user_id string,
    url     string
);

INSERT INTO TABLE test_sql.test4user
VALUES ('001', 'u1', 10), ('002', 'u2', 15), ('003', 'u3', 15),
       ('004', 'u4', 20), ('005', 'u5', 25), ('006', 'u6', 35),
       ('007', 'u7', 40), ('008', 'u8', 45), ('009', 'u9', 50),
       ('0010', 'u10', 65);

INSERT INTO TABLE test_sql.test4log
VALUES ('001', 'url1'), ('002', 'url1'), ('003', 'url2'), ('004', 'url3'),
       ('005', 'url3'), ('006', 'url1'), ('007', 'url5'), ('008', 'url7'),
       ('009', 'url5'), ('0010', 'url1');

select * from test_sql.test4user;
select * from test_sql.test4log;

-- Task: given a 50M-row user file (user_id, name, age) and a 200M-row
-- movie-viewing log (user_id, url), rank viewing counts by age band.

-- Rounding helpers available: round, floor, ceil.
select *,
       round(x, 0) as r, -- half-up rounding
       floor(x)    as f, -- round down (toward negative infinity)
       ceil(x)     as c  -- round up (toward positive infinity)
from (select 15/10 as x
      union all
      select 18/10 as x
      union all
      select 24/10 as x
      union all
      select 27/10 as x) as t;

-- Viewing counts summed per 10-year age band (e.g. '20-30'), sorted
-- descending by total.
select type,
       sum(cnt) as sum1
from (select *,
             concat(floor(age / 10) * 10, '-', floor(age / 10) * 10 + 10) as type
      from test_sql.test4user as a
      -- shrink the (much larger) log side before joining:
      -- pre-aggregate it to one row per user
      join (select user_id,
                   count(url) as cnt
            from test_sql.test4log
            group by user_id) as b
           on a.user_id = b.user_id) as t
group by type
order by sum(cnt) desc;

-- Question 5: active-user statistics
-- Fix: the CREATE was the only statement in this section without the
-- test_sql qualifier; unless the session already ran 'use test_sql', it
-- would create the table in the wrong database while the INSERT below
-- targets test_sql.test5.
CREATE TABLE test_sql.test5
(
    dt      string,
    user_id string,
    age     int
) ROW format delimited fields terminated BY ',';

INSERT overwrite TABLE test_sql.test5
VALUES ('2019-02-11', 'test_1', 23),
       ('2019-02-11', 'test_2', 19),
       ('2019-02-11', 'test_3', 39),
       ('2019-02-11', 'test_1', 23),
       ('2019-02-11', 'test_3', 39),
       ('2019-02-11', 'test_1', 23),
       ('2019-02-12', 'test_2', 19),
       ('2019-02-13', 'test_1', 23),
       ('2019-02-15', 'test_2', 19),
       ('2019-02-16', 'test_2', 19);

select * from test_sql.test5 order by dt, user_id;

-- Task: from the log above, compute the total count and average age of all
-- users and of "active" users (users with visits on two consecutive days).
-- Expected output (type / total / average age):
-- type     total  avg_age
-- '所有用户' 3   27 -- '活跃用户' 1   19
-- Approach (consecutive-days trick): t1 dedups (dt, user, age); t2 numbers
-- each user's visit days in date order; t3 subtracts the row number from the
-- date so that consecutive days collapse onto one anchor date (dt2); t4
-- counts days per (anchor, user); t5 keeps runs of >= 2 consecutive days;
-- t6 dedups to the active users themselves.
with t1 as (select distinct dt, user_id,age from test_sql.test5),     t2 as (select *,row_number() over (partition by user_id order by dt) as rn from t1 ),     t3 as (select *,date_sub(dt,rn) as dt2 from t2),     t4 as (select dt2,user_id,age,count(1) cnt from t3 group by dt2,user_id,age),     t5 as (select * from t4 where cnt>=2),     t6 as (select distinct user_id,age from t5)

select '所有用户' as type, count(user_id) cnt,avg(age) as avg_age

from (select distinct user_id,age from test_sql.test5) t union all

select '活跃用户' as type, count(user_id) cnt,avg(age) as avg_age from t6;

-- Approach 2 for "logged in on two consecutive days": compare each day + 1
-- (dt2) against the user's next distinct login day (dt3, via lead);
-- equality means the two days are consecutive.
with t1 as (select distinct dt, user_id
            from test_sql.test5),
     t2 as (select *,
                   date_add(dt, 1)                                     as dt2,
                   lead(dt, 1) over (partition by user_id order by dt) as dt3
            from t1)
select count(distinct user_id)
from t2
where dt2 = dt3;

-- Question 6: e-commerce purchase-amount statistics
CREATE TABLE test_sql.test6
(
    userid      string,
    money       decimal(10, 2),
    paymenttime string,
    orderid     string
);

INSERT INTO TABLE test_sql.test6
VALUES ('001', 100, '2017-10-01', '123'),
       ('001', 200, '2017-10-02', '124'),
       ('002', 500, '2017-10-01', '125'),
       ('001', 100, '2017-11-01', '126');

select * from test_sql.test6 order by userid, paymenttime;

-- For every user, the amount of their first purchase in October 2017
-- (row_number = 1 within each user, ordered by payment time).
select userid,
       paymenttime,
       money
from (select *,
             row_number() over (partition by userid order by paymenttime) as rn
      from test_sql.test6
      where date_format(paymenttime, 'yyyy-MM') = '2017-10') as t
where t.rn = 1;

-- Question 7: education-domain SQL exercise
-- Three fixture tables: book (catalog), reader (borrowers), borrow_log
-- (which reader borrowed which book, and when). SORT is backquoted because
-- it is a reserved word.
CREATE TABLE test_sql.book(book_id string, `SORT` string, book_name string, writer string, OUTPUT string, price decimal(10,2));

INSERT INTO TABLE test_sql.book VALUES

('001','TP391','信息处理','author1','机械工业出版社','20'), ('002','TP392','数据库','author12','科学出版社','15'), ('003','TP393','计算机网络','author3','机械工业出版社','29'), ('004','TP399','微机原理','author4','科学出版社','39'), ('005','C931','管理信息系统','author5','机械工业出版社','40'), ('006','C932','运筹学','author6','科学出版社','55');

CREATE TABLE test_sql.reader (reader_id string, company string, name string, sex string, grade string, addr string);

INSERT INTO TABLE test_sql.reader VALUES

('0001','阿里巴巴','jack','男','vp','addr1'), ('0002','百度','robin','男','vp','addr2'), ('0003','腾讯','tony','男','vp','addr3'), ('0004','京东','jasper','男','cfo','addr4'), ('0005','网易','zhangsan','女','ceo','addr5'), ('0006','搜狐','lisi','女','ceo','addr6');

CREATE TABLE test_sql.borrow_log(reader_id string, book_id string, borrow_date string);

-- NOTE(review): the last row's borrow_date '2019-17-13' has month 17 -- this
-- looks like a typo in the original fixture (perhaps '2019-07-13' or
-- '2019-10-13'); left unchanged since the intended value cannot be determined.
INSERT INTO TABLE test_sql.borrow_log VALUES ('0001','002','2019-10-14'), ('0002','001','2019-10-13'), ('0003','005','2019-09-14'), ('0004','006','2019-08-15'), ('0005','003','2019-10-10'), ('0006','004','2019-17-13');

select * from test_sql.book;
select * from test_sql.reader;
select * from test_sql.borrow_log;

-- (8) For scheduled data-safety backups, a single SQL statement must create
-- BORROW_LOG_BAK with exactly the same structure as borrow_log AND copy all
-- existing rows into it. CTAS (create table ... as select) does both at once.
create table test_sql.BORROW_LOG_BAK as
select *
from test_sql.borrow_log;

select * from test_sql.BORROW_LOG_BAK;

-- (9) Migrating data from Oracle into the Hive warehouse: the Hive DDL for
-- the "book" table. Column separator is '|'; data is loaded externally;
-- the table is partitioned by month_part and day_part.
CREATE TABLE test_sql.book2
(
    book_id   string,
    `SORT`    string,
    book_name string,
    writer    string,
    OUTPUT    string,
    price     decimal(10, 2)
) partitioned by (month_part string, day_part string)
    row format delimited fields terminated by '|';

-- (10) Table A has month partition 201505. Update user_dinner to 'bonc8920'
-- for user_id 20000 only, leaving every other user's data untouched.
-- Hive has no UPDATE statement, so the whole partition must be rewritten.
-- A:
-- user_id   user_dinner   part
-- 20000     aaaaa         201505
-- 30000     bbbbb         201505
create table A
(
    user_id     int,
    user_dinner string
) partitioned by (part string);

insert overwrite table A partition (part = '201505')
values (20000, 'aaaaa'),
       (30000, 'bbbbb'),
       (40000, 'ccccc');

select * from A;

-- What we want to express (not legal in Hive):
--update A set user_dinner='bonc8920' where user_id=20000;

-- Hive-compatible equivalent: overwrite the partition with the updated row
-- unioned against all unchanged rows.
insert overwrite table A partition (part = '201505')
select user_id, 'bonc8920' as user_dinner
from A
where user_id = 20000
  and part = '201505'
union all
select user_id, user_dinner
from A
where user_id != 20000
  and part = '201505';

-- Question 8: service-log SQL statistics
-- Fixture: API access log. The column `date` actually holds a full
-- timestamp string; it is backquoted because 'date' is a reserved word.
CREATE TABLE test_sql.test8(`date` string, interface string, ip string);

INSERT INTO TABLE test_sql.test8 VALUES

('2016-11-09 11:22:05','/api/user/login','110.23.5.23'), ('2016-11-09 11:23:10','/api/user/detail','57.3.2.16'), ('2016-11-09 23:59:40','/api/user/login','200.6.5.166'), ('2016-11-09 11:14:23','/api/user/login','136.79.47.70'), ('2016-11-09 11:15:23','/api/user/detail','94.144.143.141'), ('2016-11-09 11:16:23','/api/user/login','197.161.8.206'), ('2016-11-09 12:14:23','/api/user/detail','240.227.107.145'), ('2016-11-09 13:14:23','/api/user/login','79.130.122.205'), ('2016-11-09 14:14:23','/api/user/detail','65.228.251.189'), ('2016-11-09 14:15:23','/api/user/detail','245.23.122.44'), ('2016-11-09 14:17:23','/api/user/detail','22.74.142.137'), ('2016-11-09 14:19:23','/api/user/detail','54.93.212.87'), ('2016-11-09 14:20:23','/api/user/detail','218.15.167.248'), ('2016-11-09 14:24:23','/api/user/detail','20.117.19.75'), ('2016-11-09 15:14:23','/api/user/login','183.162.66.97'), ('2016-11-09 16:14:23','/api/user/login','108.181.245.147'), ('2016-11-09 14:17:23','/api/user/login','22.74.142.137'), ('2016-11-09 14:19:23','/api/user/login','22.74.142.137');

select * from test_sql.test8;

-- Top-10 IPs that called /api/user/login between 14:00 and 15:00 on
-- 2016-11-09 (truncating the timestamp to the hour and comparing).
select ip,
       count(1) as cnt
from test_sql.test8
where date_format(`date`, 'yyyy-MM-dd HH') = '2016-11-09 14'
  and interface = '/api/user/login'
group by ip
order by cnt desc
limit 10;

-- Question 9: recharge-log SQL exercise
-- Fixture: recharge orders per district group (dist_id) and account.
CREATE TABLE test_sql.test9( dist_id string COMMENT '区组id', account string COMMENT '账号',   `money` decimal(10,2) COMMENT '充值金额', create_time string COMMENT '订单时间');

INSERT INTO TABLE test_sql.test9 VALUES ('1','11',100006,'2019-01-02 13:00:01'), ('1','22',110000,'2019-01-02 13:00:02'), ('1','33',102000,'2019-01-02 13:00:03'), ('1','44',100300,'2019-01-02 13:00:04'), ('1','55',100040,'2019-01-02 13:00:05'), ('1','66',100005,'2019-01-02 13:00:06'), ('1','77',180000,'2019-01-03 13:00:07'), ('1','88',106000,'2019-01-02 13:00:08'), ('1','99',100400,'2019-01-02 13:00:09'), ('1','12',100030,'2019-01-02 13:00:10'), ('1','13',100003,'2019-01-02 13:00:20'), ('1','14',100020,'2019-01-02 13:00:30'), ('1','15',100500,'2019-01-02 13:00:40'), ('1','16',106000,'2019-01-02 13:00:50'), ('1','17',100800,'2019-01-02 13:00:59'), ('2','18',100800,'2019-01-02 13:00:11'), ('2','19',100030,'2019-01-02 13:00:12'), ('2','10',100000,'2019-01-02 13:00:13'), ('2','45',100010,'2019-01-02 13:00:14'), ('2','78',100070,'2019-01-02 13:00:15');

select * from test_sql.test9 order by dist_id , money desc;

-- For 2019-01-02, the account with the largest recharge in each district
-- group. Output: dist_id, account, amount, recharge time.
select *
from (select *,
             row_number() over (partition by dist_id order by money desc) as rn
      from test_sql.test9
      where to_date(create_time) = '2019-01-02') t
where t.rn = 1;

-- Question 10: e-commerce grouped top-K exercise
-- Fixture only; the exercise's query does not appear in this chunk of the
-- article (the text after this point is a separate pivot example).
CREATE TABLE test_sql.test10( `dist_id` string COMMENT '区组id', `account` string COMMENT '账号', `gold` int COMMENT '金币');

INSERT INTO TABLE test_sql.test10 VALUES ('1','77',18), ('1','88',106), ('1','99',10), ('1','12',13), ('1','13',14), ('1','14',25), ('1','15',36), ('1','16',12), ('1','17',158), ('2','18',12), ('2','19',44), ('2','10',66), ('2','45',80), ('2','78',98);

行转列(长表变宽表)示例——原始表:

year | month | amount
1991 | 1     | 1.1
1991 | 2     | 1.2
1991 | 3     | 1.3
1991 | 4     | 1.4
1992 | 1     | 2.1
1992 | 2     | 2.2
1992 | 3     | 2.3
1992 | 4     | 2.4

目标表:

year | m1  | m2  | m3  | m4
1991 | 1.1 | 1.2 | 1.3 | 1.4
1992 | 2.1 | 2.2 | 2.3 | 2.4

SQL强化面试题(pdf版)

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值