【课程简介】
SQL不仅在工作中使用频繁,在IT面试中SQL题出现频率也很高,而且题目占比越来越大,经常有同学面试挂在SQL题不会做痛失offer。本次课程将从技术使用跟面试真题两方面展开讲解,分享高效的SQL代码写法,告别拐弯抹角长篇大论轻松满足工作需求;并且将40多家大厂面试真题整理出SQL题集,把常见专题总结出简易解法,套上公式就会做。助力大家轻松掌握SQL!
【教程推荐】IT面试真题详解SQL(一)
【主讲内容】
1.怎样套上公式就会做
2.40+真实大厂原题解法
3.快如闪电高效SQL写法
【主讲教师】
金牌讲师:陈老师
前中国平安高级技术专家,有9年大数据研发经验,精通SQL授课通俗易懂。
1. SQL执行顺序
上面的SQL语句的逻辑执行顺序是: from (去加载table1 和 table2这2个表 ) -> on -> join -> where -> group by -> 聚合函数count,sum -> having -> select 后面的普通字段和聚合结果 -> distinct -> order by -> limit。(注:标准SQL中having先于select执行;Hive/MySQL扩展允许having中引用select里的别名,如下例中的cnt。)
-- Example: an aggregate query that exercises every clause in the
-- execution order described above.
select
    a.sex,
    b.city,
    count(1)    as cnt,
    sum(salary) as sum1
from table1 as a
inner join table2 as b
    on a.id = b.id
where a.name = b.name
group by
    a.sex,
    b.city
having cnt >= 2
order by
    a.sex,
    b.city
limit 10
-- Alternatively: a DISTINCT query, which follows the same order with
-- the deduplication step applied after SELECT.
select distinct
    a.sex,
    b.city,
    a.age
from table1 as a
inner join table2 as b
    on a.id = b.id
where a.name = b.name
order by
    a.sex,
    b.city
limit 10
--Discussion: does ON or WHERE run first?
--With LEFT JOIN the two placements below give DIFFERENT results,
--which shows that ON is applied first, then WHERE.
select *
from table1 a left join table2 b
on a.id=b.id
where a.name=b.name;
--The next query can return MORE rows than the one above: a predicate in
--ON keeps unmatched left-side rows (NULL-padded), while the same
--predicate in WHERE filters them out after the join.
--NOTE(review): this second statement is interrupted by two lines of
--course notes; its ON/AND clauses continue right after them.
select *
from table1 a left join table2 b
2. Hive 10题(环境准备)。(注:本节文字插在了上方 left join 示例的中间,紧随其后的 on/and 两行仍属于上一条 SQL 语句。)
先配置环境:在 PyCharm、DataGrip 或 IDEA 中配置 Hive 数据源;也可以配置一个 SparkSQL 数据源来加快执行速度。如果配置 Hive 数据源,需要提前启动 HDFS 和 YARN、Hive 的 metastore 以及 hiveserver2。
on a.id=b.id
and a.name=b.name;
--With INNER JOIN the two placements give the SAME result: for inner
--joins, a join condition in ON and a filter in WHERE are equivalent
--(only matched rows survive either way).
select *
from table1 a
join table2 b
on a.id=b.id
where a.name=b.name;
select *
from table1 a
join table2 b
on a.id=b.id
and a.name=b.name;
# Start HDFS and YARN.
start-all.sh
# Hive metastore service.
# BUG FIX: the original used "2>&1 > file", which dups stderr to the
# terminal BEFORE stdout is redirected, so errors never reach the log.
# "> file 2>&1" captures both streams in the log file.
nohup /export/server/hive/bin/hive --service metastore > /tmp/hivemetastore.log 2>&1 &
# Hive hiveserver2 service.
# NOTE: hiveserver2 takes about 2 minutes after startup before it
# accepts connections.
nohup /export/server/hive/bin/hive --service hiveserver2 > /tmp/hivehiveserver2.log 2>&1 &
如果启动 hiveserver2 时遇到内存不足等问题,可按下面的办法解决。如果配置 SparkSQL 数据源,则需要提前启动 HDFS、Hive 的 metastore 和 Spark 的 Thriftserver 服务。注意:下面是 Spark3 集成 Hive3 需要的 jar 包;如果是 Spark2 集成 Hive2,则所需 jar 包不一样。
hive/conf/hive-env.sh中加入
# BUG FIX: the two exports were fused on one line with no separator,
# which is not a valid shell statement. One assignment per line.
export HADOOP_CLIENT_OPTS=" -Xmx512m"
export HADOOP_HEAPSIZE=1024
改完重启hiveserver2
# Start HDFS and YARN.
start-all.sh
# Hive metastore service.
# BUG FIX: redirection order — "2>&1" must come AFTER "> file",
# otherwise stderr still goes to the terminal instead of the log.
nohup /export/server/hive/bin/hive --service metastore > /tmp/hivemetastore.log 2>&1 &
# Spark Thriftserver (SparkSQL endpoint).
# BUG FIX: the continuation backslashes had been collapsed onto a single
# line ("... \ --hiveconf ..."), turning each "\ " into a bogus
# escaped-space argument. Restored the proper multi-line form.
/export/server/spark/sbin/start-thriftserver.sh \
  --hiveconf hive.server2.thrift.port=10001 \
  --hiveconf hive.server2.thrift.bind.host=node1 \
  --master local[*]
-- Verify the connection works by listing databases.
show databases ;
-- Create the working database for these exercises and switch to it.
create database if not exists test_sql; use test_sql;
-- Some statements trigger MapReduce jobs and are slow on small data;
-- enable Hive's local-mode execution optimization.
set hive.exec.mode.local.auto=true;-- (default is false) -- Question 1: visit-count statistics
-- Question 1: per-user monthly visit totals plus a running cumulative sum.
CREATE TABLE test_sql.test1 ( userId string, visitDate string, visitCount INT ) ROW format delimited FIELDS TERMINATED BY "\t";
INSERT overwrite TABLE test_sql.test1
VALUES
( 'u01', '2017/1/21', 5 ), ( 'u02', '2017/1/23', 6 ), ( 'u03', '2017/1/22', 8 ), ( 'u04', '2017/1/20', 3 ), ( 'u01', '2017/1/23', 6 ), ( 'u01', '2017/2/21', 8 ), ( 'u02', '2017/1/23', 6 ), ( 'u01', '2017/2/22', 4 );
-- Outer query: running total of sum1 over months per user. With ORDER BY
-- and no explicit frame the default frame ends at the current row, which
-- here equals the ROWS frame shown in the comment because each month
-- appears once per user after the inner GROUP BY.
select *, sum(sum1) over(partition by userid order by month1 /*rows between unbounded preceding and current row*/ ) as `累积` from
-- Inner query: normalize '2017/1/21' to month key '2017-01' and total
-- the visits per user per month.
(select userid, date_format(replace(visitdate,'/','-'),'yyyy-MM') as month1, sum(visitcount) sum1
from test_sql.test1
group by userid, date_format(replace(visitdate,'/','-'),'yyyy-MM')) as t;
-- Question 2: e-commerce Top-K statistics (shop visits).
CREATE TABLE test_sql.test2 ( user_id string, shop string ) ROW format delimited FIELDS TERMINATED BY '\t';
INSERT INTO TABLE test_sql.test2 VALUES
( 'u1', 'a' ), ( 'u2', 'b' ), ( 'u1', 'b' ), ( 'u1', 'a' ), ( 'u3', 'c' ), ( 'u4', 'b' ), ( 'u1', 'a' ), ( 'u2', 'c' ), ( 'u5', 'b' ), ( 'u4', 'b' ), ( 'u6', 'c' ), ( 'u2', 'c' ), ( 'u1', 'b' ), ( 'u2', 'a' ), ( 'u2', 'a' ), ( 'u3', 'a' ), ( 'u5', 'a' ), ( 'u5', 'a' ), ( 'u5', 'a' );
--(1) UV (unique visitors) per shop.
-- PV = page views: every visit to the site counts.
-- UV = unique visitors: distinct users, deduplicated.
-- (Part (2), top-3 visitors per shop with shop / user_id / visit count,
-- is answered by the row_number() query further below.)
select shop, count(distinct user_id) as uv
from test_sql.test2 group by shop ;
--Equivalent decomposition of the query above.
--DISTINCT over several columns deduplicates on the combination.
select shop, count(user_id) as uv
from
(select distinct shop, user_id
from test_sql.test2 ) as t
group by shop ;
--Also equivalent, using GROUP BY for the deduplication step:
select shop, count(user_id) as uv
from
(select shop, user_id
from test_sql.test2 group by shop, user_id) as t
group by shop ;
-- (2) Top-3 visitors per shop: shop, user_id, visit count.
-- BUG FIX: the filter read "where t2.rn3", which is not a valid
-- predicate (the comparison operator was lost); top-3 needs rn <= 3.
select * from
(select *, row_number() over (partition by shop order by cnt desc) as rn
from
(select shop,user_id,count(1) as cnt from test_sql.test2 group by shop,user_id ) as t) t2
where t2.rn<=3;
-- Question 3: order statistics.
CREATE TABLE test_sql.test3 ( dt string, order_id string, user_id string, amount DECIMAL ( 10, 2 ) ) ROW format delimited FIELDS TERMINATED BY '\t';
INSERT overwrite TABLE test_sql.test3 VALUES
('2017-01-01','10029028','1000003251',33.57), ('2017-01-01','10029029','1000003251',33.57), ('2017-01-01','100290288','1000003252',33.57), ('2017-02-02','10029088','1000003251',33.57), ('2017-02-02','100290281','1000003251',33.57), ('2017-02-02','100290282','1000003253',33.57), ('2017-11-02','10290282','100003253',234), ('2018-11-02','10290284','100003243',234);
-- (1) For 2017: per-month order count, distinct user count, total amount.
select date_format(dt,'yyyy-MM') as month1, count(distinct order_id) as cnt1, count(distinct user_id) as cnt2, sum(amount) as amt from test_sql.test3
where year(dt)=2017
group by date_format(dt,'yyyy-MM');
-- (2) New customers of 2017-11: users whose FIRST-ever order month
-- is 2017-11.
-- Consistency fix: "test3" is now qualified as test_sql.test3 like every
-- other statement in this file, so the queries work regardless of the
-- session's current database.
select count(user_id) cnt from
(select user_id, min(date_format(dt,'yyyy-MM')) min_month
from test_sql.test3 group by user_id) as t where min_month='2017-11';
-- Generalization shown in the course: new-customer count for EVERY month.
select min_month, count(user_id) cnt
from (select user_id, min(date_format(dt, 'yyyy-MM')) min_month from test_sql.test3 group by user_id) as t
group by min_month;
-- Question 4: big-data sort statistics.
CREATE TABLE test_sql.test4user (user_id string,name string,age int);
CREATE TABLE test_sql.test4log (user_id string,url string);
INSERT INTO TABLE test_sql.test4user VALUES('001','u1',10), ('002','u2',15), ('003','u3',15), ('004','u4',20), ('005','u5',25), ('006','u6',35), ('007','u7',40), ('008','u8',45), ('009','u9',50), ('0010','u10',65);
INSERT INTO TABLE test_sql.test4log VALUES('001','url1'), ('002','url1'), ('003','url2'), ('004','url3'), ('005','url3'), ('006','url1'), ('007','url5'), ('008','url7'), ('009','url5'), ('0010','url1');
select * from test_sql.test4user ;
select * from test_sql.test4log ;
--Scenario: a 50-million-row user file (user_id,name,age) and a
-- 200-million-row movie-view log (user_id,url); rank view counts by
-- age bracket.
--Rounding helpers available: round, floor, ceil.
select *, round(x,0) as r,--round to nearest integer
floor(x) as f,--round down
ceil(x) as c--round up
from
(select 15/10 as x union all
select 18/10 as x union all
select 24/10 as x union all
select 27/10 as x ) as t;
-- Question 4 answer: total movie views per 10-year age bracket,
-- sorted by the total in descending order.
with log_cnt as (
    -- Pre-aggregate the big log table before the join to shrink it.
    select user_id, count(url) as cnt
    from test_sql.test4log
    group by user_id
),
bracketed as (
    -- Label each user with a bracket such as '10-20', '20-30', ...
    select
        u.*,
        l.cnt,
        concat(floor(age/10)*10,'-',floor(age/10)*10+10) as type
    from test_sql.test4user as u
    join log_cnt as l
        on u.user_id = l.user_id
)
select type, sum(cnt) as sum1
from bracketed
group by type
order by sum1 desc;
-- 第5题:活跃用户统计
CREATE TABLE test5( dt string, user_id string, age int) ROW format delimited fields terminated BY ',';
INSERT overwrite TABLE test_sql.test5 VALUES ('2019-02-11','test_1',23), ('2019-02-11','test_2',19), ('2019-02-11','test_3',39), ('2019-02-11','test_1',23), ('2019-02-11','test_3',39), ('2019-02-11','test_1',23), ('2019-02-12','test_2',19), ('2019-02-13','test_1',23), ('2019-02-15','test_2',19), ('2019-02-16','test_2',19);
select * from test_sql.test5 order by dt,user_id;
--Question 5: from the log, compute the count and average age of ALL
--users and of ACTIVE users (a user with visit records on two
--consecutive days).
-- Expected output (type / count / avg age):
-- '所有用户' 3 27 ; '活跃用户' 1 19
-- Approach 1 (t1..t6): date_sub(dt, rn) maps every run of consecutive
-- days to one anchor date, so grouping by (anchor, user) and keeping
-- cnt>=2 finds users with a streak of at least two days.
with t1 as (select distinct dt, user_id,age from test_sql.test5), t2 as (select *,row_number() over (partition by user_id order by dt) as rn from t1 ), t3 as (select *,date_sub(dt,rn) as dt2 from t2), t4 as (select dt2,user_id,age,count(1) cnt from t3 group by dt2,user_id,age), t5 as (select * from t4 where cnt>=2), t6 as (select distinct user_id,age from t5)
select '所有用户' as type, count(user_id) cnt,avg(age) as avg_age
from (select distinct user_id,age from test_sql.test5) t union all
select '活跃用户' as type, count(user_id) cnt,avg(age) as avg_age from t6;
-- Approach 2 for the two-consecutive-day check: a user is active when
-- dt+1 equals the next visit date from lead().
with t1 as (select distinct dt, user_id from test_sql.test5), t2 as (select *, date_add(dt,1) as dt2, lead(dt,1)over(partition by user_id order by dt) as dt3 from t1)
select count(distinct user_id) from t2 where dt2=dt3;
-- Question 6: e-commerce purchase-amount statistics.
CREATE TABLE test_sql.test6 ( userid string, money decimal(10,2), paymenttime string, orderid string);
INSERT INTO TABLE test_sql.test6 VALUES('001',100,'2017-10-01','123'), ('001',200,'2017-10-02','124'), ('002',500,'2017-10-01','125'), ('001',100,'2017-11-01','126');
select * from test_sql.test6 order by userid,paymenttime;
--For every user, the amount of their FIRST purchase in October of
--this year (2017-10). row_number() ordered by paymenttime picks the
--earliest qualifying order per user.
select userid,paymenttime,money
from
(select *, row_number() over (partition by userid order by paymenttime) as rn from test_sql.test6 where date_format(paymenttime,'yyyy-MM')='2017-10' ) as t
where t.rn=1
;
-- Question 7: education-domain SQL practice (library schema).
CREATE TABLE test_sql.book(book_id string, `SORT` string, book_name string, writer string, OUTPUT string, price decimal(10,2));
INSERT INTO TABLE test_sql.book VALUES
('001','TP391','信息处理','author1','机械工业出版社','20'), ('002','TP392','数据库','author12','科学出版社','15'), ('003','TP393','计算机网络','author3','机械工业出版社','29'), ('004','TP399','微机原理','author4','科学出版社','39'), ('005','C931','管理信息系统','author5','机械工业出版社','40'), ('006','C932','运筹学','author6','科学出版社','55');
CREATE TABLE test_sql.reader (reader_id string, company string, name string, sex string, grade string, addr string);
INSERT INTO TABLE test_sql.reader VALUES
('0001','阿里巴巴','jack','男','vp','addr1'), ('0002','百度','robin','男','vp','addr2'), ('0003','腾讯','tony','男','vp','addr3'), ('0004','京东','jasper','男','cfo','addr4'), ('0005','网易','zhangsan','女','ceo','addr5'), ('0006','搜狐','lisi','女','ceo','addr6');
CREATE TABLE test_sql.borrow_log(reader_id string, book_id string, borrow_date string);
-- NOTE(review): the last row's borrow_date '2019-17-13' has month 17 —
-- an invalid calendar date, probably a typo for '2019-07-13'; confirm
-- against the original exercise before relying on date functions here.
INSERT INTO TABLE test_sql.borrow_log VALUES ('0001','002','2019-10-14'), ('0002','001','2019-10-13'), ('0003','005','2019-09-14'), ('0004','006','2019-08-15'), ('0005','003','2019-10-10'), ('0006','004','2019-17-13');
select * from test_sql.book;
select * from test_sql.reader;
select * from test_sql.borrow_log;
--(8) For data safety the borrow log must be backed up periodically.
-- With ONE SQL statement, create table BORROW_LOG_BAK with exactly the
-- same structure as borrow_log AND copy all existing rows into it —
-- CREATE TABLE ... AS SELECT (CTAS) does both at once.
create table test_sql.BORROW_LOG_BAK as select * from test_sql.borrow_log;
select * from test_sql.BORROW_LOG_BAK;
--(9) Migrate data from Oracle into Hive: write the Hive DDL for the
-- "book" table. Requirements: column delimiter '|'; data loaded
-- externally; partitioned by month_part and day_part.
CREATE TABLE test_sql.book2
( book_id string, `SORT` string, book_name string, writer string, OUTPUT string, price decimal(10, 2) )partitioned by (month_part string,day_part string ) row format delimited fields terminated by '|';
--(10) Table A has a month partition 201505; set user_dinner to
-- 'bonc8920' for user_id 20000 and leave all other rows unchanged.
-- Hive has no UPDATE on plain tables, so rewrite the partition instead.
--A -- user_id user_dinner part -- 20000 aaaaa 201505 -- 30000 bbbbb 201505
create table A (user_id int,user_dinner string) partitioned by (part string);
insert overwrite table A partition (part = '201505')
values (20000, 'aaaaa'), (30000, 'bbbbb'), (40000, 'ccccc');
select * from A;
-- The statement below is the RDBMS-style UPDATE that Hive cannot run;
-- kept for reference only:
--update A set user_dinner='bonc8920' where user_id=20000;
-- Workaround: overwrite the partition with "changed rows UNION ALL
-- unchanged rows".
-- NOTE(review): the user_id!=20000 branch silently drops rows whose
-- user_id is NULL (NULL comparisons are UNKNOWN); fine for this data
-- set, but verify before reusing on nullable keys.
insert overwrite table A partition (part = '201505')
select user_id, 'bonc8920' as user_dinner from A where user_id=20000 and part = '201505'
union all
select user_id, user_dinner from A where user_id!=20000 and part = '201505' ;
-- Question 8: service-log SQL statistics.
CREATE TABLE test_sql.test8(`date` string, interface string, ip string);
INSERT INTO TABLE test_sql.test8 VALUES
('2016-11-09 11:22:05','/api/user/login','110.23.5.23'), ('2016-11-09 11:23:10','/api/user/detail','57.3.2.16'), ('2016-11-09 23:59:40','/api/user/login','200.6.5.166'), ('2016-11-09 11:14:23','/api/user/login','136.79.47.70'), ('2016-11-09 11:15:23','/api/user/detail','94.144.143.141'), ('2016-11-09 11:16:23','/api/user/login','197.161.8.206'), ('2016-11-09 12:14:23','/api/user/detail','240.227.107.145'), ('2016-11-09 13:14:23','/api/user/login','79.130.122.205'), ('2016-11-09 14:14:23','/api/user/detail','65.228.251.189'), ('2016-11-09 14:15:23','/api/user/detail','245.23.122.44'), ('2016-11-09 14:17:23','/api/user/detail','22.74.142.137'), ('2016-11-09 14:19:23','/api/user/detail','54.93.212.87'), ('2016-11-09 14:20:23','/api/user/detail','218.15.167.248'), ('2016-11-09 14:24:23','/api/user/detail','20.117.19.75'), ('2016-11-09 15:14:23','/api/user/login','183.162.66.97'), ('2016-11-09 16:14:23','/api/user/login','108.181.245.147'), ('2016-11-09 14:17:23','/api/user/login','22.74.142.137'), ('2016-11-09 14:19:23','/api/user/login','22.74.142.137')<br/>
-- Question 9: recharge-log SQL practice.
CREATE TABLE test_sql.test9( dist_id string COMMENT '区组id', account string COMMENT '账号', `money` decimal(10,2) COMMENT '充值金额', create_time string COMMENT '订单时间');
INSERT INTO TABLE test_sql.test9 VALUES ('1','11',100006,'2019-01-02 13:00:01'), ('1','22',110000,'2019-01-02 13:00:02'), ('1','33',102000,'2019-01-02 13:00:03'), ('1','44',100300,'2019-01-02 13:00:04'), ('1','55',100040,'2019-01-02 13:00:05'), ('1','66',100005,'2019-01-02 13:00:06'), ('1','77',180000,'2019-01-03 13:00:07'), ('1','88',106000,'2019-01-02 13:00:08'), ('1','99',100400,'2019-01-02 13:00:09'), ('1','12',100030,'2019-01-02 13:00:10'), ('1','13',100003,'2019-01-02 13:00:20'), ('1','14',100020,'2019-01-02 13:00:30'), ('1','15',100500,'2019-01-02 13:00:40'), ('1','16',106000,'2019-01-02 13:00:50'), ('1','17',100800,'2019-01-02 13:00:59'), ('2','18',100800,'2019-01-02 13:00:11'), ('2','19',100030,'2019-01-02 13:00:12'), ('2','10',100000,'2019-01-02 13:00:13'), ('2','45',100010,'2019-01-02 13:00:14'), ('2','78',100070,'2019-01-02 13:00:15');
select * from test_sql.test9 order by dist_id , money desc;
--For 2019-01-02, find the account with the LARGEST recharge amount in
--each dist_id group; output dist_id, account, amount, recharge time.
-- row_number() returns exactly one row per group; if ties on the max
-- amount should all be returned, use rank() instead.
select * from
(select *, row_number() over (partition by dist_id order by money desc) rn
from test_sql.test9 where to_date(create_time)='2019-01-02') t
where t.rn=1;
-- Question 10: e-commerce grouped Top-K practice.
CREATE TABLE test_sql.test10( `dist_id` string COMMENT '区组id', `account` string COMMENT '账号', `gold` int COMMENT '金币');
-- NOTE(review): the answer query for this question is missing from this
-- chunk — presumably it mirrors question 2's row_number() Top-K pattern;
-- confirm against the full course material.
INSERT INTO TABLE test_sql.test10 VALUES ('1','77',18), ('1','88',106), ('1','99',10), ('1','12',13), ('1','13',14), ('1','14',25), ('1','15',36), ('1','16',12), ('1','17',158), ('2','18',12), ('2','19',44), ('2','10',66), ('2','45',80), ('2','78',98);
附:行转列(pivot)示例数据。原表:
year month amount
1991 1 1.1
1991 2 1.2
1991 3 1.3
1991 4 1.4
1992 1 2.1
1992 2 2.2
1992 3 2.3
1992 4 2.4
目标结果(每年一行,m1~m4为各月金额):
year m1 m2 m3 m4
1991 1.1 1.2 1.3 1.4
1992 2.1 2.2 2.3 2.4
SQL强化面试题(pdf版)