金九银十又是找工作的好季节啊,最近不少数据蛙数据分析社群同学,问到一些面试数据分析时的SQL题目,所以就结合大家的面试题目以及工作场景给大家总结了一些经典题目。同时也特别感谢001号同学和002号同学的帮忙整理
面试题目一
1.题目如下
2.下面开始建表、插入数据
create table datafrog_test1
(userid varchar(20),
changjing varchar(20),
inttime varchar(20)
);
insert into datafrog_test1 values
(1,1001,1400),
(2,1002,1401),
(1,1002,1402),
(1,1001,1402),
(2,1003,1403),
(2,1004,1404),
(3,1003,1400)
(4,1004,1402),
(4,1003,1403),
(4,1001,1403),
(4,1002,1404)
(5,1002,1402),
(5,1002,1403),
(5,1001,1404),
(5,1003,1405);
3.解答思路:排序及concat连接
select concat(t.userid,'-',group_concat(t.changjing separator'-')) as result
from(
select userid,changjing,inttime,
if(@tmp=userid,@rank:=@rank+1,@rank:=1) as new_rank,
@tmp:=userid as tmp
from (select userid,changjing, min(inttime) inttime from datafrog_test1 group by userid,changjing)temp
order by userid,inttime
)t
where t.new_rank<=2
group by t.userid;
4.输出结果:
5.注意:
有可能大家的代码会有报错现象,主要是ONLY_FULL_GROUP_BY引起的报错,解决办法是运行上面代码时,运行下这个就好set sql_mode='' 。其实mysql 作为查询还是不错的,但是拿来做分析的话,就是有点乏力了,像排序、中位数等都比较麻烦些的,工作中一般用pandas、sqlserver、oracle、hive、spark这些来做分析。这里可不是说mysql没用,反而是特别有用,也容易上手,是其他的基础。
面试题目二
1.题目如下
image.png
2.下面开始建表、插入数据
create database xiangji;
use xiangji;
create table userinfo(
uid varchar(10),
app_name varchar(20),
duration int(10),
times int(10),
dayno varchar(30)
);
load data infile 'D:/d.csv'
into table userinfo
fields terminated by ','
ignore 1 lines;
咱们数据蛙小伙伴给大家编辑了一份数据,大家文末查看哟
3.先看看活跃度的计算
select dayno, count(distinct uid) as 活跃度
from aui
where app_name='相机'
group by dayno ;
4.次日留存
使用两表自交,利用case when找到符合相差日期为1天的id,计数,得出次日留存人数,最后用distinct去重
select
a.day1,count(distinct case when day2-day1=1 then a.uid end) 次留
from
(select uid,date_format(dayno,'%Y%m%d')as day1 from aui where app_name='相机') a
#用date_format把dayno的文本格式改为可计算的形式
left join
(select uid,date_format(dayno,'%Y%m%d')as day2 from aui where app_name='相机') b
on a.uid=b.uid
group by a.day1;
5.计算次日、三日、七日留存
select
day1,count(distinct a.uid) 活跃,
count(distinct case when day2-day1=1 then a.uid end) 次留,
count(distinct case when day2-day1=3 then a.uid end) 三留,
count(distinct case when day2-day1=7 then a.uid end) 七留,
concat(count(distinct case when day2-day1=1 then a.uid end)/count(distinct a.uid)*100,'%') 次日留存率,
concat(count(distinct case when day2-day1=3 then a.uid end)/count(distinct a.uid)*100,'%') 三日留存率,
concat(count(distinct case when day2-day1=7 then a.uid end)/count(distinct a.uid)*100,'%') 七日留存率
from (select uid,date_format(dayno,'%Y%m%d') day1 from aui where app_name = '相机') a
left join (select uid,date_format(dayno,'%Y%m%d') day2 from aui where app_name = '相机') b
on a.uid=b.uid
group by day1;
知识点:date_format用法,sum()结合条件语句的用法,百分比符号concat添加
面试题目三
1.行转列(图中左变右)
1.行转列(图中左变右)
-- 创建 course 表
create table course (
id varchar(20),
teacher_id varchar(20),
week_day varchar(20),
has_course varchar(20)
);
insert into course value
(1,1,2,"Yes"),
(2,1,3,"Yes"),
(3,2,1,"Yes"),
(4,3,2,"Yes"),
(5,1,2,"Yes")
;
select * from course;
-- 行转列
select id,teacher_id,
(case when week_day = 1 then "Yes" else " " end) "mon",
(case when week_day = 2 then "Yes" else " " end) "tue",
(case when week_day = 3 then "Yes" else " " end) "thi",
(case when week_day = 4 then "Yes" else " " end) "thu",
(case when week_day = 5 then "Yes" else " " end) "fri"
from course;
方法二:
select id,teacher_id,
(case week_day when 1 then "Yes" else " " end) "mon",
(case week_day when 2 then "Yes" else " " end) "tue",
(case week_day when 3 then "Yes" else " " end) "thi",
(case week_day when 4 then "Yes" else " " end) "thu",
(case week_day when 5 then "Yes" else " " end) "fri"
from course;
知识点:case when的用法
4.结果如下:
面试题目四
1.问题
2.数据导入
create table a1 (
name varchar(20),
english int,
maths int,
music int);
insert into a1 values
("Jim",90,88,99);
3.解答如下
SELECT name, "english" AS subject, english AS score
FROM a1
UNION
SELECT name, "maths" AS subject, maths AS score
FROM a1
UNION
SELECT name, "music" AS subject, music AS score
FROM a1
ORDER BY name;
这个题目还有其他方法,大家思考下,主要是考察行转列的
面试题目五
1.问题
2.建表导入数据
create table A2 (
FDATE datetime,
value int
);
insert into a2 values
("2018/11/23",10),
("2018/12/31",3),
("2019/2/9",53),
("2019/3/31",23),
("2019/7/8",11),
("2019/7/31",10);
# delete from a2 where value = 10;
select * from a2;
3.解答第一问
-- 添加索引 给 FDATE;
create index id_FDATE on a2(FDATE);
show index from a2;
4.解答第二问
SELECT FYEAR, FMONTH, VALU AS VALUE, YSUM, SUM
FROM (
SELECT b1.*
, @YSUM := if(@year = fyear, @YSUM + valu, valu) AS YSUM
, @sum := @sum + valu AS SUM
, @year := fyear
FROM (
SELECT year(FDATE) AS FYEAR, month(Fdate) AS FMONTH
, SUM(VALUE) AS valu
FROM A2
GROUP BY FMONTH
ORDER BY FYEAR, FMONTH
) b1, (
SELECT @sum := 0, @YSUM := 0
, @YEAR := NULL
) b2
) B;
面试题目六
1.问题
image.png