面试题目一
1.题目如下
2.下面开始建表、插入数据
create table datafrog_test1
(userid varchar(20),
changjing varchar(20),
inttime varchar(20)
);
insert into datafrog_test1 values
(1,1001,1400),
(2,1002,1401),
(1,1002,1402),
(1,1001,1402),
(2,1003,1403),
(2,1004,1404),
(3,1003,1400)
(4,1004,1402),
(4,1003,1403),
(4,1001,1403),
(4,1002,1404)
(5,1002,1402),
(5,1002,1403),
(5,1001,1404),
(5,1003,1405);
3.解答思路:排序及concat连接
select concat(t.userid,'-',group_concat(t.changjing separator'-')) as result
from(
select userid,changjing,inttime,
if(@tmp=userid,@rank:=@rank+1,@rank:=1) as new_rank,
@tmp:=userid as tmp
from (select userid,changjing, min(inttime) inttime from datafrog_test1 group by userid,changjing)temp
order by userid,inttime
)t
where t.new_rank<=2
group by t.userid;
4.输出结果:
5.注意:
有可能大家的代码会有报错现象,主要是ONLY_FULL_GROUP_BY引起的报错,解决办法是运行上面代码时,运行下这个就好set sql_mode='' 。其实mysql 作为查询还是不错的,但是拿来做分析的话,就是有点乏力了,像排序、中位数等都比较麻烦些的,工作中一般用pandas、sqlserver、oracle、hive、spark这些来做分析。这里可不是说mysql没用,反而是特别有用,也容易上手,是其他的基础。
6.大家来看下hive解法
with tmp as (
select
userid,
changjing,order_num,changjing1
from
(SELECT userid ,
changjing,
row_number() over(partition by userid order by inttime asc) as order_num,
lag(changjing,1,'datafrog') OVER(partition by userid order by inttime asc) AS changjing1
FROM datafrog_test1) as a
where changjing!=changjing1)
, tmp2 as (
select userid,changjing,order_num,changjing1,
row_number() over(partition by userid order by order_num ) as changjing_num
from tmp
)
select concat( userid,'-',concat_ws('-', collect_set(changjing)) )
from tmp2 where changjing_num <3
group by userid
面试题目二
1.题目如下
2.下面开始建表、插入数据
create database xiangji;
use xiangji;
create table userinfo(
uid varchar(10),
app_name varchar(20),
duration int(10),
times int(10),
dayno varchar(30)
);
load data infile 'D:/d.csv'
into table userinfo
fields terminated by ','
ignore 1 lines;
咱们数据蛙小伙伴给大家编辑了一份数据,大家文末查看哟3.先看看活跃度的计算
select dayno, count(distinct uid) as 活跃度
from aui
where app_name='相机'
group by dayno ;
4.次日留存
使用两表自交,利用case when找到符合相差日期为1天的id,计数,得出次日留存人数,最后用distinct去重
select
a.day1,count(distinct case when day2-day1=1 then a.uid end) 次留
from
(select uid,date_format(dayno,'%Y%m%d')as day1 from aui where app_name='相机') a
#用date_format把dayno的文本格式改为可计算的形式
left join
(select uid,date_format(dayno,'%Y%m%d')as day2 from aui where app_name='相机') b
on a.uid=b.uid
group by a.day1;
5.计算次日、三日、七日留存
select
day1,count(distinct a.uid) 活跃,
count(distinct case when day2-day1=1 then a.uid end) 次留,
count(distinct case when day2-day1=3 then a.uid end) 三留,
count(distinct case when day2-day1=7 then a.uid end) 七留,
concat(count(distinct case when day2-day1=1 then a.uid end)/count(distinct a.uid)*100,'%') 次日留存率,
concat(count(distinct case when day2-day1=3 then a.uid end)/count(distinct a.uid)*100,'%') 三日留存率,
concat(count(distinct case when day2-day1=7 then a.uid end)/count(distinct a.uid)*100,'%') 七日留存率
from (select uid,date_format(dayno,'%Y%m%d') day1 from aui where app_name = '相机') a
left join (select uid,date_format(dayno,'%Y%m%d') day2 from aui where app_name = '相机') b
on a.uid=b.uid
group by day1;
知识点:date_format用法,sum()结合条件语句的用法,百分比符号concat添加
面试题目三
1.行转列(图中左变右)
1.行转列(图中左变右)
-- 创建 course 表
create table course (
id varchar(20),
teacher_id varchar(20),
week_day varchar(20),
has_course varchar(20)
);
insert into course value
(1,1,2,"Yes"),
(2,1,3,"Yes"),
(3,2,1,"Yes"),
(4,3,2,"Yes"),
(5,1,2,"Yes")
;
select * from course;
-- 行转列
select id,teacher_id,
(case when week_day = 1 then "Yes" else " " end) "mon",
(case when week_day = 2 then "Yes" else " " end) "tue",
(case when week_day = 3 then "Yes" else " " end) "thi",
(case when week_day = 4 then "Yes" else " " end) "thu",
(case when week_day = 5 then "Yes" else " " end) "fri"
from course;
方法二:
select id,teacher_id,
(case week_day when 1 then "Yes" else " " end) "mon",
(case week_day when 2 then "Yes" else " " end) "tue",
(case week_day when 3 then "Yes" else " " end) "thi",
(case week_day when 4 then "Yes" else " " end) "thu",
(case week_day when 5 then "Yes" else " " end) "fri"
from course;
知识点:case when的用法
4.结果如下:
面试题目四
1.问题
2.数据导入
create table a1 (
name varchar(20),
english int,
maths int,
music int);
insert into a1 values
("Jim",90,88,99);
3.解答如下
SELECT name, "english" AS subject, english AS score
FROM a1
UNION
SELECT name, "maths" AS subject, maths AS score
FROM a1
UNION
SELECT name, "music" AS subject, music AS score
FROM a1
ORDER BY name;
这个题目还有其他方法,大家思考下,主要是考察行转列的
面试题目五
1.问题
2.建表导入数据
create table A2 (
FDATE datetime,
value int
);
insert into a2 values
("2018/11/23",10),
("2018/12/31",3),
("2019/2/9",53),
("2019/3/31",23),
("2019/7/8",11),
("2019/7/31",10);
# dele