(秦路)七周成为数据分析师(第五周)—— MySQL

主键:
表的主键不做强制要求,但建议设立
主键值必须唯一
每行必须有一个主键,不可为空
主键值不可被修改
主键值被删除后不可重用
表A的主键,可以做为表B的字段,此时不受约束
数据类型
在这里插入图片描述
条件select
and条件优先级较高,会把and连接的条件作为一个条件

# 大于
SELECT * FROM data.dataanalyst
where companyid>'3770'
# 等于
SELECT * FROM data.dataanalyst
where companyid='3770'
# 在某个区间
SELECT * FROM data.dataanalyst
where companyid between '3770' and '4000'
# 只有某几个
SELECT * FROM data.dataanalyst
where companyid in ('3770','4000')
# 没有某几个
SELECT * FROM data.dataanalyst
where companyid not in ('3770','4000')
# 不同条件
SELECT * FROM data.dataanalyst
where companyid not in ('3770','4000')
and city='上海'
# and连接的作为一个条件,和or并列
SELECT * FROM data.dataanalyst
where companyid not in ('3770','4000')
and city='上海'
or workyear='应届毕业生'
# 实际效果为
SELECT * FROM data.dataanalyst
where (companyid not in ('3770','4000')
and city='上海')
or workyear='应届毕业生'
# 要实现companyid为必要条件,可改为
SELECT * FROM data.dataanalyst
where companyid not in ('3770','4000')
and (city='上海'
or workyear='应届毕业生')
# 包含某个词
SELECT * FROM data.dataanalyst
where secondtype like '开发'

group by

select * from dataanalyst
group by city;
# 分组前和后筛选
select * from dataanalyst
where industryField like '%电子商务%'
group by city
having count(positionId)>=50;
# 分组后筛选
select * from dataanalyst
group by city
having count(if(industryField like '%电子商务%',1,null))>=50;

函数

# left字符串从左边的前几位
select left(salary,1),salary from dataanalyst;
# 定位所选字符在字符串中的位置,4是从哪一位开始查找
select left(salary,1),locate('k',salary,4),salary from dataanalyst;
select left(salary,locate('k',salary)),salary from dataanalyst;

select left(salary,locate('k',salary)-1),
left(right(salary,length(salary)-locate('-',salary)),length(salary)-locate('-',salary)-1),
salary
from dataanalyst;

命令行加载
load data local infile '文件位置' into table data.userinfo fields terminated by ','; # 用逗号分隔

# 统计用户三月份的回购率和复购率
select t1.name,count(t1.m),count(t2.m)
from (select useid,data_format(paidtime,'%Y-%m-%1') as m
from data.orderinfo
where ispaid='已支付'
group by userid,data_format(paidtime,'%Y-%m-%1')
) t1
left join(select useid,data_format(paidtime,'%Y-%m-%1') as m
from data.orderinfo
where ispaid='已支付'
group by userid,data_format(paidtime,'%Y-%m-%1')
) t2
on t1.userid=t2.userid and t1.m=date_sub(t2.m,interval 1 month)
group by t1.m
# 统计男女用户的消费频次是否有差异
# 不同用户的消费频次
select o.userid,o.sex,count(1) from orderinfo o
inner join (
select * from userinfo
where sex <> ''
) t
on o.userid=t.userid
group by userid,sex
# 男女的消费频次
select sex,avg(ct) from (
select o.userid,o.sex,count(1) ct from orderinfo o
inner join (
select * from userinfo
where sex <> ''
) t
on o.userid=t.userid
group by userid,sex
)t2
group by sex;
# 统计多次消费的用户,第一次和最后一次消费间隔是多久
select userid,datadiff(max(paidtime),min(paidtime))
from orderinfo
where ispaid='已支付'
group by userid
having count(1)>1
# 统计不同年龄段,用户的消费金额是否有差异
select age from(
select userid,age,count(userid) from orderinfo o
left join(
select userid,ceil((year(now())-year(birth))/10) age from userinfo 
where birth>'1901-00-00' and ispaid='已支付') t
on o.useid=t.userid
group by o.userid,age) t2
group by age;
# 统计消费的二八法则,消费的20top用户,贡献了多少额度
select count(1),sum(total) from (
select userid,sum(price) as total
from orderinfo
where ispaid='已支付'
group by userid
order by total desc
limit 17000) t;

参考链接:七周成为数据分析师

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值