2021_lg_01.sql_某招聘网站岗位数据分析案例(sql清洗,分析案例,纯代码)

库名:

recruitment;

1.新建库名

#CREATE DATABASE IF NOT EXISTS recruitment DEFAULT CHARSET utf8 COLLATE utf8_general_ci; 

CREATE DATABASE recruitment CHARSET utf8;

2.导入数据语句:

mysql -uroot -p recruitment< [recruitment.sql(文件路径)]
/*
1.使用数据库
*/
use recruitment;
/*
2.查看data数据表列名
*/
desc data;

/*
3.去除数据缺失
*/
DROP view if EXISTS v_data_clean_null;
CREATE VIEW v_data_clean_null as 
SELECT * FROM data
WHERE 
job_href is not null and job_href != '' and
 job_name is not null and job_name != '' and
 company_href is not null and company_href != '' and
 company_name is not null and company_name != '' and
 providesalary_text is not null and providesalary_text != '' and
 workarea is not null and workarea != '' and
 workarea_text is not null and workarea_text != '' and
 companytype_text is not null and companytype_text != '' and
 degreefrom is not null and degreefrom != '' and
 workyear is not null and workyear != '' and
 updatedate is not null and updatedate != '' and
 issuedate is not null and issuedate != '' and
 parse2_job_detail is not null and parse2_job_detail != '';
 
/*
4.查看条数
*/
SELECT count(1) FROM  v_data_clean_null;
SELECT count(1) FROM data;

/*
5.数据去重 运用窗口函数排序去重
*/
 DROP view if EXISTS v_data_clean_distinct;
 create view  v_data_clean_distinct
 as
 with t1 as
 (
 SELECT 
 *,
 ROW_NUMBER() 
 over
(PARTITION by company_name,job_name ORDER BY issuedate desc) 
 as 
 row_distinct
FROM 
v_data_clean_null
)
SELECT * FROM t1 WHERE  t1.row_distinct=1
;
 
 /*
6.北上广深筛选
*/

 DROP view if EXISTS v_data_clean_workplace;
 create view  v_data_clean_workplace
 as
with p as 
(
SELECT 
*,
(
case 
when workarea_text LIKE '%北京%'  then '北京'
when workarea_text LIKE '%上海%'  then '上海'
when workarea_text LIKE '%广州%'  then '广州'
when workarea_text LIKE '%深圳%'  then '深圳'
end
) as work_place
FROM v_data_clean_distinct

)

SELECT * FROM p WHERE p.work_place is not null;

 /*
7.数据分析师筛选
*/

 DROP view if EXISTS v_data_clean_data;
 create view  v_data_clean_data
 as

SELECT 
* 
FROM 
v_data_clean_workplace 
WHERE 
job_name 
LIKE
'%数据%'
;


 /*
8。清洗完成,建表
*/
DROP VIEW if EXISTS v_data_clean;
CREATE view v_data_clean
as
SELECT * FROM v_data_clean_data;


 /*
9.岗位数量分析
*/


CREATE view v_data_market_damand as 
SELECt
work_place '城市',
sum(degreefrom) '岗位总数量',
count(DISTINCT job_name) '职位数量'
FROM 
v_data_clean
GROUP BY 
work_place order by '岗位总数量' desc;



 /*
10.企业类型岗位人数分布
*/
with p as 
(
SELECT 

companytype_text ,
sum(degreefrom) drg_sum
FROM 
v_data_clean
GROUP BY
companytype_text

)

SELECT 
companytype_text '企业类型',
drg_sum '招聘数量',
concat(ROUND(drg_sum/(SELECT sum(drg_sum) FROM p) ,4)*100,'%') as '招聘占比'
FROM p ORDER BY drg_sum desc;

 /*
11.薪资分析
*/
drop view if EXISTS v_data_clean_unit;

CREATE VIEW v_data_clean_unit
AS
with p as 
(
SELECT 
*,

(
case 
when providesalary_text LIKE '%万/月' then 10000
when providesalary_text LIKE '%千/月' then 1000
when providesalary_text LIKE '%万/年' then 833
end
)
unit,
(
case 
when providesalary_text LIKE '%万/月' then '万/月'
when providesalary_text LIKE '%千/月' then '千/月'
when providesalary_text LIKE '%万/年' then '万/年'
end
)
unit_2
FROM v_data_clean
)

SELECT *,SUBSTRING_INDEX(providesalary_text,unit_2,1) fee

 FROM p ;
 
 
 
 drop view if EXISTS v_data_clean_salary;
 CREATE VIEW v_data_clean_salary
AS
 SELECT 
*,
 cast(
 (
 SUBSTRING_INDEX(fee,'-',1)
 +
 SUBSTRING_INDEX(fee,'-',-1)
 )/2 as DECIMAL(6,2))*unit 'salary'
 FROM v_data_clean_unit WHERE unit_2 is not null;
 
SELECT 
workyear '工作年限' ,
AVG(salary) '平均薪资'
FROM 
v_data_clean_salary
GROUP BY 
workyear
ORDER BY 
LENGTH(workyear),workyear
asc
;


create view v_data_companytype_salary as
select
 companytype_text as '企业类型',
 avg(salary) as '平均薪资'
from v_data_clean_salary
group by companytype_text
order by avg(salary) desc
;
select * from v_data_companytype_salary;

-- 第一步,获取前30名高频技能点及其出现频数

create view v_data_skill_quantity as
(with p as
(select
 skill,
 count(*) as quantity
from skill_table st
inner join v_data_clean v
on v.parse2_job_detail like concat('%',st.skill,'%')
group by st.skill)
select
 *
from p
order by quantity desc
limit 30)
;
select * from v_data_skill_quantity;



-- 2
create view v_data_skill as
select
 skill as '技能点',
 quantity as '出现频数',
 concat(cast(quantity / total_quantity * 100 as decimal
(4,2)),'%') as '出现频率'
from
 v_data_skill_quantity,
 (select count(*) as total_quantity from v_data_clean) as f
;
select * from v_data_skill

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Captain_Data

打赏一下~

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值