物流行业数据分析

数据来源:某企业销售的 6 种商品所对应的送货及用户反馈数据

解决问题

1 、配送服务是否存在问题。
2 、是否存在尚有潜力的销售区域。
3 、商品是否存在质量问题。

结论如下

1 、货品 4 西北,货品 2 马来西亚两条线路存在较大问题,急需提升时效。
2 、货品 2 在华东地区还有较大市场空间,适合加大投入,同时货品 2 在西北配送时效长,用户拒收率高,从成本角度考虑,应减少投入。
3 、货品 1 2 4 质量存在问题,建议扩大抽检范围,增大质检力度。

数据分析

1 数据预处理

#新建表,导入数据
create table logistics_table (
order_num varchar (7),
order_line int (2),
sale_date varchar (12),
delivery_date varchar (12),
delivery varchar (10),
goods varchar (10),
comments varchar (10),
area varchar (10),
quantity int (6),
amounts varchar (16)
);
#预览表
select * from logistics_table limit 5;

1.1 清洗重复值、缺失值、格式调整

#数据预处理,建立临时表temp_table,去除空值和重复值,调整amounts字段的数据类型,order_line
字段对分析无太大意义,可以删除
create table temp_table like logistics_table;#建立临时表
insert into temp_table select distinct * from logistics_table; #去除重复值
delete from temp_table where order_num is null or order_line is null or
sale_date is null or delivery_date is null or delivery is null or goods is null
or comments is null or area is null or quantity is null or amounts is null; #去
除空值
#amounts字段所示金额不仅有不统一单位,且有逗号,调整该字段的数据类型
alter table temp_table add column amounts_1 varchar (16);
update temp_table
set amounts_1 = case
when amounts like '%,%' then replace(amounts, ',','')
else NULL
end; #先对amounts销售金额列的逗号处理掉
alter table temp_table add column converted_amounts int (16);
update temp_table
set converted_amounts = case
when amounts_1 like '%万元' then cast(replace(amounts_1, '万元','') as
decimal(10,0))*10000
when amounts_1 like '%元' then cast(replace(amounts_1, '元','') as
decimal(10,0))
else NULL
end;#处理单位万元及元
alter table temp_table drop column order_line;#删除order_line字段
alter table temp_table drop column amounts;#删除amounts字段
alter table temp_table drop column amounts_1;#删除amounts_1字段
select * from temp_table;#查看数据表

1.2 异常值处理(比如:销售金额存在等于0的)

#异常值处理,删除掉销售金额为0的异常值
delete from temp_table where converted_amounts = 0;
select * from temp_table;

1.3 增加一项辅助列:月份

#增加一列,关于销售时间的月份列,便于后续分析
alter table temp_table add column sale_month int (2);
update temp_table
set sale_month = month(sale_date);
select * from temp_table;

2 数据分析及数据可视化

2.1 分析配送服务是否存在问题

2.1.1 按月份维度,看每月按时交货、晚交货、按时交货率
create table delivery_data
(sale_month int (2),
on_time_delivery_num int (10),
late_delivery_num int (10));#创建交货情况表
insert into delivery_data
(select a.sale_month, a.on_time_delivery_num, b.late_delivery_num
from
(select sale_month, count(delivery) as on_time_delivery_num from temp_table
where delivery = '按时交货'
group by sale_month order by sale_month) as a
left join
(select sale_month, count(delivery) as late_delivery_num from temp_table
where delivery = '晚交货'
group by sale_month order by sale_month) as b
on a.sale_month = b.sale_month
);
alter table delivery_data add column on_time_delivery_rate varchar (10);#创建按时
交货率字段
update delivery_data
set on_time_delivery_rate =
concat(cast(on_time_delivery_num/(on_time_delivery_num+late_delivery_num)*100
as decimal (4,2)),'%');#计算按时交货率
select * from delivery_data;#查看交货情况表

结论:从按时交货率来看,第四季度低于第三季度,猜测可能是气候原因造成。
2.1.2 按销售区域维度,看各销售区域的按时交货、晚交货、按时交货率
create table delivery_data_1
(area varchar (6),
on_time_delivery_num int (10),
late_delivery_num int (10));#创建交货情况表
insert into delivery_data_1
(select
c.area,
c.on_time_delivery_num,
d.late_delivery_num
from
(select area, count(delivery) as on_time_delivery_num from temp_table
where delivery = '按时交货'
group by area order by area) as c
left join
(select area, count(delivery) as late_delivery_num from temp_table
where delivery = '晚交货'
group by area order by area) as d
on c.area = d.area
);
alter table delivery_data_1 add column on_time_delivery_rate varchar (10);#创建按
时交货率字段
update delivery_data_1
set on_time_delivery_rate =
concat(cast(on_time_delivery_num/(on_time_delivery_num+late_delivery_num)*100
as decimal (4,2)),'%');#计算按时交货率
select * from delivery_data_1;#查看交货情况表

结论:泰国地区的按时交货率最高,西北地区的按时交货率最低,西北地区的配送服务明显存在问题,需要重点关注。
2.1.3 按货品维度,看各货品的按时交货、晚交货、按时交货率
create table delivery_data_2
(goods varchar (6),
on_time_delivery_num int (10),
late_delivery_num int (10));#创建交货情况表
insert into delivery_data_2
(select
e.goods,
e.on_time_delivery_num,
f.late_delivery_num
from
(select goods, count(delivery) as on_time_delivery_num from temp_table
where delivery = '按时交货'
group by goods order by goods) as e
left join
(select goods, count(delivery) as late_delivery_num from temp_table
where delivery = '晚交货'
group by goods order by goods) as f
on e.goods = f.goods
);
alter table delivery_data_2 add column on_time_delivery_rate varchar (10);#创建按
时交货率字段
update delivery_data_2
set on_time_delivery_rate =
concat(cast(on_time_delivery_num/(on_time_delivery_num+late_delivery_num)*100
as decimal (4,2)),'%');#计算按时交货率
select * from delivery_data_2;#查看交货情况表

结论:可以看出货品 4 的按时交货率最低。
2.1.4 按货品与销售区域结合的维度,看各货品所销往的各区域的按时交货、晚交货、按时交货率
create table delivery_data_3
(goods varchar (6),
area varchar (6),
on_time_delivery_num int (10),
late_delivery_num int (10));#创建交货情况表
insert into delivery_data_3
(select
g.goods,
g.area,
g.on_time_delivery_num,
h.late_delivery_num
from
(select goods, area, count(delivery) as on_time_delivery_num from temp_table
where delivery = '按时交货'
group by goods, area order by goods, area) as g
left join
(select goods, area, count(delivery) as late_delivery_num from temp_table
where delivery = '晚交货'
group by goods, area order by goods, area) as h
on g.goods = h.goods and g.area = h.area
);
alter table delivery_data_3 add column on_time_delivery_rate varchar (10);#创建按
时交货率字段
update delivery_data_3
set on_time_delivery_rate =
concat(cast(on_time_delivery_num/(on_time_delivery_num+late_delivery_num)*100
as decimal (4,2)),'%');#计算按时交货率
select * from delivery_data_3;#查看交货情况表

结论: 1 、销售区域:最差在西北地区,货品有 1 4 ,主要是货品 4 晚送货率过高导致。
2 、货品:最差的货品 2 ,主要送往华东和马来西亚,主要是马来西亚地区送货较晚导致。

2.2 分析是否存在尚有潜力的销售区域

2.2.1 按月份维度,看各货品每个月的销售数量
create table test_table
(sale_month int (2),
goods varchar (6),
num int (8));#创建表
insert into test_table
select sale_month, goods, sum(quantity) as num
from temp_table
group by sale_month, goods
order by sale_month, goods;#查询各货品每个月的销售数量
create table sale_num_table (
sale_month int (2),
goods1_sale_num int (8),
goods2_sale_num int (8),
goods3_sale_num int (8),
goods4_sale_num int (8),
goods5_sale_num int (8),
goods6_sale_num int (8));
insert into sale_num_table
select
l.sale_month,
max(goods_1_num) as 'goods1_sale_num',
max(goods_2_num) as 'goods2_sale_num',
max(goods_3_num) as 'goods3_sale_num',
max(goods_4_num) as 'goods4_sale_num',
max(goods_5_num) as 'goods5_sale_num',
max(goods_6_num) as 'goods6_sale_num'
from
(select
sale_month,
case goods when '货品1' then num else null end as 'goods_1_num',
case goods when '货品2' then num else null end as 'goods_2_num',
case goods when '货品3' then num else null end as 'goods_3_num',
case goods when '货品4' then num else null end as 'goods_4_num',
case goods when '货品5' then num else null end as 'goods_5_num',
case goods when '货品6' then num else null end as 'goods_6_num'
from test_table) as l
group by l.sale_month;#将test_table的数据行转列按列表形式呈现,方便观察数据总体情况
select * from sale_num_table;#查看表

结论:货品2在10月和12月销量猛增,有可能是公司加大了营销力度,也可能是开发了新的市场。

2.2.2 按销售区域维度,看各销售区域下各货品的销售数量

create table sale_num_table1 (
area varchar (6),
goods1_sale_num int (8),
goods2_sale_num int (8),
goods3_sale_num int (8),
goods4_sale_num int (8),
goods5_sale_num int (8),
goods6_sale_num int (8));
insert into sale_num_table1
select
n.area,
max(goods_1_num) as 'goods1_sale_num',
max(goods_2_num) as 'goods2_sale_num',
max(goods_3_num) as 'goods3_sale_num',
max(goods_4_num) as 'goods4_sale_num',
max(goods_5_num) as 'goods5_sale_num',
max(goods_6_num) as 'goods6_sale_num'
from
(
select
m.area,
case goods when '货品1' then num else null end as 'goods_1_num',
case goods when '货品2' then num else null end as 'goods_2_num',
case goods when '货品3' then num else null end as 'goods_3_num',
case goods when '货品4' then num else null end as 'goods_4_num',
case goods when '货品5' then num else null end as 'goods_5_num',
case goods when '货品6' then num else null end as 'goods_6_num'
from
(
select area, goods, sum(quantity) as num
from temp_table
group by area, goods
order by area, goods
) as m#查询各销售区域下各货品的销售数量
) as n
group by n.area;#使得所查询的数据行转列按列表形式呈现
select * from sale_num_table1;

结论:从销售区域看,每种货品销售区域为1-3个,货品1有三个销售区域,货品2有两个销售区域,货品3、 4、56只有一个销售区域,货品2在华东地区还有较大市场空间,适合加大投入,同时货品2在西北配送时效长,用户拒收率高,从成本角度考虑,应该减少投入。

2.3.3 按月份和销售区域相结合维度,看每月各销售区域的销售数量
create table sale_num_table2 (
sale_month int (2),
area varchar (6),
goods1_sale_num int (8),
goods2_sale_num int (8),
goods3_sale_num int (8),
goods4_sale_num int (8),
goods5_sale_num int (8),
goods6_sale_num int (8));
insert into sale_num_table2
select
p.sale_month, p.area,
max(goods_1_num) as 'goods1_sale_num',
max(goods_2_num) as 'goods2_sale_num',
max(goods_3_num) as 'goods3_sale_num',
max(goods_4_num) as 'goods4_sale_num',
max(goods_5_num) as 'goods5_sale_num',
max(goods_6_num) as 'goods6_sale_num'
from
(
select
o.sale_month, o.area,
case goods when '货品1' then num else null end as 'goods_1_num',
case goods when '货品2' then num else null end as 'goods_2_num',
case goods when '货品3' then num else null end as 'goods_3_num',
case goods when '货品4' then num else null end as 'goods_4_num',
case goods when '货品5' then num else null end as 'goods_5_num',
case goods when '货品6' then num else null end as 'goods_6_num'
from
(
select sale_month, area, goods, sum(quantity) as num
from temp_table
group by sale_month, area, goods
order by sale_month, area, goods
) as o#查询每月各销售区域的销售数量
) as p
group by p.sale_month, p.area;#使得所查询的数据行转列按列表形式呈现
select sale_month, area, goods2_sale_num from sale_num_table2 where
goods2_sale_num is not null;

结论:货品2没有开发新的市场,而是在华东地区加大了营销力度 可以在789月份加大华东地区的营销力度。

2.3 商品是否存在质量问题

2.3.1 各货品在各销售区域的用户反馈情况,包括拒货数、质量合格数、返修数、拒货率、返修率、合格率
create table quality_table (
goods varchar (6),
area varchar (6),
goods_refusal_num int (6),
qualified_goods_num int (6),
repaired_goods_num int (6),
goods_refusal_rate varchar (6),
qualified_goods_rate varchar (6),
repaired_goods_rate varchar (6));
insert into quality_table
select
s.*,
concat(cast(goods_refusal_num/(goods_refusal_num+qualified_goods_num+repaired_g
oods_num)*100 as decimal (4,2)),'%') as 'goods_refusal_rate',
concat(cast(qualified_goods_num/(goods_refusal_num+qualified_goods_num+repaired
_goods_num)*100 as decimal (4,2)),'%') as 'qualified_goods_rate',
concat(cast(repaired_goods_num/(goods_refusal_num+qualified_goods_num+repaired_
goods_num)*100 as decimal (4,2)),'%') as 'repaired_goods_rate'
from
(select
r.goods, r.area, max(拒货数) as 'goods_refusal_num', max(质量合格数) as
'qualified_goods_num', max(返修数) as 'repaired_goods_num'
from
(select
q.goods, q.area,
case comments when '拒货' then num else 0 end as '拒货数',
case comments when '质量合格' then num else 0 end as '质量合格数',
case comments when '返修' then num else 0 end as '返修数'
from
(select goods, area, comments, count(comments) as num from temp_table
group by goods, area, comments
order by 1, 2, 3
) as q#查询各货品在各销售区域的拒货数、质量合格数、返修数
) as r
group by r.goods, r.area
) as s;#查询拒货率、返修率、合格率,并使得数据行转列按列表形式呈现
select * from quality_table;#查看表

结论:货品3 、65 的合格率较高,返修率较低,货品124 的合格率较低,返修率较高,货品124质量存在问题,建议扩大抽检范围,增大质检力度 货品2在马来西亚的拒货率最高,按时交货率最低,猜测马来西亚对时效性要求较高,需要提高时效性。货品2在马来西亚的销售量较低,可以考虑减少投入。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值