目录:数据理解
数据清洗
分析思路
分析过程
总结与建议
一.数据理解
数据来源于美团面试题,考点是分析差评原因,并给出改善方案。
字段理解:
数据大小:542 * 11
备注:骑手姓名重复默认为同一个骑手;同一个骑手可能在不同站点出现差评。
二.数据清洗
1.提取差评标签,转换成类似one-hot的编码(每个差评标签对应一列,记录该标签在该条评价中出现的次数):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Read the workbook and drop the column whose header is the task-note text.
data = pd.read_excel(
    'C:/Users/Hp/Desktop/temp/美团差评分析/不满意率分析0415.xlsx'
).drop(
    columns='备注:\n1、分析差评原因,并给出改善方案;\n2、骑手姓名重复默认为同一个骑手;同一个骑手可能在不同站点出现差评\n'
)
# Split each '顾客配送评价标签' string on '|' and keep the pieces in '评价标签'.
data['评价标签'] = data['顾客配送评价标签'].str.split('|')
# Flatten the per-row label collections into one list of every label seen.
total_list = []
for labels in data['评价标签']:
    total_list.extend(labels)
# Distinct label values, i.e. every negative-review type present in the data.
set(total_list)
def judge_1(x):
    """Count how often the '仪表不整' (untidy appearance) label occurs in a row.

    Args:
        x: A row-like object indexable by column name; its '评价标签' entry
            is expected to be an iterable of label strings.

    Returns:
        The number of labels equal to '仪表不整', or 0 when the entry is not
        iterable (for example a missing value).
    """
    labels = x['评价标签']
    try:
        label_iter = iter(labels)
    except TypeError:
        # e.g. a NaN left behind by str.split on an empty cell: treat it as
        # "no labels" rather than aborting the whole DataFrame.apply call.
        return 0
    return sum(1 for label in label_iter if label == '仪表不整')
# Store judge_1's per-row result in a new '仪表不整' column.
data['仪表不整'] = data.apply(judge_1, axis='columns')
def judge_2(x):
    """Count how often the '其他' (other) label occurs in a row.

    Args:
        x: A row-like object indexable by column name; its '评价标签' entry
            is expected to be an iterable of label strings.

    Returns:
        The number of labels equal to '其他', or 0 when the entry is not
        iterable (for example a missing value).
    """
    labels = x['评价标签']
    try:
        label_iter = iter(labels)
    except TypeError:
        # e.g. a NaN left behind by str.split on an empty cell: treat it as
        # "no labels" rather than aborting the whole DataFrame.apply call.
        return 0
    return sum(1 for label in label_iter if label == '其他')
# Store judge_2's per-row result in a new '其他' column.
data['其他'] = data.apply(judge_2, axis='columns')
def judge_3(x):
    """Count how often the '少餐/洒餐' (missing/spilled food) label occurs in a row.

    Args:
        x: A row-like object indexable by column name; its '评价标签' entry
            is expected to be an iterable of label strings.

    Returns:
        The number of labels equal to '少餐/洒餐', or 0 when the entry is not
        iterable (for example a missing value).
    """
    labels = x['评价标签']
    try:
        label_iter = iter(labels)
    except TypeError:
        # e.g. a NaN left behind by str.split on an empty cell: treat it as
        # "no labels" rather than aborting the whole DataFrame.apply call.
        return 0
    return sum(1 for label in label_iter if label == '少餐/洒餐')
# Store judge_3's per-row result in a new '少餐/洒餐' column.
data['少餐/洒餐'] = data.apply(judge_3, axis='columns')
def judge_4(x):
    """Count how often the '态度不好' (bad attitude) label occurs in a row.

    Args:
        x: A row-like object indexable by column name; its '评价标签' entry
            is expected to be an iterable of label strings.

    Returns:
        The number of labels equal to '态度不好', or 0 when the entry is not
        iterable (for example a missing value).
    """
    labels = x['评价标签']
    try:
        label_iter = iter(labels)
    except TypeError:
        # e.g. a NaN left behind by str.split on an empty cell: treat it as
        # "no labels" rather than aborting the whole DataFrame.apply call.
        return 0
    return sum(1 for label in label_iter if label == '态度不好')
# Store judge_4's per-row result in a new '态度不好' column.
data['态度不好'] = data.apply(judge_4, axis='columns')
def judge_5(x):
    """Count how often the '提前点送达' (marked delivered early) label occurs in a row.

    Args:
        x: A row-like object indexable by column name; its '评价标签' entry
            is expected to be an iterable of label strings.

    Returns:
        The number of labels equal to '提前点送达', or 0 when the entry is
        not iterable (for example a missing value).
    """
    labels = x['评价标签']
    try:
        label_iter = iter(labels)
    except TypeError:
        # e.g. a NaN left behind by str.split on an empty cell: treat it as
        # "no labels" rather than aborting the whole DataFrame.apply call.
        return 0
    return sum(1 for label in label_iter if label == '提前点送达')
# Store judge_5's per-row result in a new '提前点送达' column.
data['提前点送达'] = data.apply(judge_5, axis='columns')
def judge_6(x):
    """Count how often the '送达不通知' (delivered without notice) label occurs in a row.

    Args:
        x: A row-like object indexable by column name; its '评价标签' entry
            is expected to be an iterable of label strings.

    Returns:
        The number of labels equal to '送达不通知', or 0 when the entry is
        not iterable (for example a missing value).
    """
    labels = x['评价标签']
    try:
        label_iter = iter(labels)
    except TypeError:
        # e.g. a NaN left behind by str.split on an empty cell: treat it as
        # "no labels" rather than aborting the whole DataFrame.apply call.
        return 0
    return sum(1 for label in label_iter if label == '送达不通知')
# Store judge_6's per-row result in a new '送达不通知' column.
data['送达不通知'] = data.apply(judge_6, axis='columns')
# 判断送达超时
def judge_7(x):
total = 0
for i in x['评价标签']:
if i == '送达超时':
total += 1