# coding=utf-8
import pandas as pd
import xlwt as xlwt
from datetime import datetime
from pandas import DataFrame, np
from pyecharts import Line, Bar
data = pd.read_csv(r'D:\data\浏览器下载文件\B\附件1.csv',engine='python')
# data1=data.iloc[0:10,0:10] #iloc转换为行第一个0:10表示1行-10行,0:10表示1列-10列
data_address = data['地点'] #打印地点
data_A = data[data_address=='A'] #将地点满足A的存到data_A里
# print(data_A)
data_B = data[data_address=='B']
data_C = data[data_address=='C']
data_D = data[data_address=='D']
data_E = data[data_address=='E']
# 将数据导出为csv
data_A.to_csv('task1-1A.csv',index=False) #index=False,不加上所以,True是加上索引
data_B.to_csv('task1-1B.csv',index=False)
data_C.to_csv('task1-1C.csv',index=False)
data_D.to_csv('task1-1D.csv',index=False)
data_E.to_csv('task1-1E.csv',index=False)
data_time=data['支付时间'].apply(lambda x:x.split(' ')[0][5:6]) #匹配时间为四月的
data_4 = data[data_time=='4'] #筛选时间为4月的数据
# print(data_4)
'''
筛选A,B,C,D,E,四月份的数据
'''
data_A_4 = data_4[data_4['地点']=='A']
data_B_4 = data_4[data_4['地点']=='B']
data_C_4 = data_4[data_4['地点']=='C']
data_D_4 = data_4[data_4['地点']=='D']
data_E_4 = data_4[data_4['地点']=='E']
# A,B,C,D,E四月份交易额,订单数
data_4_price_sum = [] #交易额
data_4_price_sum.append(sum(data_A_4['实际金额']))
data_4_price_sum.append(sum(data_B_4['实际金额']))
data_4_price_sum.append(sum(data_C_4['实际金额']))
data_4_price_sum.append(sum(data_D_4['实际金额']))
data_4_price_sum.append(sum(data_E_4['实际金额']))
print(data_4_price_sum)
data_4_count = [] #订单数
data_4_count.append(len(data_A_4['订单号'])-1)
data_4_count.append(len(data_B_4['订单号'])-1)
data_4_count.append(len(data_C_4['订单号'])-1)
data_4_count.append(len(data_D_4['订单号'])-1)
data_4_count.append(len(data_E_4['订单号'])-1)
print(data_4_count)
# 四月份总销售额、总len销量
data_4_price_sum_1=sum(data_4['实际金额']) #交易额
data_4_count_1 = len(data_4['订单号'])-1 #订单数
'''
创建excel表,将四月份每台设备的销售额,销量,总销售额,总销量存入
'''
work = xlwt.Workbook(encoding = 'ascii')
worksheet = work.add_sheet('test1')
# 设置表头
table_title = ['设备','销售额','销量']
shape_1 = ['A','B','C','D','E']
for i in range(0,len(table_title)):
worksheet.write(0,i,table_title[i])
# 写入内容
j = 0
for j in range(1,len(shape_1)+1):
worksheet.write(j,0,shape_1[j-1])
worksheet.write(j,1,data_4_price_sum[j-1])
worksheet.write(j,2,data_4_count[j-1])
worksheet.write(7,0,"总销量")
worksheet.write(7,1,data_4_count_1)
worksheet.write(8,0,"总售额")
worksheet.write(8,1,data_4_price_sum_1)
work.save('task1_2.csv')
'''
计算每台售货机每个月的每单平均交易额与日均订单量,以表
格形式体现在报告中
'''
data['支付时间'] = data['支付时间'].apply(lambda x:x.split(' ')[0])
data['支付时间']= data['支付时间'].apply(lambda x:x.split('/')[1])
months=[31,28,31,30,31,30,31,31,30,31,30,31] #2017年每个月的天数
df1 = pd.DataFrame([],columns = ["设备", "月份", "平均交易额","日均订单量"])
line = Line("每台售货机每月总交易额折线图")
bar = Bar("每台售货机交易额月环比增长率柱状图")
for i in ['A','B','C','D','E']:
arr1=[] #x轴月份
arr2=[] #y轴交易额
arr3=['0'] #y轴增长率,1月份无法计算增长率,故设置为0
for j in range(1,13):
d1=data[data['地点']==i] #筛选每台机器
d2=d1[d1['支付时间']==str(j)] #筛选每个月
d3=sum(d2['实际金额'])/len(d2) #每单平均交易额
d4=len(d2)/months[j-1] #日均订单量
arr1.append(j) #x轴月份
arr2.append(sum(d2['实际金额'])) #y轴交易额
df1 = df1.append([{'设备': i, '月份': j, '每单平均交易额': d3, '日均订单量': d4, }])
if j>1: #因为1月份d5无法计算上个月数据,所以要判断
d5 = d1[d1['支付时间'] == str(j - 1)] # 上个月的数据
d6 = (sum(d2['实际金额'])-sum(d5['实际金额']) )/ sum(d5['实际金额'])
arr3.append(d6)
line.add(str(i),arr1, arr2)
bar.add(str(i),arr1,arr3)
line.render('月销售额.html')
bar.render('增长率柱状图.html')
df1.to_csv('task1_3.csv',index=False)
'''
任务 3.1 分析各售货机商品销售数据,总结规律,给出每台售货机饮料
类商品的标签,结果保存在 CSV 文件中,文件名分别为 “task3-1A.csv”、
“task3-1B.csv”、…、“task3-1E.csv”,格式如下表所示。
'''
# for i in ['A','B','C','D','E']:
# d5=data[data['地点']==i] #每台设备
# shop_dict = {}
# for shop in d5['商品']:
# if shop in shop_dict:
# shop_dict[shop] +=1 #如果在shop_dict则加1
# else:
# shop_dict[shop] =1 #如果不再shop_dict则设为1
#
# shop_dict = sorted(shop_dict.items(), key=lambda x: x[1])
# print(sorted(shop_dict) )