前言
之前,公司要求每周做一次用户访问量、活跃用户等统计分析,并以邮件发出。为了快速高效地实现邮件的内容,即word的内容,我写了下面的demo。本demo使用的数据完全是小编杜撰,不具有任何实际意义。运行截图见图1。
date appId visit activity
20180801 1001 4988 906
20180801 1002 2209 950
20180801 1003 1323 455
20180801 1004 1361 128
20180801 1005 2117 373
20180801 1006 7982 633
20180801 1007 5073 115
20180802 1001 5119 909
20180802 1002 2057 948
20180802 1003 1004 400
20180802 1004 1461 108
20180802 1005 2204 391
20180802 1006 8199 640
20180802 1007 5126 115
代码
# -*- coding: utf-8 -*-
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from docx import Document
from docx.shared import Inches
"""
导入数据
"""
file_path = "C:\\Users\\yang\\Desktop\\log.xls"
file_data = pd.read_excel(file_path, sheetname="Sheet1")
date = [datetime.strptime(str(x), '%Y%m%d') for x in file_data['date']] # 把string转化为日期格式
del file_data['date'] # 从excel读到的date的格式是int,所以删除原来得列,然后在拼接上转化为日期格式后的date
file_data['date'] = date
"""
本周与上周的比较
"""
last_week_begin = '2018-08-01' # 上周开始日期
last_week_end = '2018-08-07' # 上周结束日期
this_week_begin = '2018-08-08' # 本周开始日期
this_week_end = '2018-08-14' # 本周结束日期
last_week_data_frame = file_data[file_data['date'].isin(pd.date_range(last_week_begin, last_week_end))] # 截取上周的数据
this_week_data_frame = file_data[file_data['date'].isin(pd.date_range(this_week_begin, this_week_end))] # 截取本周的数据
partner_map = {'1001': 'BaiDu', '1002': 'AiQiYi', '1003': 'WeChat', '1004': 'YY', '1005': '163mail', '1006': 'QQ',
'1007': 'DiDi'}
# 本周与上周的所有合作方的平均活跃用户
last_week_activity_mean = last_week_data_frame['activity'].groupby(last_week_data_frame['appId']).mean()
this_week_activity_mean = this_week_data_frame['activity'].groupby(this_week_data_frame['appId']).mean()
this_week_mean_sort = this_week_activity_mean.sort_values(ascending=False)
message_top3 = "本周活跃用户数排名前三的合作方由高到低依次为(合作方名称:均值) "+partner_map[str(this_week_mean_sort.index[0])]+": "+str(int(this_week_mean_sort.values[0]))+", "+partner_map[str(this_week_mean_sort.index[1])]+": "+str(int(this_week_mean_sort.values[1]))+", "+partner_map[str(this_week_mean_sort.index[2])]+": "+str(int(this_week_mean_sort.values[2]))+";"
message_min = "本周活跃用户数最少的合作方为 "+partner_map[str(this_week_mean_sort.index[6])]+": "+str(int(this_week_mean_sort.values[6]))+";"
print(message_top3)
print(message_min)
"""
生成柱状图
"""
x_label = pd.Series(partner_map)
x = list(range(len(x_label)))
total_width, n = 0.8, 2 # 并列的两个柱状图的总宽度
width = total_width / n # 一个柱状图的宽度
plt.bar(x, last_week_activity_mean, width=width, label='last week', fc='y')
for i in range(len(x)):
x[i] = x[i] + width
plt.bar(x, this_week_activity_mean, width=width, label='this week', tick_label=x_label, fc='r')
plt.legend()
plt.savefig('C:\\Users\\yang\\Desktop\\demo.png')
# plt.show()
"""
把message和图片写入docx文件
"""
document = Document()
document.add_heading('本周周报', 0) # 题目
document.add_heading('Heading, level 1', level=1) # 一级标题
document.add_paragraph(message_top3, style='List Bullet')
document.add_paragraph(message_min, style='List Bullet')
document.add_picture('C:\\Users\\yang\\Desktop\\demo.png', width=Inches(3), height=Inches(2))
document.save('C:\\Users\\yang\\Desktop\\demo.docx')
后续工作
- 实现插入图片居中、添加图名
- 优化代码中不合理的地方
- 美化柱状图,如:添加标题等