2018年销售汇总表-pandas数据分析

import numpy as np
import pandas as pd
from sparklines import sparklines
df = pd.read_csv('2018年销售汇总表.csv',parse_dates = ['日期'],encoding="gbk")
df.head()# 查看前五条数据
t姓名单品数量单价金额日期
0740150Barton LLCB1-200003986.693380.912018-01-01 07:21:00
1714466Trantow-BarrowsS2-77896-163.16-63.162018-01-01 10:00:00
2218895Kulas IncB1-699242390.702086.102018-01-01 13:24:00
3307599Kassulke, Ondricka and MetzS1-654814121.05863.052018-01-01 15:05:00
4412290Jerde-HilpertS2-34077683.21499.262018-01-01 23:26:00
df.info()#显示数据基础信息
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 7 columns):
t     1500 non-null int64
姓名    1500 non-null object
单品    1500 non-null object
数量    1500 non-null int64
单价    1500 non-null float64
金额    1500 non-null float64
日期    1500 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(2), object(2)
memory usage: 82.2+ KB
# 讲解:按《姓名》分组,聚合《金额》的均值和总和
# 用法:按客人姓名查看消费金额的平均值与总金额
df.groupby("姓名")["金额"].agg(["mean","sum"])
meansum
姓名
Barton LLC1334.615854109438.50
Cronin, Oberbrunner and Spencer1339.32164289734.55
Frami, Hills and Schmidt1438.466528103569.59
Fritsch, Russel and Anderson1385.366790112214.71
Halvorson, Crona and Champlin1206.97172470004.36
Herman LLC1336.53225882865.00
Jerde-Hilpert1265.072247112591.43
Kassulke, Ondricka and Metz1350.79796986451.07
Keeling LLC1363.977027100934.30
Kiehn-Spinka1260.87050699608.77
Koepp Ltd1264.152927103660.54
Kuhn-Gusikowski1247.86684991094.28
Kulas Inc1461.191064137351.96
Pollich LLC1196.53671287347.18
Purdy-Kunde1469.77754777898.21
Sanford and Sons1391.87295898822.98
Stokes LLC1271.33222291535.92
Trantow-Barrows1312.567872123381.38
White-Trantow1579.558023135841.99
Will LLC1411.318919104437.60
# 讲解:在上一条命令中,并设置平均值与总金额的显示风格
# 用法:格式化数值,在前面添加美元符号,包括2位小数点
(
    df.groupby("姓名")["金额"]
    .agg(["mean","sum"])
    .style.format("${0:,.2f}")
)
meansum
姓名
Barton LLC$1,334.62$109,438.50
Cronin, Oberbrunner and Spencer$1,339.32$89,734.55
Frami, Hills and Schmidt$1,438.47$103,569.59
Fritsch, Russel and Anderson$1,385.37$112,214.71
Halvorson, Crona and Champlin$1,206.97$70,004.36
Herman LLC$1,336.53$82,865.00
Jerde-Hilpert$1,265.07$112,591.43
Kassulke, Ondricka and Metz$1,350.80$86,451.07
Keeling LLC$1,363.98$100,934.30
Kiehn-Spinka$1,260.87$99,608.77
Koepp Ltd$1,264.15$103,660.54
Kuhn-Gusikowski$1,247.87$91,094.28
Kulas Inc$1,461.19$137,351.96
Pollich LLC$1,196.54$87,347.18
Purdy-Kunde$1,469.78$77,898.21
Sanford and Sons$1,391.87$98,822.98
Stokes LLC$1,271.33$91,535.92
Trantow-Barrows$1,312.57$123,381.38
White-Trantow$1,579.56$135,841.99
Will LLC$1,411.32$104,437.60
# 平均值与总金额显示为不带小数点
(
    df.groupby("姓名")["金额"]
    .agg(["mean","sum"])
    .style.format("${0:,.0f}")
)
meansum
姓名
Barton LLC$1,335$109,438
Cronin, Oberbrunner and Spencer$1,339$89,735
Frami, Hills and Schmidt$1,438$103,570
Fritsch, Russel and Anderson$1,385$112,215
Halvorson, Crona and Champlin$1,207$70,004
Herman LLC$1,337$82,865
Jerde-Hilpert$1,265$112,591
Kassulke, Ondricka and Metz$1,351$86,451
Keeling LLC$1,364$100,934
Kiehn-Spinka$1,261$99,609
Koepp Ltd$1,264$103,661
Kuhn-Gusikowski$1,248$91,094
Kulas Inc$1,461$137,352
Pollich LLC$1,197$87,347
Purdy-Kunde$1,470$77,898
Sanford and Sons$1,392$98,823
Stokes LLC$1,271$91,536
Trantow-Barrows$1,313$123,381
White-Trantow$1,580$135,842
Will LLC$1,411$104,438
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
# 1.使用普通格式字典控制每列的格式
(
    monthly_sales
    .style
    .format(format_dict)
    .hide_index()
)

日期sum月占比
2018-01-31$185,3629.1818%
2018-02-28$146,2127.2426%
2018-03-31$203,92110.1012%
2018-04-30$174,5748.6475%
2018-05-31$165,4198.1940%
2018-06-30$174,0898.6235%
2018-07-31$191,6629.4939%
2018-08-31$153,7797.6174%
2018-09-30$168,4438.3438%
2018-10-31$171,4958.4950%
2018-11-30$119,9615.9423%
2018-12-31$163,8678.1171%
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

# 2.使用条件格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
    monthly_sales
    .style
    .format(format_dict)
    .hide_index()
    .highlight_max(color="lightgreen")
    .highlight_min(color="red")
)
日期sum月占比
2018-01-31$185,3629.1818%
2018-02-28$146,2127.2426%
2018-03-31$203,92110.1012%
2018-04-30$174,5748.6475%
2018-05-31$165,4198.1940%
2018-06-30$174,0898.6235%
2018-07-31$191,6629.4939%
2018-08-31$153,7797.6174%
2018-09-30$168,4438.3438%
2018-10-31$171,4958.4950%
2018-11-30$119,9615.9423%
2018-12-31$163,8678.1171%
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

# 3.使用渐变色格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
    monthly_sales
    .style
    .format(format_dict)
    .background_gradient(subset=["sum"],cmap="PRGn")
)
日期sum月占比
02018-01-31$185,3629.1818%
12018-02-28$146,2127.2426%
22018-03-31$203,92110.1012%
32018-04-30$174,5748.6475%
42018-05-31$165,4198.1940%
52018-06-30$174,0898.6235%
62018-07-31$191,6629.4939%
72018-08-31$153,7797.6174%
82018-09-30$168,4438.3438%
92018-10-31$171,4958.4950%
102018-11-30$119,9615.9423%
112018-12-31$163,8678.1171%
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

# 4.使用单元格柱形格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
    monthly_sales
    .style
    .format(format_dict)
    .hide_index()
    .bar(color="#FFA07A",vmin=100_000,subset=["sum"],align="zero")
    .bar(color='lightblue', vmin=0, subset=['月占比'], align='zero')
    .set_caption("2018年销售一览表")
)
2018年销售一览表
日期sum月占比
2018-01-31$185,3629.1818%
2018-02-28$146,2127.2426%
2018-03-31$203,92110.1012%
2018-04-30$174,5748.6475%
2018-05-31$165,4198.1940%
2018-06-30$174,0898.6235%
2018-07-31$191,6629.4939%
2018-08-31$153,7797.6174%
2018-09-30$168,4438.3438%
2018-10-31$171,4958.4950%
2018-11-30$119,9615.9423%
2018-12-31$163,8678.1171%
def sparkline_str(x):
    bins=np.histogram(x)[0]#直方图
    s1="".join(sparklines(bins))
    return s1
sparkline_str.__name__ = "走势图"
df.groupby("姓名")["数量","金额"].agg(["mean",sparkline_str])
数量金额
mean走势图mean走势图
姓名
Barton LLC24.890244▄▄▃▂▃▆▄█▁▄1334.615854█▄▃▆▄▄▁▁▁▁
Cronin, Oberbrunner and Spencer24.970149█▄▁▄▄▇▅▁▄▄1339.321642█▅▅▃▃▃▂▂▁▁
Frami, Hills and Schmidt26.430556▄▄▁▂▇█▂▂▅▅1438.466528█▅▄▇▅▃▄▁▁▁
Fritsch, Russel and Anderson26.074074▁▄▇▃▂▂█▃▄▄1385.366790▇█▃▄▂▂▁▂▁▁
Halvorson, Crona and Champlin22.137931▇▆▆▇█▁▄▂▄▃1206.971724██▆▅▁▃▂▂▂▂
Herman LLC24.806452▄▃▅▁▆▄▂▆▃█1336.532258█▅▇▄▅▄▁▃▂▂
Jerde-Hilpert22.460674▄▄█▁▂▅▃▂▄▃1265.072247█▄▅▂▁▂▃▂▂▁
Kassulke, Ondricka and Metz25.734375▂▂▁▁▂▂▁▅▄█1350.797969█▆▆▄▄▃▂▁▁▂
Keeling LLC24.405405▁▄▇▃▅█▃▄▃▆1363.977027▅█▆▃▄▂▂▁▁▁
Kiehn-Spinka22.227848▃▂█▂▃▅▄▁▄▁1260.870506█▇▄▃▃▂▁▂▁▁
Koepp Ltd21.829268▅▇█▆▄▇▅▁▅▇1264.152927█▇▅▂▄▂▂▂▁▁
Kuhn-Gusikowski22.808219▂▄█▄▃▁█▄▂▄1247.866849▆█▄▃▃▃▃▁▁▁
Kulas Inc24.095745▇▃▇▂▇▁▅▆█▆1461.191064█▅█▂▄▄▄▂▁▁
Pollich LLC23.383562█▂▅▂▃▆▁▂▄▇1196.536712█▆▆▃▃▃▂▂▁▁
Purdy-Kunde27.358491▄▅█▁▄▇▅█▇█1469.777547█▅▄▃▄▃▃▂▁▂
Sanford and Sons24.000000▂▂▂▆▁█▂▂▁▅1391.872958▅▇█▃▃▄▁▃▁▂
Stokes LLC24.527778▄▄▄▁▃▂▄█▄▄1271.332222█▇▆▆▄▂▂▃▁▂
Trantow-Barrows24.159574▅▄▅▂▃▃▄▄▁█1312.567872█▇▅▆▃▂▂▁▁▁
White-Trantow26.255814▃▂▆▆▃██▁██1579.558023▄█▆▅▄▃▄▂▁▃
Will LLC24.702703▂█▁▄▄▄▂▅▅▄1411.318919▆█▄▅▄▂▁▂▁▂
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值