python透视表画图_python数据分析-08透视表以及matplotlib库

import pandas as pd

import numpy as np

from pandas import Series,DataFrame

"""

df = pd.read_excel("sales-funnel.xlsx")

#print(df)

# Account Name ... Price Status

# 0 714466 Trantow-Barrows ... 30000 Presented

# 1 714466 Trantow-Barrows ... 10000 Presented

# 2 714466 Trantow-Barrows ... 5000 Pending

# 3 737550 Fritsch,Russel and Anderson ... 35000 declined

# 4 146832 Kiehn-Spinka ... 65000 won

# 5 218895 Kulas Inc ... 40000 Pending

# 6 218895 Kulas Inc ... 10000 Presented

# 7 412290 Jerde-Hilpert ... 5000 Pending

# 8 740150 Barton-LLC ... 35000 declined

# 9 141962 Barton-LLC ... 65000 won

# 10 163416 Purdy-Kunde ... 30000 Presented

# 11 239344 Stokes-LLC ... 5000 Pending

# 12 239344 Stokes-LLC ... 10000 Presented

# 13 307599 Kassulke,Ondricka and Metz ... 7000 won

# 14 688981 Keeling LLC ... 100000 won

# 15 729833 Koepp-Ltd ... 65000 declined

# 16 729833 Koepp-Ltd ... 5000 Presented

#

# [17 rows x 8 columns]

print(pd.pivot_table(df,index=["Name"]))

# Account Price Quantity

# Name

# Barton-LLC 441056 50000 1.500000

# Fritsch,Russel and Anderson 737550 35000 1.000000

# Jerde-Hilpert 412290 5000 2.000000

# Kassulke,Ondricka and Metz 307599 7000 3.000000

# Keeling LLC 688981 100000 5.000000

# Kiehn-Spinka 146832 65000 2.000000

# Koepp-Ltd 729833 35000 2.000000

# Kulas Inc 218895 25000 1.500000

# Purdy-Kunde 163416 30000 1.000000

# Stokes-LLC 239344 7500 1.000000

# Trantow-Barrows 714466 15000 1.333333

print(pd.pivot_table(df,index=["Manager","Rep"],values=["Price","Quantity"],aggfunc=sum))

# Price Quantity

# Manager Rep

# Debra-Henley Craig-Booker 80000 5

# Daniel-Hilton 115000 5

# John-Smith 40000 3

# Fred-Anderson Cedric-Moss 110000 5

# Wendy-Yule 177000 12

"""

"""

#------------------

#分组和透视功能实战

link = "https://projects.fivethirtyeight.com/flights"

#假设文件usa_flights.csv 文件数据完整

df = pd.read_csv("usa_flights.csv")

print(df.shape)#(201664,14)

#获取延误时间最长的top10

print(df.sort_values("arr_delay",ascending=False)[:10])

#计算延误和没有延误所占比例

print(df["cancelled"].value_counts())

# 0 196873

# 1 4791

# Name: cancelled,dtype:int64

df["delayed"] = df["arr_delay"].apply(lambda x:x>0)

print(df.head())

print(df["delayed"].value_counts())

#False 103037

#True 98627

#Name:delayed ,dtype:int64

delay_data = df["delayed"].value_counts()

print(delay_data[1]/(delay_data[0]+delay_data[1]))

#0.4890659...

#每个航空公司延误的情况

delay_group = df.groupby(["unique_carrier","delayed"])

print(delay_group.size())

df_delay = delay_group.size().unstack()

print(df_delay)

import matplotlib.pyplot as plt

df_delay.plot()

plt.show()

"""

#---------------------------------------------

#Matplotlib介绍

#为什么用Python画图

#GUI太复杂

#Excel太头疼

#Python简单免费()

#什么是matplotlib?

#一个Python包

#用于2D绘图

#非常强大和流行

#有很多扩展

import matplotlib.pyplot as plt

import numpy as np

# x = np.linspace(0,2*np.pi,100)

# y = np.sin(x)

# plt.plot(x,y)

# plt.show()

#Matplotlib Architecture 架构

#Backend:主要处理把图显示到哪里和画到哪里:

#Artist:图像显示成什么样?

#Scripting:pyplot,python语法和API

"""

#matplotlib的简单绘图-plot

import numpy as np

import matplotlib.pyplot as plt

# a = [1,2,3]

# #plt.plot(a)#这里画图取值x轴分别是a的index指标,0,1,2,y轴是1,2,3

# b = [4,5,6]

# plt.plot(a,b)#这里画图取值x轴分别是a的值,1,2,3,y轴是4,5,6

# plt.show()

# #会报错:a和b的长度不一致(3个x对应4个y),plot要求x和y的长度相同

# a = [1,2,3]

# b = [4,5,6,7]

# plt.plot(a,b)

# plt.show()

# a = [1,2,3]

# #plt.plot(a)#这里画图取值x轴分别是a的index指标,0,1,2,y轴是1,2,3

# b = [4,5,6]

# # plt.plot(a,b,"*")#用*号表示点

# # plt.plot(a,b,"b--")

# # plt.show()

#

# c = [10,8,6]

# d = [1,8,3]

# plt.plot(a,b,"b--",c,d,"r*")

# plt.show()

t = np.arange(0.0,2.0,0.1)

print(t.size)

s = np.sin(t*np.pi)

print(s.size)

plt.plot(t,s,"r--",label="aaaa")

plt.plot(t*2,s,"--",label="bbbb")

plt.xlabel("This is X")

plt.ylabel("This is Y")

plt.title("This is a Demo")

plt.legend()#这里是将label显示出来

plt.show()

"""

#---------------------------------------

#matplotlib简单绘图之subplot

# x = np.linspace(0.0,5.0)

# y1 = np.sin(np.pi*x)

# y2 = np.sin(np.pi*x*2)

# plt.subplot(2,1,1)#表示切换到2行1列子图的第一个位置画图

# plt.plot(x,y1,"b--")

# plt.ylabel("y1")

# plt.subplot(2,1,2)#表示切换到2行1列子图的第二个位置画图

# plt.plot(x,y2,"r--")

# plt.ylabel("y2")

# plt.xlabel("X")

# plt.show()

#更改图片布局:改为两行两列,左右图片

# x = np.linspace(0.0,5.0)

# y1 = np.sin(np.pi*x)

# y2 = np.sin(np.pi*x*2)

# plt.subplot(2,2,1)

# plt.plot(x,y1,"b--")

# plt.ylabel("y1")

# plt.subplot(2,2,2)

# plt.plot(x,y2,"r--")

# plt.ylabel("y2")

# plt.xlabel("X")

# plt.show()

#更改图片布局:改为两行两列

# x = np.linspace(0.0,5.0)

# y1 = np.sin(np.pi*x)

# y2 = np.sin(np.pi*x*2)

#

# plt.subplot(2,2,1)

# plt.plot(x,y1,"b--")

# plt.ylabel("y1")

#

# plt.subplot(2,2,2)#表示切换到2行2列子图的第二个位置画图

# plt.plot(x,y2,"r--")

# plt.ylabel("y2")

#

#

# plt.subplot(2,2,3)#表示切换到2行2列子图的第三个位置画图

# plt.plot(x,y2,"r*")

# plt.ylabel("y2")

#

# plt.subplot(2,2,4)

# plt.plot(x,y1,"b*")

# plt.ylabel("y1")

#

# plt.xlabel("X")

# plt.show()

#a = plt.subplots()

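#print(a)#(<Figure size 640x480 with 1 Axes>, <matplotlib.axes._subplots.AxesSubplot object at 0x...>)

# print(type(a))#<class 'tuple'>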

# print(a[0])#Figure(640x480) 表示画图,一块画布

# print(a[1])#AxesSubplot(0.125,0.11;0.775x0.77)#表示画笔

# figure,ax = plt.subplots()

# ax.plot([1,2,3,4,5])

# plt.show()#是正常的画图

# x = np.linspace(0.0,5.0)

# y1 = np.sin(np.pi*x)

# y2 = np.sin(np.pi*x*2)

# figure,ax = plt.subplots(2,2)#设为2行2列

# ax[0][0].plot(x,y1)

# ax[0][1].plot(x,y2)

# plt.show()#是正常的画图

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值