欢迎访问个人博客http://www.jkraise.top
jupyter
-
说明:
Jupyter项目是一个交互式的Python科学计算和数据分析生态系统,最常用的是Jupyter Notebook
交互式的编程环境、友好的界面、便于分享的文档格式,以及对排版语法、绘图、数学公式的支持,使它成为最流行的Python科学计算工具
install:
# 1. 进入虚拟环境
workon py3_numpy
# 2. 安装模块
# Windows:
pip3 install jupyter
pip3 install matplotlib
# Linux:
pip3 install notebook
pip3 install matplotlib
-
打开
jupyter-notebook
-
关闭:在终端按 Ctrl + C,然后输入 y 确认关闭
Matplotlib 绘图库
1. 基础
-
折线图
# Import the plotting library.
import matplotlib.pyplot as plt

# With a single sequence the x-axis values may be omitted; the values
# given are used as the Y coordinates.
plt.plot([0, 2, 4, 6, 8])
# With two sequences the first is the X coordinates, the second the Y
# coordinates.
plt.plot([0, 2, 4, 6, 8], [1, 5, 3, 9, 7])
# Display the figure.
plt.show()
- 线条颜色,color='g'
- 线条风格,linestyle='--'
- 线条粗细,linewidth=5.0
- 标记风格,marker='o'
- 标记颜色,markerfacecolor='b'
- 标记尺寸,markersize=20
- 透明度,alpha=0.5
案例
# Exchange-rate values (closing prices), one per trading day.
eurcny = [6.8007, 6.8007, 6.8015, 6.8015, 6.8060, 6.8036, 6.8025,
          6.7877, 6.7835, 6.7758, 6.7700, 6.7463, 6.7519, 6.7595,
          6.7669, 6.7511, 6.7511, 6.7539, 6.7430, 6.7374, 6.7265]
# Dates matching the prices above, one entry per price.
date = [3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 17, 18, 19, 20, 21,
        24, 25, 26, 27, 28, 31]

plt.plot(
    date,                       # x-axis data: dates
    eurcny,                     # y-axis data: closing prices
    color='r',                  # line colour
    linestyle='--',             # line style
    linewidth=2,                # line width
    marker='o',                 # marker style (plain ASCII quotes are required)
    markerfacecolor='#ffff00',  # marker fill colour
    markersize=5,               # marker size
    alpha=0.5,                  # transparency
)
# NOTE(review): this second call draws the same prices again without the
# dates, using default styling. Kept from the original -- confirm it is
# intended.
plt.plot(eurcny)
plt.show()
-
散点图/气泡图
# Sample data: the odd numbers 1..17 on the x-axis and one y value per x.
x = list(range(1, 18, 2))
y = [2, -5, 19, 3, 5, 8, 12, 6, 1]

# Draw one marker per (x, y) pair and display the figure.
plt.scatter(x, y)
plt.show()
- 条形图/柱状图
x = list(range(1, 6))
y = [3, 6, 1, 8, 2]

# Vertical bar chart: each x entry is one bar and y is that bar's height
# (the `width` argument would control how thick the bars are).
plt.bar(x, y)
plt.show()

# Horizontal bar chart -- note that the meaning of x and y changes here:
# x is each bar's distance from the x-axis, y is the bar's length, and
# `height` is the bar's thickness.
plt.barh(x, y, height=0.5)
# Label the y-axis ticks.
plt.yticks(x, ['a', 'b', 'c', 'd', 'e'])
plt.show()
案例
# Set the canvas size and resolution.
plt.figure(figsize=(6, 6), dpi=100)

# Average scores per subject, in the order
# Chinese / Maths / English / Physics / Chemistry.
boy = [85.5, 91, 72, 59, 66]
girl = [94, 82, 89.5, 62, 49]

# Subject positions for the boys' bars; the girls' bars are shifted right
# by 0.3, which equals the bar width, so the two sets sit side by side.
course = [1, 2, 3, 4, 5]
course2 = [1.3, 2.3, 3.3, 4.3, 5.3]

# Draw the boys' bars (green) first, then the girls' bars (red).
for positions, scores, colour in ((course, boy, 'g'), (course2, girl, 'r')):
    plt.bar(positions, scores, color=colour, width=0.3, alpha=0.3)

# Replace the numeric subject positions with text labels placed between
# each pair of bars.
course3 = [1.15, 2.15, 3.15, 4.15, 5.15]
plt.xticks(course3, ['Chi', 'Math', 'Eng', 'Phy', 'Che'])

# Save the figure, then display it.
plt.savefig("./avg_data.png")
plt.show()
还有很多其他类型的图表,这里不一一列举了
2. 提升
- 图像绘制区域
- 图像组件
案例:
折线图
# Set the canvas size and resolution.
plt.figure(figsize=(6, 6), dpi=100)

# Coordinates must be lists, and x and y must have the same number of
# entries; here x is simply 0..9, one value per y entry.
y = [12, 32, 2, 34, 23, 12, 56, 34, 23, 44]
x = list(range(len(y)))

plt.plot(x, y)

# Save the figure, then display it.
plt.savefig("./data1.png")
plt.show()
解决中文显示问题
linux 版:
# Example: compare the temperatures of Beijing and Shanghai.
import matplotlib.pyplot as plt
import random
from matplotlib import font_manager

# Font file used for Chinese text; the path must be absolute.
# It has to be passed as `fname=`: the first positional parameter of
# FontProperties is `family`, so a bare positional path is not loaded as
# a font file.
# NOTE(review): this is a Windows-style path although the section is
# labelled as the Linux variant -- point it at a font file that exists on
# your machine.
my_font = font_manager.FontProperties(
    fname=r'D:\Pycharm-project\untitled\py3_nmp\arial unicode ms.ttf'
)

# Set the canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# Coordinates: x and y must contain the same number of points.
x = range(60)
# 60 random values between 15 and 18 degrees (Shanghai).
y = [random.uniform(15, 18) for i in range(60)]
# 60 random values between 0 and 5 degrees (Beijing).
y_beijing = [random.uniform(0, 5) for i in range(60)]
# Candidate positions for the y-axis ticks.
y_ = list(range(50))

# Shanghai line.
plt.plot(x, y, label="上海")
# Beijing line.
plt.plot(x, y_beijing, label="北京", color='r', linestyle="--", linewidth=5)

# x-axis ticks: one label every 5 minutes.
x_name = ["11点{}分".format(i) for i in range(60)]
plt.xticks(x[::5], x_name[::5], fontproperties=my_font)
# y-axis ticks: every 5 degrees.
plt.yticks(y_[::5])

# Axis names, rendered with the Chinese-capable font.
plt.xlabel("时间", fontproperties=my_font)
plt.ylabel("温度", fontproperties=my_font)
# Title.
plt.title("time in 11 to 12")
# Legend describing the Shanghai and Beijing lines.
plt.legend(loc="best", prop=my_font)

plt.savefig("./data_bjhangh.png")
plt.show()
Windows版:
import matplotlib.pyplot as plt
import random
from matplotlib import font_manager

# Set the canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# Font setup on Windows.
# Option 1: set the font family and size through rc.
plt.rc('font', family='SimHei', size=13)
# Option 2: set the rcParams entries directly.
# plt.rcParams['font.sans-serif'] = ['SimHei']  # default font
# plt.rcParams['axes.unicode_minus'] = False  # keeps '-' from showing as a box in saved images

# Coordinates: x and y must contain the same number of points.
x = range(60)
# One random value between 15 and 18 degrees per x position (Shanghai).
y = [random.uniform(15, 18) for _ in x]
# One random value between 0 and 5 degrees per x position (Beijing).
y_beijing = [random.uniform(0, 5) for _ in x]
# Candidate positions for the y-axis ticks.
y_ = list(range(50))

# Shanghai line, then Beijing line.
plt.plot(x, y, label="上海")
plt.plot(x, y_beijing, label="北京", color='r', linestyle="--", linewidth=5)

# x-axis ticks: one label every 5 minutes.
x_name = list(map("11点{}分".format, x))
plt.xticks(x[::5], x_name[::5])
# y-axis ticks: every 5 degrees.
plt.yticks(y_[::5])

# Axis names.
plt.xlabel("时间")
plt.ylabel("温度")
# Title.
plt.title("time in 11 to 12")
# Legend describing the Shanghai and Beijing lines.
plt.legend(loc="best")

plt.savefig("./data_bjhangh.png")
plt.show()
折线网格图
import matplotlib.pyplot as plt

plt.rcParams['font.family'] = ['Arial Unicode MS', 'sans-serif']

# Set the canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# Draw two lines and keep the returned handles. plt.plot returns a list,
# so the single-element unpacking extracts the one line object.
(n1,) = plt.plot([0, 2, 4, 6, 8], [1, 5, 3, 9, 7], label='number1图例标题1')
(n2,) = plt.plot([1, 3, 5, 7, 9], [0, 4, 2, 8, 6], label='number2图例标题2')

# Chart title.
plt.title('hello图表标题', color='#00ff00', fontsize=24)

# Axis labels.
plt.xlabel('X axis')
plt.ylabel('Y轴标注')

# Tick positions on the y-axis.
plt.yticks([1, 3, 6, 9, 12, 15, 18, 20])
# Tick positions on the x-axis paired with the text shown at each one.
tick_pairs = [(0, '2000'), (1, '2001'), (2, '2002'),
              (5, '2005'), (8, '2008'), (10, '2010')]
plt.xticks(
    [position for position, _ in tick_pairs],
    [text for _, text in tick_pairs],
    fontsize=14,    # label size
    rotation=90,    # label rotation in degrees
)

# Axis ranges. The same can be done in one call:
# plt.axis([-1, 11, -2, 12])  # x from -1 to 11, y from -2 to 12
plt.xlim([-1, 11])
plt.ylim([-2, 12])

# Legend.
# plt.legend() with no handles draws every line that was given a `label`.
# To draw the legend for chosen lines only, pass their handles (the
# variables assigned from plt.plot above).
# Accepted `loc` values:
#   best (default)
#   right
#   center, center left, center right
#   lower center, lower left, lower right
#   upper center, upper left, upper right
plt.legend(handles=[n1, n2], loc='upper right', frameon=False)  # frameon: draw a border or not

# Grid lines.
plt.grid()

# Saving: the format defaults to png; for another format such as jpg,
# add the suffix to the name. A path like `images/test.jpg` also works,
# but the directory has to exist first.
# plt.savefig('test', dpi=600)  # dpi is the resolution; 72 and 300 are common

# Display the figure.
plt.show()
直方图
import matplotlib.pyplot as plt
import random

plt.figure(figsize=(15, 6), dpi=100)

# Movie running times to be binned.
time = [131, 98, 125, 131, 124, 139, 131, 117, 128, 108, 135, 138, 131, 102, 107, 114, 119, 128, 121, 142, 127, 130, 124, 101, 110, 116, 117, 110, 128, 128, 115, 99, 136, 126, 134, 95, 138, 117, 111,78, 132, 124, 113, 150, 110, 117, 86, 95, 144, 105, 126, 130,126, 130, 126, 116, 123, 106, 112, 138, 123, 86, 101, 99, 136,123, 117, 119, 105, 137, 123, 128, 125, 104, 109, 134, 125, 127,105, 120, 107, 129, 116, 108, 132, 103, 136, 118, 102, 120, 114,105, 115, 132, 145, 119, 121, 112, 139, 125, 138, 109, 132, 134,156, 106, 117, 127, 144, 139, 139, 119, 140, 83, 110, 102,123,107, 143, 115, 136, 118, 139, 123, 112, 118, 125, 109, 119, 133,112, 114, 122, 109, 106, 123, 116, 131, 127, 115, 118, 112, 135,115, 146, 137, 116, 103, 144, 83, 123, 111, 110, 111, 100, 154,136, 100, 118, 119, 133, 134, 106, 129, 126, 110, 111, 109, 141,120, 117, 106, 149, 122, 122, 110, 118, 127, 121, 114, 125, 126,114, 140, 103, 130, 141, 117, 106, 114, 121, 114, 133, 137, 92,121, 112, 146, 97, 137, 105, 98, 117, 112, 81, 97, 139, 113,134, 106, 144, 110, 137, 137, 111, 104, 117, 100, 111, 101, 110,105, 129, 137, 112, 120, 113, 133, 112, 83, 94, 146, 133, 101,131, 116, 111, 84, 137, 115, 122, 106, 144, 109, 123, 116, 111,111, 133, 150]

# Bin width: how many minutes each bin covers.
width = 3
shortest, longest = min(time), max(time)
# Number of bins = value range divided by the bin width.
num_bins = int((longest - shortest) / width)

# Draw the histogram.
plt.hist(time, bins=num_bins, density=True)

# One x-axis tick every 5 units from the shortest running time.
plt.xticks(range(shortest, longest, 5))

# Title and a dashed, half-transparent grid.
plt.title('250个电影的时长分布图')
plt.grid(True, linestyle='--', alpha=0.5)
plt.show()
饼图api
# Wedge labels and the value behind each wedge, in matching order.
movie_name = [
    '雷神3:诸神黄昏', '正义联盟', '东方快车谋杀案', '寻梦环游记', '全球风暴',
    '降魔传', '追捕', '七十七天', '密战', '狂兽', '其它',
]
place_count = [60605, 54546, 45819, 28243, 13270, 9945, 7679, 6799, 6101, 4621, 20105]

# Font setup.
plt.rcParams['font.family'] = ['Arial Unicode MS', 'sans-serif']
# Set the canvas size and resolution.
plt.figure(figsize=(20, 10), dpi=100)

# `explode` pulls selected wedges outward; each entry is the offset for
# the wedge at the same position (0 leaves the wedge in place).
offsets = (0, 0.1, 0, 0, 0, 0, 0, 0.3, 0, 0, 0.1)
wedge_colors = ['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'r']
p = plt.pie(
    place_count,
    labels=movie_name,
    explode=offsets,
    autopct='%0.2f%%',
    colors=wedge_colors,
)

# Linux workaround: apply a Chinese-capable font (a FontProperties object
# named my_font) to the wedge labels, the legend and the title.
# for t in p[1]:
#     t.set_fontproperties(my_font)
# plt.legend(loc="best", prop=my_font)
# plt.title("今日排片占比", fontproperties=my_font)

plt.legend(loc="best")
plt.title("今日排片占比")
plt.axis('equal')
plt.show()
散点图
数值计算库 Numpy
1. 基础
数组
# ndarray basics
import numpy as np

scores = [99, 60, 80, 5, 50]
class1 = np.array(scores)
# Show the array, then its element type, on separate lines.
print(class1, class1.dtype, sep='\n')
print('++++++++=')

# dtype reports the element type of an array. Here the list mixes bool,
# int, float and str values, and every element ends up as a string (see
# the recorded output below).
s1 = np.array([True, 80, 177.7, '张小明'])
print(s1, s1.dtype)

# Output recorded by the original author:
# [99 60 80 5 50]
# int32
# ++++++++=
# ['True' '80' '177.7' '张小明'] <U32
dtype 查看数组 类型
ndarray数组对象属性
案例
import numpy as np

# Records for class 1, one row per student.
midterm = [
    [True, 80, 177.7, '张小明'],
    [False, 99, 164.5, '李丽'],
    [True, 59, 158, '王华'],
]
final = [
    [True, 95, 178, '张小明'],
    [False, 99, 164.5, '李丽'],
    [True, 39, 178, '王华'],
]

# Attributes printed for each array, in this order:
#   dtype - element type
#   size  - total number of elements
#   ndim  - number of dimensions (rank)
#   shape - length along each axis
attributes = ('dtype', 'size', 'ndim', 'shape')

# Two-dimensional array: class 1 students.
class1 = np.array(midterm)
for attribute in attributes:
    print(getattr(class1, attribute))
print("================")

# Three-dimensional array: class 1 students at mid-term and at the final.
class1 = np.array([midterm, final])
for attribute in attributes:
    print(getattr(class1, attribute))
# The length of the shape tuple is the number of dimensions.
print(len(class1.shape))
import numpy as np

# ----- np.arange() -----
# Works like range(): an increasing integer sequence from 0 to n-1.
list_01 = np.arange(0, 10, 1)
# Arguments are start, stop, step.
list_02 = np.arange(1, 9, 2)
# print(list_01, list_02)
# [0 1 2 3 4 5 6 7 8 9] [1 3 5 7]

# ----- np.linspace() -----
# Fills an array with evenly spaced values between a start and a stop.
num01 = np.linspace(1, 10, num=4)
# print(num01)  # [ 1. 4. 7. 10.]
# endpoint controls whether the stop value is included as the last element.
num02 = np.linspace(1, 10, num=4, endpoint=False)
# print(num02)  # [1. 3.25 5.5 7.75]

# ----- np.array() -----
# Builds an array from (nested) lists.
rows = [[1, 23, 4], [3, 4, 5]]
n1 = np.array(rows)
# print(n1)
# [[ 1 23 4]
# [ 3 4 5]]
# =====
# np.ones(shape)
# 根据shape生成一个全1数组,shape是元组类型
# np.ones_like(ndarray)
# 以另一个数组为参数,根据其形状和dtype创建全1数组
# n2 = np.ones((2,3,4))
# print(n2,)
# print("==============")
# n3 = np.ones_like(n2)
# print(n3)
# [[[1. 1. 1. 1.]
# [1. 1. 1. 1.]
# [1. 1. 1. 1.]]
#
# [[1. 1. 1. 1.]
# [1. 1. 1. 1.]
# [1. 1. 1. 1.]]]
# ==============
# [[[1. 1. 1. 1.]
# [1. 1. 1. 1.]
# [1. 1. 1. 1.]]
#
# [[1. 1. 1. 1.]
# [1. 1. 1. 1.]
# [1. 1. 1. 1.]]]
#
# =======
# np.zeros(shape) 根据shape生成一个全0数组,shape是元组类型
# np.zeros_like(ndarray) 以另一个数组为参数,根据其形状和dtype创建全0数组
# 全0
# n3 = np.zeros((3, 6), dtype = np.int32)
# n4 = np.zeros_like(n3)
# print(n3)
# print("===========")
# print(n4)
# [[0 0 0 0 0 0]
# [0 0 0 0 0 0]
# [0 0 0 0 0 0]]
# ===========
# [[0 0 0 0 0 0]
# [0 0 0 0 0 0]
# [0 0 0 0 0 0]]
# ======
# np.empty(shape) 创建新数组只分配内存空间,随意填充一些垃圾值
# np.empty_like(ndarray) 以另一个数组为参数,根据其形状和dtype创建填充值数组
# 填充数组
# n1 = np.empty((6, 7))
# n2 = np.empty_like(n1)
# print(n1)
# print("=========")
# print(n2)
# [[1.18036057e-311 1.18024123e-311 0.00000000e+000 0.00000000e+000
# 1.18036090e-311 0.00000000e+000 1.18036090e-311]
# [0.00000000e+000 0.00000000e+000 1.18022288e-311 0.00000000e+000
# 1.18022288e-311 0.00000000e+000 1.18022288e-311]
# [0.00000000e+000 1.18022288e-311 0.00000000e+000 1.18022288e-311
# 0.00000000e+000 1.18022288e-311 0.00000000e+000]
# [1.18022288e-311 0.00000000e+000 1.18022288e-311 0.00000000e+000
# 1.18022288e-311 0.00000000e+000 1.18022288e-311]
# [0.00000000e+000 0.00000000e+000 0.00000000e+000 6.95331186e-310
# 0.00000000e+000 0.00000000e+000 6.95331185e-310]
# [6.95331185e-310 6.95331184e-310 0.00000000e+000 0.00000000e+000
# 0.00000000e+000 0.00000000e+000 0.00000000e+000]]
# =========
# [[1.18024210e-311 1.18024123e-311 9.29433783e+242 1.49174223e+195
# 4.95261533e+223 7.19464630e+159 1.99501687e+161]
# [2.76518167e+180 4.45511939e-091 2.75383585e+212 2.97762064e+228
# 7.66991258e+170 1.06112891e-153 4.64501053e+151]
# [2.63265729e+267 1.32882271e-258 3.68777421e+180 4.47593816e-091
# 2.93573416e+222 1.36455813e+161 5.37649537e+242]
# [9.92152605e+247 5.03734573e+180 5.28595595e-085 4.64501053e+151
# 5.50436598e+257 5.03734573e+180 9.92152728e+247]
# [5.03734573e+180 4.83245960e+276 8.03408340e-095 1.95575364e-109
# 2.87903286e-152 2.58400946e+161 7.61384359e-010]
# [5.34083717e+228 5.02383426e+223 4.27195504e+270 2.59345414e+161
# 5.03734574e+180 8.03408340e-095 1.05894728e-153]]
# ======
# np.full(shape, val): array of the given shape with every element set to val.
# np.full_like(a, val): array shaped like `a` with every element set to val.
n1 = np.full(shape=6, fill_value=3)
n2 = np.full(shape=(2, 3, 4), fill_value=25)
n3 = np.full_like(n1, fill_value=25)

# Print the three arrays, separated by divider lines.
divider = '=========='
print(n1, divider, n2, divider, n3, sep='\n')
# ======
# np.eye(n),np.identity(n) 创建一个正方的n*n单位矩阵,对角线为1,其余为0
# np.diag(list) 创建一个正方形矩阵,对角线为参数值
# n1 = np.eye(5)
# n2 = np.identity(5)
# print(n1)
# print("===========")
# print(n2)
# 正方形矩阵,对角线为参数值
# n1 = np.diag([1,3,5,7,9])
# print(n1)
# [[1 0 0 0 0]
# [0 3 0 0 0]
# [0 0 5 0 0]
# [0 0 0 7 0]
# [0 0 0 0 9]]
数组运算
import numpy as np
# arr01 = np.ones([30])
# print(arr01)
# print('=================')
#
# arr02 = arr01.reshape((5,6))
# print(arr02)
# print('=================')
#
# arr03 = arr01.reshape((5,2,3))
# print(arr03)
# print('=========')
# arr1 = np.random.normal(1.5,0.2,(4,5))
#
# print(arr1)
# print('-'*30)
#
# print(arr1[1])
# print('-'*30)
#
# print(arr1[0:2])
# print('-'*30)
#
# print(arr1[0:2, 3])
# print('-'*30)
#
# # 0 到 2 的数组 中的 1列到3列
# print(arr1[0:2, 1:3])
# print('-'*30)
#
# print(arr1[0][1])
# print('-'*30)
# arr1 = np.arange(24).reshape((4,6))
# print(arr1)
#
# # numpy 三元运算
# arr2 = np.where(arr1 < 10, 0, 10)
# print(arr2)
# [[ 0 1 2 3 4 5]
# [ 6 7 8 9 10 11]
# [12 13 14 15 16 17]
# [18 19 20 21 22 23]]
# [[ 0 0 0 0 0 0]
# [ 0 0 0 0 10 10]
# [10 10 10 10 10 10]
# [10 10 10 10 10 10]]
###
### 数组间的运算
#
# a1 = np.array([[1, 2, 3], [4, 5, 6]])
# a2 = np.array([[5, 10, 15], [5, 6, 7]])
#
# print(a1 + a2)
# a = np.array([6,15,25])
# print(a.mean())
#
# print(np.average(a, weights=[1,2,3]))
# a = np.array ([
# [80, 86], [82, 80], [85, 78], [90, 90], [86, 82], [82, 90], [78, 80],
# [92, 94]
#
# ])
# print(np.max(a))
# print(np.min(a))
# print(np.mean(a)) # 平均数
# print(np.mean(a, axis=0)) # 平均值
# print(np.max(a, axis=1))
数值分析库 Pandas
1. 读取csv 文件
import pandas as pd

# Load the CSV file and print the resulting table.
csv_path = './IMDB-Movie-Data.csv'
content_csv = pd.read_csv(csv_path)
print(content_csv)
2. 数据处理
import pandas as pd
import numpy as np
#
# s1 = pd.Series([1,2,3,4,5])
#
# arr1 = np.arange(10)
#
# print(pd.Series(arr1))
# print('-------------')
# print(pd.Series([[3,45,6],['hehe','a']]))
# print('-------------')
#
#
# # 取出数据 切片
# print(s1[2])
# print('-'*30)
# print(s1[2:4])
# print('-'*30)
# # 自定义索引
# s2 = pd.Series([1,23,4,5],[10,'a','b','c'])
# print(s2)
# print(s2[10])
# print(s2['a'])
# print(s2['b'])
#
#
#
# Read the CSV file.
s1 = pd.read_csv('./dogNames2.csv')

# Ways to inspect the loaded data (uncomment to try):
# print(s1.shape)
# print(s1.dtypes)
# print(s1.ndim)     # number of dimensions
# print(s1.index)    # row index
# print(s1.columns)  # column index
# print(s1.values)   # the values
#
# s1.head(3)     # first rows (5 by default)
# s1.tail(3)     # last rows (5 by default)
# s1.info()      # overview: row count, column count, column index,
#                # non-null count per column, column types, memory usage
# s1.describe()  # quick summary statistics: count, mean, standard
#                # deviation, min, quartiles, max

# ----- Filtering -----
# True/False per row: does the name count exceed 800?
is_frequent = s1['Count_AnimalName'] > 800
print(is_frequent)
print('-------------')
# Keep only the rows where the condition is True.
print(s1[is_frequent])

# ----- Sorting -----
sort_column = 'Count_AnimalName'
print(s1.sort_values(sort_column))
print('------------')
# ascending=False sorts from largest to smallest.
print(s1.sort_values(sort_column, ascending=False))
获取nan 打印type 类型
import pandas as pd

# Read the CSV file into a DataFrame.
s1 = pd.read_csv('./IMDB-Movie-Data.csv')

revenue = s1['Revenue (Millions)']
# Per the original note, row 7 of this column holds a NaN value.
print(revenue[7])
# Per the original note this prints <class 'numpy.float64'>: NaN is a float.
print(type(revenue[7]))
替换缺失值 为Nan
import pandas as pd
import numpy as np

# Read the data file into a DataFrame.
s1 = pd.read_csv('./breast.data')

# Replace every '?' entry with NaN.
s2 = s1.replace('?', np.nan)

# Show rows 20 to 33 of the result.
print(s2[20:34])
groupby分组
import pandas as pd
import numpy as np

# Demo table: one row per (student, exam), with a random score for each of
# three subjects. Scores are random, so the sample outputs recorded below
# come from individual runs and will differ on yours.
df = pd.DataFrame({
    'name': ['张三', '李四', '王五', '李四', '王五', '王五', '赵六'],
    'chinese': np.random.randint(35, 100, 7),
    'math': np.random.randint(35, 100, 7),
    'english': np.random.randint(35, 100, 7),
    'test': ['一', '一', '一', '二', '二', '三', '一']
})
# print(df)

# ----- groupby -----
# This only creates the GroupBy object; nothing is computed yet.
print(df.groupby('name'))

# Group, then take the mean of each group.
# numeric_only=True restricts the mean to the numeric columns. It is
# needed because the text column 'test' cannot be averaged: pandas 2.0+
# raises TypeError instead of silently dropping such columns.
a = df.groupby('name').mean(numeric_only=True)
# print(a)
#         chinese       math    english
# name
# 张三  69.000000  40.000000  69.000000
# 李四  72.000000  91.000000  49.500000
# 王五  64.333333  60.666667  58.333333
# 赵六  86.000000  88.000000  37.000000

# Pass as_index=False to keep the grouping column as a regular column
# instead of using it as the index.
b = df.groupby('name', as_index=False).mean(numeric_only=True)
# print(b)
#   name    chinese       math    english
# 0 张三  69.000000  40.000000  69.000000
# 1 李四  72.000000  91.000000  49.500000
# 2 王五  64.333333  60.666667  58.333333
# 3 赵六  86.000000  88.000000  37.000000

# Group by several columns, then take the mean.
c = df.groupby(['name', 'chinese']).mean(numeric_only=True)
c2 = df.groupby(['name', 'chinese'], as_index=False).mean(numeric_only=True)
print(c)
#               math  english
# name chinese
# 张三  40        85       67
# 李四  44        40       57
#       84        85       78
# 王五  35        44       56
#       40        80       81
#       65        35       85
# 赵六  39        92       56
print(c2)
#   name  chinese  math  english
# 0 张三       40    85       67
# 1 李四       44    40       57
# 2 李四       84    85       78
# 3 王五       35    44       56
# 4 王五       40    80       81
# 5 王五       65    35       85
# 6 赵六       39    92       56