写在前面:
1.数据来源均为2023年华为杯数学竞赛C题,不方便放数据,因此贴图
2.图的名字分不清,有点乱
图的样式和目录对应↓
![对应](https://i-blog.csdnimg.cn/blog_migrate/7fd5ccf87d9fe2085c460db2a7552407.jpeg)
1.直方图
#数据形式
#目标
希望画出一位专家的原始分与标准分分布,并展示在同一张图内
#代码
import numpy as np
import pandas as pd
import ast
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib as mpl
# Use a CJK-capable font so Chinese text renders; any installed font works.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
# Keep the minus sign renderable while the custom font is active.
mpl.rcParams['axes.unicode_minus'] = False

path = 'your data.xlsx'
data = pd.read_excel(path, decimal=",", sheet_name=1)
experts = set(data["专家编码"])  # 16 experts, per the original author's note

# Map each expert code to the score collections parsed from its row.
dict_origin = defaultdict(list)
dict_standard = defaultdict(list)
for row in data.itertuples():
    # row[0] is the DataFrame index, so row[1], row[2], row[3] are the first
    # three columns: expert code, raw scores, standard scores.
    code = row[1]
    # Parse both cells with ast.literal_eval (the raw-score cell previously
    # went through eval, which would execute arbitrary code from the sheet).
    # NOTE(review): presumably each cell is a stringified list - confirm.
    origin = ast.literal_eval(row[2])
    standard = ast.literal_eval(row[3])
    # Scores are kept as whole lists because raw scores may repeat.
    dict_origin[code] = origin
    dict_standard[code] = standard

# Shared histogram bin edges: 50 evenly spaced edges between 0 and 100.
bins = np.linspace(0, 100, 50)
def double_hist(ax, data1, data2, expert):
    """Overlay two translucent histograms on ``ax`` and title it ``expert``.

    ``data1`` (raw scores) is drawn in red and ``data2`` (standard scores)
    in green. Both use the module-level ``bins`` edges.
    """
    for scores, colour in ((data1, 'r'), (data2, 'g')):
        ax.hist(scores, bins, alpha=0.6, color=colour)
    ax.set_title(expert, fontsize=12)
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, labelsize=9)
# Draw the raw and standard score histograms of one expert on a single axes.
target_expert = 'P464'
fig, ax = plt.subplots()
double_hist(
    ax,
    dict_origin[target_expert],
    data2=dict_standard[target_expert],
    expert=target_expert,
)
plt.show()
#效果
2.直方图-多张1
#数据形式
#目标
希望画出多位专家的原始分与标准分分布(每位专家一个子图),并展示在同一张图内
#代码
## 每位专家的打分特点
import numpy as np
import pandas as pd
import ast
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib as mpl
# Use a CJK-capable font so Chinese text renders; any installed font works.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
# Keep the minus sign renderable while the custom font is active.
mpl.rcParams['axes.unicode_minus'] = False

path = 'your data.xlsx'
data = pd.read_excel(path, decimal=",", sheet_name=1)
experts = set(data["专家编码"])  # 16 experts, per the original author's note

# Map each expert code to the score collections parsed from its row.
dict_origin = defaultdict(list)
dict_standard = defaultdict(list)
for row in data.itertuples():
    # row[0] is the DataFrame index, so row[1], row[2], row[3] are the first
    # three columns: expert code, raw scores, standard scores.
    code = row[1]
    # Parse both cells with ast.literal_eval (the raw-score cell previously
    # went through eval, which would execute arbitrary code from the sheet).
    # NOTE(review): presumably each cell is a stringified list - confirm.
    origin = ast.literal_eval(row[2])
    standard = ast.literal_eval(row[3])
    # Scores are kept as whole lists because raw scores may repeat.
    dict_origin[code] = origin
    dict_standard[code] = standard

# Shared histogram bin edges: 50 evenly spaced edges between 0 and 100.
bins = np.linspace(0, 100, 50)
def double_hist(ax, data1, data2, expert):
    """Overlay two translucent histograms on ``ax`` and title it ``expert``.

    ``data1`` (raw scores) is drawn in red and ``data2`` (standard scores)
    in green. Both use the module-level ``bins`` edges.
    """
    for scores, colour in ((data1, 'r'), (data2, 'g')):
        ax.hist(scores, bins, alpha=0.6, color=colour)
    # fontsize controls the per-subplot title size.
    ax.set_title(expert, fontsize=12)
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, labelsize=9)
# One subplot per expert on a 4x4 grid.
fig1, axes1 = plt.subplots(4, 4)
# Flatten the 2-D axes array row by row so it can be paired with the experts.
axes1_list = list(axes1.flat)
# A set has no stable order; sort so every run produces the same layout.
# key=str keeps the sort from failing if a code is not a string.
experts = sorted(experts, key=str)
# zip stops at the shorter sequence, so fewer than 16 experts leaves the
# trailing subplots empty instead of raising IndexError.
for ax1, expert in zip(axes1_list, experts):
    double_hist(ax1, dict_origin[expert], dict_standard[expert], expert)
plt.tight_layout()
# Override only the vertical gap between subplot rows.
plt.subplots_adjust(hspace=0.45)
plt.show()
# To save the figure instead of showing it:
# plt.savefig("your photo name.png", bbox_inches='tight')
#效果
3.直方图多张2
#数据形式
下表对应该行代码
data1 = pd.read_excel(path,decimal=",",sheet_name=2)
#代码
## 每位专家的打分特点
import numpy as np
import pandas as pd
import tqdm
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib as mpl
import pylab as pl
from scipy.stats import norm
import seaborn as sns
import mpl_toolkits.axisartist as axisartist
# Use a CJK-capable font so Chinese text renders; any installed font works.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

path = 'your data.xlsx'
# Sheets 2..6 are read with the same options and stacked row-wise.
data1, data2, data3, data4, data5 = (
    pd.read_excel(path, decimal=",", sheet_name=sheet) for sheet in range(2, 7)
)
data = pd.concat([data1, data2, data3, data4, data5], axis=0)
experts = set(data["专家编码"])  # 97 experts, per the original author's note

# Group every score by expert code. Lists are used because one expert
# contributes many rows and raw scores may repeat.
dict_origin = defaultdict(list)
dict_standard = defaultdict(list)
dict_delhl = defaultdict(list)
for row in data.itertuples():
    # row[0] is the DataFrame index; fields 2..5 hold the expert code, the raw
    # score, the standard score and the "delhl" score.
    # NOTE(review): "delhl" presumably means highest/lowest removed - confirm.
    code, origin, standard, delhl = row[2:6]
    dict_origin[code].append(origin)
    dict_standard[code].append(standard)
    dict_delhl[code].append(delhl)

# Shared histogram bin edges: 50 evenly spaced edges between 0 and 100.
bins = np.linspace(0, 100, 50)
def double_hist(ax, data1, data2, expert):
    """Overlay two translucent histograms on ``ax`` and title it ``expert``.

    ``data1`` is drawn in red and ``data2`` in green. Both use the
    module-level ``bins`` edges. Title and tick labels use a small font
    (size 6).
    """
    small_font = 6
    for scores, colour in ((data1, 'r'), (data2, 'g')):
        ax.hist(scores, bins, alpha=0.6, color=colour)
    ax.set_title(expert, fontsize=small_font)
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, labelsize=small_font)
# A 2x3 grid has room for six experts.
fig1, axes1 = plt.subplots(2, 3)
# Flatten the 2-D axes array row by row so it can be paired with the experts.
axes1_list = list(axes1.flat)
# A set has no stable order; sort so the same six experts are drawn on every
# run. key=str keeps the sort from failing if a code is not a string.
experts = sorted(experts, key=str)
# zip stops at the shorter sequence, so fewer than six experts leaves trailing
# subplots empty instead of raising IndexError. The loop also no longer
# rebinds data1/data2/data3, which hold the loaded sheets.
for ax, expert in zip(axes1_list, experts):
    # Raw scores against the "delhl" scores.
    double_hist(ax, dict_origin[expert], dict_delhl[expert], expert)
    # For raw against standard scores instead, pass dict_standard[expert]
    # and change the suptitle to u"每位专家的原始分和标准分分布".
fig1.suptitle(u"每位专家的原始分和去高去低分分布")
plt.show()
# To save the figure instead of showing it:
# plt.savefig("Problem2.1-每位专家2表1.png", bbox_inches='tight')
#效果
4.折线图
#数据形式
#目标
希望画出每份作品的原始分均值与标准分均值折线,并展示在同一张图内
#代码
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import mpl_toolkits.axisartist as axisartist
import matplotlib as mpl
# Use a CJK-capable font so Chinese text renders; any installed font works.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

# One row per work, per the original author's note.
path = 'your data.xlsx'
data = pd.read_excel(path, sheet_name=7)
# Keep exactly the two columns that are plotted below. The previous selection
# named a column that was never drawn and was itself unused.
df1 = data[[u'原始分均值', u'标准分均值']]

fig = plt.figure(figsize=(7, 4))
ax1 = axisartist.Subplot(fig, 111)
fig.add_axes(ax1)
# Hide the top/right spines and put arrow heads on the remaining two axes.
for side in ('right', 'top'):
    ax1.axis[side].set_visible(False)
for side in ("bottom", "left"):
    ax1.axis[side].set_axisline_style("->", size=1.5)

# Derive the x positions from the data instead of a hard-coded row count, so
# the script works for a sheet of any length.
x = range(len(df1))
ax1.plot(x, df1[u'原始分均值'], alpha=0.7, linestyle='-.', color='#b03d26', label="原始分均值")
ax1.plot(x, df1[u'标准分均值'], alpha=0.5, linestyle='-.', color='#005f81', label='标准分均值')
plt.legend(loc='upper right', fontsize='small')
plt.ylabel("分数", fontsize=12)
plt.grid(axis='y')
# Use plt.show() instead to view the figure interactively.
plt.savefig("原始分与标准分均值.png", bbox_inches='tight')
#效果
5.直方+分布拟合1
#数据形式
#代码
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib as mpl
import pylab as pl
from scipy.stats import norm
import seaborn as sns
import mpl_toolkits.axisartist as axisartist
# Use a CJK-capable font so Chinese text renders; any installed font works.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

# Stage two: distribution of the mean score per award level.
path = 'your data.xlsx'
data = pd.read_excel(path, sheet_name=0)

# (award label, histogram alpha, colour), drawn in this order.
award_styles = [
    ("一等奖", 0.95, '#d9412b'),
    ("二等奖", 0.6, '#c7c7c7'),
    ("三等奖", 0.6, '#278383'),
    ("未获奖", 0.4, 'y'),
]
# Select each award's '均值' column from its own rows. The "未获奖" series was
# previously taken from the "三等奖" rows by mistake, so the fourth histogram
# duplicated the third.
mean_by_award = {
    award: data.loc[data["奖项"] == award, '均值']
    for award, _, _ in award_styles
}

fig = plt.figure()
ax1 = axisartist.Subplot(fig, 111)
fig.add_axes(ax1)
# Hide the top/right spines and put arrow heads on the remaining two axes.
for side in ('right', 'top'):
    ax1.axis[side].set_visible(False)
for side in ("bottom", "left"):
    ax1.axis[side].set_axisline_style("->", size=1.5)
# Second y-axis for the density curve, which is on a different scale than
# the histogram counts.
ax2 = ax1.twinx()

# NOTE(review): the curve parameters come from the '极差' column while the
# histograms show '均值' - confirm this is intended.
mu = np.mean(data['极差'])
sigma = np.std(data['极差'])
bins = np.linspace(0, 100, 30)

for award, alpha, colour in award_styles:
    ax1.hist(mean_by_award[award], bins, alpha=alpha, label=award, color=colour)
ax1.set_ylabel("获奖人数", fontsize=10)
ax1.set_xlabel("获奖成绩", fontsize=10)

# Normal density evaluated at the bin edges.
y = norm.pdf(bins, mu, sigma)
ax2.plot(bins, y, "--", color='k', linewidth=2, label='分布拟合图')
# A figure-level legend collects handles from both axes; it is anchored to
# the top-right corner of ax1.
fig.legend(loc='upper right', bbox_to_anchor=(1, 1), bbox_transform=ax1.transAxes)
plt.title("获奖成绩的均值分布", fontsize=18)
plt.show()
#效果
6.直方+分布拟合2
#数据形式
#代码
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib as mpl
import pylab as pl
from scipy.stats import norm
import seaborn as sns
import mpl_toolkits.axisartist as axisartist
# Use a CJK-capable font so Chinese text renders; any installed font works.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

# Stage two: distribution of the mean score for the three award levels.
path = 'your data.xlsx'
data = pd.read_excel(path, sheet_name=0)

# (award label, colour), drawn in this order.
award_colours = [
    ("一等奖", '#e76254'),
    ("二等奖", '#ffd06f'),
    ("三等奖", '#72bcd5'),
]

fig, ax1 = plt.subplots()
# Second y-axis for the density curve, which is on a different scale than
# the histogram counts.
ax2 = ax1.twinx()

mu = np.mean(data['均值'])
sigma = np.std(data['均值'])
bins = np.linspace(25, 75, 50)

for award, colour in award_colours:
    award_means = data.loc[data["奖项"] == award, '均值']
    ax1.hist(award_means, bins, alpha=0.85, label=award, color=colour)
ax1.set_ylabel("获奖人数", fontsize=10)
ax1.set_xlabel("获奖成绩", fontsize=10)
# Turn the grid on explicitly and only once. The argument-less plt.grid() is
# a toggle, and the two calls used previously cancelled each other out.
ax1.grid(True)

# Normal density with the sample mean/std, evaluated at the bin edges.
y = norm.pdf(bins, mu, sigma)
ax2.plot(bins, y, "--", color='#376795', linewidth=3, label='分布拟合图')
# A figure-level legend collects handles from both axes; it is anchored to
# the top-right corner of ax1.
fig.legend(loc='upper right', bbox_to_anchor=(1, 1), bbox_transform=ax1.transAxes)
plt.show()
#效果
7.双折线图,多图
#数据形式
#代码
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib as mpl
import pylab as pl
from scipy.stats import norm
import seaborn as sns
import mpl_toolkits.axisartist as axisartist
# Use a CJK-capable font so Chinese text renders; any installed font works.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

path = 'your data.xlsx'
data = pd.read_excel(path, sheet_name=0)
# Stage-1 and stage-2 columns: means for the top panel, ranges for the bottom.
df1 = data[['第一阶段标准分均值', '第二阶段标准分均值']]
df2 = data[['第一阶段标准分极差', '第二阶段标准分极差']]

# Options shared by both panels.
line_style = dict(
    kind='line',
    style='--.',
    alpha=0.5,
    use_index=True,
    rot=45,
    grid=True,
    figsize=(8, 6),
    legend=True,
    subplots=False,  # False draws every column of the frame on the same axes
)

fig1 = plt.figure()

# Top panel: mean standard score per stage.
ax1 = plt.subplot(211)
df1.plot(ax=ax1, title='均值', color=['#C75C64', '#F0B57D'], **line_style)
ax1.set_ylim([45, 70])

# Bottom panel: range of the standard score per stage.
ax2 = plt.subplot(2, 1, 2)
df2.plot(ax=ax2, title='极差', color=["#7895C1", "#007D7D"], **line_style)
ax2.set_ylim([0, 40])

plt.show()
#效果