本文基于python介绍若干种图的画法~
以下所用到的数据均出自2021年长三角数学建模比赛,想要实验的可从下面的百度网盘链接获取
链接:https://pan.baidu.com/s/1Jl7ljHuJvGbusDUS2ODd6w?pwd=g4rv
提取码:g4rv
目录
在介绍绘制图之前,先记录一下一些小操作:
一、支持画图标题里出现中文
# Use the 'Microsoft YaHei' font family for all text so that Chinese
# characters in titles and labels can be rendered.
matplotlib.rc("font", family='Microsoft YaHei')
二、修改画布大小
# Create a new figure; figsize is (width, height) in inches.
plt.figure(figsize=(20, 14))
# Switch to matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')
三、存的好看颜色
# Palette of four named colours.
color = ['LightSkyBlue','RoyalBlue','MediumPurple','Lavender']
# Palette of ten hex colours.
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0', '#ffb3e6', '#ffdd99', '#ff6666', '#c2f0c2', '#e6ac00']
# Example: 50-bin density histogram of x, sky-blue bars with blue edges, 60 % opacity.
# (x is not defined in this snippet.)
plt.hist(x,bins=50,density=True,color='SkyBlue',edgecolor='b',alpha=0.6)
四、横坐标简单变换(角度和字号)
# Rotate the x tick labels by 20 degrees and draw them at font size 6.
plt.xticks(rotation = 20,fontsize = 6)
五、横坐标字符标签
# Tick positions 0..11.
ind = np.arange(0,12)
# Show the strings in xname at those positions
# (xname is defined elsewhere; presumably it holds 12 labels -- confirm).
plt.xticks(ind, xname)
# Draw the text '标签' at data coordinates (x, y); x and y are not defined in this snippet.
plt.text(x,y,'标签')
1、散点图:
# Red scatter plot of ydata at positions ind. The third positional argument
# of scatter is the marker size `s`, so `width` here controls marker size.
plt.scatter(ind, ydata, width,color = 'r')
用例:
import pandas as pd
from pandas import DataFrame
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
infodata = pd.read_csv('data.csv', low_memory=False)

# Mean '办学层次' for every school type. groupby sorts the group keys, so the
# x-axis order is identical on every run (iterating a set() of strings is not).
level_by_type = infodata.groupby('类型')['办学层次'].mean()
typelst = level_by_type.index.tolist()
level = level_by_type.tolist()

matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text
xdata = typelst
ydata = level
ind = np.arange(len(level))
marker_size = 10  # passed to scatter as `s` (marker size)

plt.ylabel('办学层次')
plt.title('不同类型学校办学层次散点图')
plt.scatter(ind, ydata, s=marker_size, color='r')
plt.xticks(ind, xdata)
plt.show()
2、柱状图
# Vertical bars: positions ind, heights ydata, bar width `width`.
plt.bar(ind, ydata, width,color = 'RoyalBlue')
# For barh the positional arguments are (y, width, height): ydata becomes the
# bar length and `width` the bar thickness.
plt.barh(ind, ydata, width,color = 'RoyalBlue') # horizontal bars
多图绘制
# Two side-by-side axes: vertical bars on the left, horizontal bars on the right.
# (x and y are not defined in this snippet.)
fig, axes = plt.subplots(ncols=2)
v_bars = axes[0].bar(x, y, color='red')
# Draw the horizontal chart on the second axes; using axes[0] again would
# overlay both charts and leave the right-hand panel empty.
h_bars = axes[1].barh(x, y, color='red')
# Grey baseline at y = 0 on the vertical-bar panel.
axes[0].axhline(0, color='grey', linewidth=2)
多颜色绘制
# Light-blue bar chart in which negative bars are highlighted in red.
# (x and y are not defined in this snippet.)
fig, ax = plt.subplots()
v_bars = ax.bar(x, y, color='lightblue')
for bar, height in zip(v_bars, y):
    if height < 0:
        # `color` sets both face and edge colour of a patch, so passing it
        # together with `edgecolor` makes the result depend on keyword order.
        # `facecolor` keeps the dark-red outline unambiguously.
        bar.set(facecolor='red', edgecolor='darkred', linewidth=3)
>>填充
# Fill the area between the curve y(x) and the default baseline 0 in light blue.
# (x and y are not defined in this snippet.)
fig,ax = plt.subplots()
ax.fill_between(x,y,color = 'lightblue')
用例:
import pandas as pd
from pandas import DataFrame
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
infodata = pd.read_csv('data.csv', low_memory=False)

# Mean '学科水平' for every school type. groupby sorts the group keys, so the
# bar order is identical on every run (iterating a set() of strings is not).
level_by_type = infodata.groupby('类型')['学科水平'].mean()
typelst = level_by_type.index.tolist()
level = level_by_type.tolist()

matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text
xdata = typelst
ydata = level
ind = np.arange(len(level))
width = 0.35  # bar width

plt.ylabel('学科水平')
plt.title('不同类型学校学科水平柱状图')
plt.bar(ind, ydata, width, color='RoyalBlue')
plt.xticks(ind, xdata)
plt.show()
3、折线图_简略
# Minimal line chart. xdata, ydata, title, k, ind and typelst come from the
# surrounding script.
# The line needs a label, otherwise plt.legend() has nothing to show.
plt.plot(xdata, ydata, 'b-', alpha=0.5, linewidth=1, label=title[k])
plt.title(title[k])
plt.legend()
plt.xticks(ind, typelst)
plt.show()
4、饼状图
import matplotlib.pyplot as plt
import pandas as pd
from collections import Counter

infodata = pd.read_csv('1_390.csv')
data = infodata['入睡方式'].tolist()

# Count how often each value occurs; Counter keeps first-seen order, so the
# wedge order matches the order in which values appear in the file.
frequency = Counter(data)
x = list(frequency.keys())
y = list(frequency.values())

colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0', '#ffb3e6', '#ffdd99', '#ff6666', '#c2f0c2', '#e6ac00']
# One wedge per distinct value, annotated with its percentage share.
plt.pie(y, labels=x, autopct='%1.1f%%', startangle=140, shadow=True, colors=colors)
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()
5、动态命名技术
# Build a per-item output file name from the loop index i (1-based in the name).
figure_name = '药品' + str(i + 1) + 'LineChart.html'
# One plotly line+marker trace for item i; date_time and soldMonthPrice are
# defined outside this snippet.
trace = [go.Scatter (x = date_time, y = soldMonthPrice[i], mode = 'lines+markers', name = 'lines+markers')]
layout = dict(title = '2020-2021月销售总额')
fig = dict(data = trace, layout = layout)
# NOTE(review): pyplt is not defined in this snippet; presumably it is
# plotly.offline.plot imported under that name -- confirm.
pyplt (fig, filename=figure_name)
用例演示_柱状图_折线图
import pandas as pd
from pandas import DataFrame
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objs as go
infodata = pd.read_csv('data.csv', low_memory=False)
title = ['办学层次','学科水平','办学资源','师资规模与结构','人才培养','科学研究','服务社会','高端人才','重大项目与成果','国际竞争力']
color = ['r-','b-','y-','g-','c-']
matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text

# Mean of every indicator per school type, computed once. groupby sorts the
# group keys, so the category order is identical on every run.
means_by_type = infodata.groupby('类型')[title].mean()
typelst = means_by_type.index.tolist()
solddata = [means_by_type[name].tolist() for name in title]  # one row per indicator
ind = np.arange(len(typelst))
width = 0.35

# One figure per indicator.
for k, name in enumerate(title):
    # Dynamically built output name and plotly trace; neither is used by the
    # matplotlib figure drawn below.
    figure_name = name + 'lineChart.html'
    trace = [go.Scatter(x=typelst, y=solddata[k], mode='lines+markers', name='lines+markers')]

    # Line-chart variant:
    # plt.plot(typelst, solddata[k], color[k % 5], alpha=0.5, linewidth=1, label = title[k])
    # The bars carry a label so that plt.legend() has an entry to display.
    p = plt.bar(ind, solddata[k], width, label=name)
    plt.title(name)
    plt.bar_label(p, label_type='edge')  # print each bar's value at its tip
    plt.legend()
    plt.xticks(ind, typelst)
    plt.show()
6、多图技术
灵活运用 plt.show 即可:先在同一张图上连续绘制多条曲线,在需要换一张新图的位置再调用 plt.show()
import pandas as pd
from pandas import DataFrame
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objs as go
infodata = pd.read_csv('data.csv', low_memory=False)
title = ['办学层次','学科水平','办学资源','师资规模与结构','人才培养','科学研究','服务社会','高端人才','重大项目与成果','国际竞争力']
color = ['r-','b-','y-','g-','c-']
matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text

# Mean of every indicator per school type, computed once. groupby sorts the
# group keys, so the category order is identical on every run.
means_by_type = infodata.groupby('类型')[title].mean()
typelst = means_by_type.index.tolist()
solddata = [means_by_type[name].tolist() for name in title]  # one row per indicator
ind = np.arange(len(typelst))

# Draw one line per indicator and start a new figure after every
# len(color) lines, so each line in a figure has its own colour.
lines_per_figure = len(color)
for k, name in enumerate(title):
    plt.plot(typelst, solddata[k], color[k % lines_per_figure], alpha=0.5, linewidth=1, label=name)
    figure_full = k % lines_per_figure == lines_per_figure - 1
    # Also flush the final, possibly partially filled, figure.
    if figure_full or k == len(title) - 1:
        plt.legend()
        plt.xticks(ind, typelst)
        plt.show()
7、层叠柱状图
# Stacked bar chart of the first five indicators. solddata, typelst and title
# come from the surrounding script.
plt.rcParams['figure.figsize'] = 10, 6
xdata = typelst
ind = np.arange(len(xdata))  # one bar position per category
width = 0.35
stack_colors = ['LightSkyBlue', 'RoyalBlue', 'MediumPurple', 'Lavender', 'LightPink']

handles = []
bottom = np.zeros(len(xdata))  # running top of the stack for every category
# zip() stops after the five colours, i.e. after the first five indicators.
for layer, layer_color in zip(solddata, stack_colors):
    bars = plt.bar(ind, layer, width, color=layer_color, bottom=bottom)
    handles.append(bars[0])
    bottom = bottom + np.asarray(layer)

plt.ylabel('累计总量')
plt.title('办学层次、学科水平、办学资源、师资规模与结构和人才培养层叠柱状图')
plt.xticks(ind, xdata)
plt.legend(handles, title[:len(handles)])
plt.show()
用例:
import pandas as pd
from pandas import DataFrame
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objs as go
infodata = pd.read_csv('data.csv', low_memory=False)
title = ['办学层次','学科水平','办学资源','师资规模与结构','人才培养','科学研究','服务社会','高端人才','重大项目与成果','国际竞争力']
matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text

# Mean of every indicator per school type, computed once. groupby sorts the
# group keys, so the category order is identical on every run.
means_by_type = infodata.groupby('类型')[title].mean()
typelst = means_by_type.index.tolist()
solddata = [means_by_type[name].tolist() for name in title]  # one row per indicator

plt.rcParams['figure.figsize'] = 10, 6
xdata = typelst
ind = np.arange(len(xdata))  # one bar position per category
width = 0.35
stack_colors = ['LightSkyBlue', 'RoyalBlue', 'MediumPurple', 'Lavender', 'LightPink']

handles = []
bottom = np.zeros(len(xdata))  # running top of the stack for every category
# zip() stops after the five colours, i.e. after the first five indicators.
for layer, layer_color in zip(solddata, stack_colors):
    bars = plt.bar(ind, layer, width, color=layer_color, bottom=bottom)
    handles.append(bars[0])
    bottom = bottom + np.asarray(layer)

plt.ylabel('累计总量')
plt.title('办学层次、学科水平、办学资源、师资规模与结构和人才培养层叠柱状图')
plt.xticks(ind, xdata)
plt.legend(handles, title[:len(handles)])
plt.show()
8、相关热力图
infodata = pd.read_csv('1_390_pro.csv')
# Columns to correlate.
features = ['母亲年龄', '教育程度', '妊娠时间', 'CBTS', 'EPDS', 'HADS', '婴儿行为特征', '整晚睡眠时间', '睡醒次数', '入睡方式']
# Spearman rank-correlation matrix of the selected columns.
correlation_matrix = infodata[features].corr(method='spearman')
# Theme settings only affect figures drawn afterwards, so configure them
# before plotting. The font is passed here because applying a seaborn theme
# also resets matplotlib's font family.
sns.set(style="whitegrid", font='Microsoft YaHei', font_scale=1.2)
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.show()
用例:
import pandas as pd
from pandas import DataFrame
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objs as go
import seaborn as sns
infodata = pd.read_csv('data.csv', low_memory=False)
title = ['办学层次','学科水平','办学资源','师资规模与结构','人才培养','科学研究','服务社会','高端人才','重大项目与成果','国际竞争力','综合评分']
matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text

# One row per school type, one column per indicator, holding the group mean.
df = infodata.groupby('类型')[title].mean()
# Pairwise (Pearson) correlation between the indicator columns.
df_corr = df.corr()

plt.figure(figsize=(20, 14))
plt.title('相关性热力图')
heatmap_ax = sns.heatmap(df_corr, annot=True, fmt='.2g', cmap="YlGnBu")
# Save before show(): once the window shown by plt.show() is closed the figure
# is gone, and a later savefig() would write an empty image.
plt.savefig("heatmap.png")
plt.show()
9、箱型图绘制
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd

matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text
data = pd.read_csv('test.csv')
filename = '箱型图'

# One box per CSV column.
labels = list(data.columns)
data_box_list = [data[name].tolist() for name in labels]

# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which is no longer
# available in recent Matplotlib releases.
cmap = plt.get_cmap('plasma')
num_colors = len(labels)
# Sample the colormap at evenly spaced points, one colour per box.
colors = [cmap(i / num_colors) for i in range(num_colors)]

bplot = plt.boxplot(data_box_list, patch_artist=True)
# Boxes sit at positions 1..N; label them through xticks, which works the
# same way on every Matplotlib version.
plt.xticks(range(1, num_colors + 1), labels)
for patch, box_color in zip(bplot['boxes'], colors):
    patch.set_facecolor(box_color)
plt.title(filename)
plt.grid(True)
plt.show()
不同数据量的箱型图绘制(附加各个值的计算)
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
def BoxFeature(input_list):
    """Compute the summary statistics a box plot displays for a 1-D sample.

    Values outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are treated as
    outliers and excluded from the mean, minimum and maximum.

    Args:
        input_list: Iterable of numbers (list, NumPy array, pandas Series, ...).

    Returns:
        A list ``[mean, low, Q1, Q2, Q3, high, n_outliers]`` where ``Q1``,
        ``Q2`` and ``Q3`` are the 25th, 50th and 75th percentiles (linear
        interpolation), ``mean``/``low``/``high`` are the mean, minimum and
        maximum of the non-outlier values, and ``n_outliers`` is the number
        of excluded values.

    Raises:
        ValueError: If ``input_list`` is empty.
    """
    values = list(input_list)
    if not values:
        raise ValueError("BoxFeature() requires at least one value")

    # Linear interpolation is np.percentile's default, so the deprecated
    # `interpolation=` keyword is not needed.
    Q1, Q2, Q3 = np.percentile(values, (25, 50, 75))
    IQR = Q3 - Q1
    ulim = Q3 + 1.5 * IQR
    llim = Q1 - 1.5 * IQR

    # Keep everything that is not strictly outside the whisker limits.
    right_list = [item for item in values if not (item < llim or item > ulim)]
    Error_Point_num = len(values) - len(right_list)
    average_value = sum(right_list) / len(right_list)

    return [average_value, min(right_list), Q1, Q2, Q3, max(right_list), Error_Point_num]
matplotlib.rc("font", family='Microsoft YaHei')  # font able to render Chinese text
data = pd.read_csv('第四问1_390(1).csv')
xname = data.columns

fig, ax = plt.subplots()
boxprops = dict(facecolor='#ff9999')
# Plot the frame directly on the axes that is annotated below. pandas labels
# the ticks with the column names itself, so no `labels=` argument is needed.
bplot = data.boxplot(ax=ax, patch_artist=True, boxprops=boxprops)

# Boxes are drawn at x positions 1..N in column order.
for position, column in enumerate(xname, start=1):
    stats = BoxFeature(data[column].dropna())
    Q1 = stats[2]
    Q3 = stats[4]
    # Q1 is written just below the lower box edge, Q3 just above the upper one.
    ax.text(position, Q1, f'Q1: {Q1:.2f}', ha='center', va='top', color='b', fontsize=8)
    ax.text(position, Q3, f'Q3: {Q3:.2f}', ha='center', va='bottom', color='b', fontsize=8)
plt.grid(True)
plt.show()