import statistics
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Workbook to analyse; adjust the path, sheet name and column for your data.
excelFile = r"C:\工作簿.xlsx"
# Read the parking-space length column (Excel column "AC").
df = pd.read_excel(excelFile, sheet_name="列名", usecols="AC", names=None)
lengths = df.values.tolist()
# Each row is a one-element list; keep its value unless it is NaN.
result = [row[0] for row in lengths if not math.isnan(row[0])]
mu = statistics.mean(result)  # mean
print("均值:{0:.2f}".format(mu))
# Population standard deviation, reusing the mean computed above.
sigma = statistics.pstdev(result, mu)
print("标准差:{0:.2f}".format(sigma))
print("最大值:" + str(max(result)))
print("最小值:" + str(min(result)))
# normfun: probability density function of the normal distribution.
# Evaluates the density at x for the given mean and standard deviation.
def normfun(x, mu, sigma):
    """Return the density of the normal distribution N(mu, sigma**2) at x.

    Args:
        x: Scalar or array-like of points at which to evaluate the density.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution; expected to be
            positive (the formula divides by it).

    Returns:
        The density value(s): a NumPy scalar for scalar x, otherwise an
        ndarray with the same shape as x.
    """
    # Coerce so plain Python sequences work as well as scalars and ndarrays.
    x = np.asarray(x, dtype=float)
    pdf = np.exp(-((x - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))
    return pdf
# Compute the interval of the fitted normal distribution for a given level.
def calculate_conf_intveral(num, mu, sigma):
    """Print and return the central interval of N(mu, sigma**2) for level num.

    The interval is the range centred on the median that contains a
    fraction ``num`` of the distribution, as given by
    ``scipy.stats.norm.interval``.

    Args:
        num: Probability mass to enclose, as a fraction between 0 and 1
            (for example 0.95 for 95%).
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        A ``(lower, upper)`` tuple with the interval end points.
    """
    conf_intveral = stats.norm.interval(num, loc=mu, scale=sigma)
    # "g" keeps fractional levels such as 99.5% readable; ".0f" would round
    # them to a misleading whole percentage. Whole levels print unchanged.
    print("{0:g}%置信区间范围:{1:.2f}~{2:.2f}".format(num * 100, conf_intveral[0], conf_intveral[1]))
    return conf_intveral
# Set the chart title and the x-axis / y-axis labels.
def set_title_and_label(title, xlabel, ylabel):
    """Apply a title and axis labels to the current pyplot figure.

    Args:
        title: Text passed to ``plt.title``.
        xlabel: Text passed to ``plt.xlabel``.
        ylabel: Text passed to ``plt.ylabel``.
    """
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
# Intervals of the fitted distribution at each level; every call also prints
# its interval, in the order the levels are listed.
conf1, conf2, conf3, conf4, conf5, conf6, conf7, conf8 = [
    calculate_conf_intveral(level, mu, sigma)
    for level in (0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 0.99)
]
# NOTE: the commented-out code below is the earlier, per-level version of the
# interval calculation; the calculate_conf_intveral calls above supersede it.
# # 60% confidence interval
# conf_intveral1 = stats.norm.interval(0.6, loc=mu, scale=sigma)
# print("60%置信区间范围:{0:.2f}~{1:.2f}".format(conf_intveral1[0], conf_intveral1[1]))
# # 70% confidence interval
# conf_intveral2 = stats.norm.interval(0.7, loc=mu, scale=sigma)
# print("70%置信区间范围:{0:.2f}~{1:.2f}".format(conf_intveral2[0], conf_intveral2[1]))
# # 80% confidence interval
# conf_intveral3 = stats.norm.interval(0.8, loc=mu, scale=sigma)
# print("80%置信区间范围:{0:.2f}~{1:.2f}".format(conf_intveral3[0], conf_intveral3[1]))
# # 85% confidence interval
# conf_intveral4 = stats.norm.interval(0.85, loc=mu, scale=sigma)
# print("85%置信区间范围:{0:.2f}~{1:.2f}".format(conf_intveral4[0], conf_intveral4[1]))
# # 90% confidence interval
# conf_intveral5 = stats.norm.interval(0.9, loc=mu, scale=sigma)
# print("90%置信区间范围:{0:.2f}~{1:.2f}".format(conf_intveral5[0], conf_intveral5[1]))
# # 95% confidence interval
# conf_intveral6 = stats.norm.interval(0.95, loc=mu, scale=sigma)
# print("95%置信区间范围:{0:.2f}~{1:.2f}".format(conf_intveral6[0], conf_intveral6[1]))
# # 99% confidence interval
# conf_intveral7 = stats.norm.interval(0.99, loc=mu, scale=sigma)
# print("99%置信区间范围:{0:.2f}~{1:.2f}".format(conf_intveral7[0], conf_intveral7[1]))
# # print(conf_intveral)
# Sample the fitted density across the observed data range, in steps of 1.
x = np.arange(min(result), max(result), 1)
y = normfun(x, mu, sigma)
# Draw the fitted normal-distribution curve.
plt.plot(x, y, linewidth=2)
# Draw the density-normalised histogram of the data.
bin_width = 100  # width of one histogram bin; adjust to suit the data
plt.hist(result, bins=int((max(result) - min(result)) / bin_width + 1), rwidth=0.95, density=True, stacked=True)
# Interval markers reach the peak of the fitted density, so their height
# follows the data instead of relying on a fixed value.
line_top = normfun(mu, mu, sigma)
# (interval, colour, legend label); comment an entry in or out to toggle it.
interval_markers = (
    (conf1, "dimgray", "50%"),
    # (conf2, "lightgreen", "60%"),
    # (conf3, "tomato", "70%"),
    (conf4, "cyan", "80%"),
    # (conf7, "navy", "95%"),
    (conf8, "violet", "99%"),
)
for interval, color, label in interval_markers:
    # Only the lower bound carries the label so each level is listed once.
    plt.vlines(interval[0], 0, line_top, colors=color, linestyles='--', label=label)
    plt.vlines(interval[1], 0, line_top, colors=color, linestyles='--')
plt.legend()  # show the labels attached to the vlines
# Placeholder title and axis labels; adjust for your chart.
set_title_and_label("表名", "x轴标签", "y轴标签")
plt.show()
# Read spreadsheet data and plot a normal distribution chart.
# First published 2022-03-23 17:44:46.