def fit_model(data, dist, timeout=100):
    """Fit a single named distribution to ``data`` using ``fitter.Fitter``.

    :param data: sample values to fit.
    :param dist: name of the candidate distribution, passed to ``Fitter``
        as the only entry of its ``distributions`` list (e.g. ``"gamma"``).
    :param timeout: value forwarded to ``Fitter``'s ``timeout`` argument;
        defaults to 100, the value that used to be hard-coded.
    :return: whatever ``Fitter.get_best()`` returns for the fitted model
        (presumably a ``{name: params}`` mapping -- confirm against the
        installed fitter version).
    """
    # Imported lazily so that importing this module does not require fitter.
    from fitter import Fitter

    fitter = Fitter(data, timeout=timeout, distributions=[dist])
    fitter.fit()
    return fitter.get_best()
def run_():
    """Demo: draw gamma-distributed samples, fit them and report the result.

    Generates 10000 samples from ``gamma(a=2, loc=1.5, scale=2)``, runs a
    ``Fitter`` over them (no explicit distribution list is given, so the
    library's own default candidates are used), prints the fit summary,
    shows the pending matplotlib figures and prints the best fit.
    """
    import matplotlib.pyplot as plt
    from fitter import Fitter
    from scipy import stats

    samples = stats.gamma.rvs(2, loc=1.5, scale=2, size=10000)

    fitter = Fitter(samples, timeout=60)
    fitter.fit()

    summary = fitter.summary()
    print(summary)
    # Display whatever figures have been drawn so far (presumably by
    # summary() -- confirm against the fitter documentation).
    plt.show()

    best = fitter.get_best()
    print('最佳分布:', best)
def run():
    """Plot the pdf of a gamma distribution with fixed parameters.

    The pdf is evaluated at 10 evenly spaced points on [0, 10] and drawn
    with the ``'o-'`` line style on the current pylab axes. The figure is
    not shown here; the caller decides when to display it.
    """
    import scipy.stats
    from pylab import linspace, plot

    # Positional arguments of scipy's gamma.pdf after x: shape, loc, scale.
    shape, loc, scale = 1.9870, 1.5026, 2.0174
    grid = linspace(0, 10, 10)
    density = scipy.stats.gamma.pdf(grid, shape, loc, scale)
    plot(grid, density, 'o-')
if __name__ == "__main__":
    # Script entry point: run the end-to-end fitting demo.
    run_()
"""偏度计算,判断数据分布是否存在长尾"""
import numpy as np
def sample_skewness(data):
    """Return the median-based skewness ``3 * (mean - median) / std``.

    :param data: sample values accepted by ``np.mean`` / ``np.median`` /
        ``np.std``.
    :return: the skewness estimate, or ``0`` when the standard deviation
        is zero (the ratio would be undefined).
    """
    spread = np.std(data)
    # Guard clause: constant data has no spread, so report no skew.
    if spread == 0:
        return 0
    offset = np.mean(data) - np.median(data)
    return 3 * offset / spread
def third_central_moment(data):
    """Return the standardized third central moment (moment skewness).

    Computes ``mean(((x - mean) / std) ** 3)`` over ``data`` using the
    population standard deviation from ``np.std``.

    :param data: sample values accepted by ``np.asarray``.
    :return: the skewness estimate, or ``0`` when the standard deviation
        is zero.
    """
    values = np.asarray(data)
    mean = np.mean(values)
    std = np.std(values)
    # Constant data would divide by zero and yield NaN; report zero skew
    # instead, matching the behavior of sample_skewness.
    if std == 0:
        return 0
    # Vectorized instead of a Python-level loop over the elements.
    standardized = (values - mean) / std
    return np.mean(standardized ** 3)
if __name__ == "__main__":
    # Quick manual check: print both skewness estimates for a tiny sample.
    data = [1, 2, 3, 4, 2, 1]
    print(sample_skewness(data), third_central_moment(data))
import numpy as np
from fit import skewness as sk
from fit import prodist as dt
def dist(data, buff):
    """Fit ``data`` with a distribution family chosen by its skewness.

    :param data: the samples to fit.
    :param buff: skewness threshold; when ``|skew| >= buff`` a gamma
        distribution is fitted, otherwise a normal distribution.
    :return: the result of ``dt.fit_model`` for the chosen family
        (described by the original author as a dictionary).
    """
    skew = sk.sample_skewness(data)
    print("skew:", skew)

    # Markedly skewed data -> gamma fit; roughly symmetric data -> Gaussian.
    family = "gamma" if np.absolute(skew) >= buff else "norm"
    fitted = dt.fit_model(data, family)

    print(fitted)
    return fitted
def quantile(dist, q=0.9):
    """Return (and print) the ``q`` quantile of a fitted distribution.

    :param dist: mapping whose first key is the distribution name
        (``"gamma"`` or ``"norm"``) and whose value is a dict of fitted
        parameters: ``a``/``loc``/``scale`` for gamma, ``loc``/``scale``
        for norm. ``None`` or an empty mapping is accepted.
    :param q: probability level of the quantile; defaults to 0.9, the
        value that used to be hard-coded.
    :return: the quantile value, or ``0`` when ``dist`` is ``None``, empty
        or names an unsupported distribution.
    """
    from scipy.stats import gamma, norm

    upper_quantile = 0
    # Truthiness covers both None and an empty mapping (the latter used to
    # raise IndexError).
    if dist:
        dist_name = next(iter(dist))
        params = dist[dist_name]
        if dist_name == "gamma":
            # The fitted location must be passed through; without it the
            # quantile is shifted by -loc relative to the fitted model.
            upper_quantile = gamma.ppf(
                q, a=params['a'], loc=params['loc'], scale=params['scale']
            )
        elif dist_name == "norm":
            upper_quantile = norm.ppf(
                q, loc=params['loc'], scale=params['scale']
            )
    print(upper_quantile)
    return upper_quantile
if __name__ == "__main__":
    # Demo: fit synthetic gamma samples, then print the upper quantile of
    # the selected distribution.
    from scipy import stats

    data = stats.gamma.rvs(2, loc=1.5, scale=2, size=10000)
    dtb = dist(data, 0.5)
    quantile(dtb)