给定一维随机分布生成随机数
假设概率分布为f(x)
,其CDF为g(x)
,我们不加证明的给出,产生[0, 1)
之间的随机数,并将其输入g(x)
的反函数,得到的值服从f(x)
分布,实现的代码如下:
import numpy as np
from scipy.interpolate import interp1d, UnivariateSpline
from scipy.integrate import cumtrapz, trapz
#import matplotlib.pyplot as plt
def get_inv_cdf(f, x=None, x_range=None):
'''
Get inverse of CDF given PDF f(x)
f: PDF, no need for normalization
x: None or array_like, input x to evaluate f, if None, use xmin, xmax and npoints to generate x
x_range: None or array_like with length 3, [xmin, xmax, npoints]
xmin: the minimum for x to evaluate f, f(x)\approx 0 for x<xmin
xmax: the maximum for x to evaluate f, f(x)\approx 0 for x>xmax
npoints: number of points to evaluate f
return: callable
inverse of CDF
'''
assert x is not None or x_range is not None, 'Input x or x_range'
if x is None:
x = np.linspace(*x_range)
else:
x = np.asarray(x)
y = f(x)
cdf = cumtrapz(y, x)
cdf = np.append(0, cdf)
dcdf = np.diff(cdf)
dcdf = np.append(0, dcdf)
cdf = cdf[dcdf>0]
# print(np.diff(cdf).min())
x = x[dcdf>0]
cdf = cdf/cdf[-1]
invcdf = UnivariateSpline(cdf, x, k=3, s=0, ext=3)
# plt.figure()
# plt.plot(x, cdf)
return invcdf
def get_rvs(f, x=None, x_range=None):
'''
Get random variable sampler given PDF f(x)
f: PDF, no need for normalization
x: None or array_like, input x to evaluate f, if None, use xmin, xmax and npoints to generate x
x_range: None or array_like with length 3, [xmin, xmax, npoints]
xmin: the minimum for x to evaluate f, f(x)\approx 0 for x<xmin
xmax: the maximum for x to evaluate f, f(x)\approx 0 for x>xmax
npoints: number of points to evaluate f
return: rvs, callable
random variable sampler, usage is the same as np.random.rand
'''
invcdf = get_inv_cdf(f=f, x=x, x_range=x_range)
def _rvs(*args, **kwargs):
return invcdf(np.random.rand(*args, **kwargs))
return _rvs
if __name__ == '__main__':
import matplotlib.pyplot as plt
def normalize(x, y):
x = np.asarray(x)
y = np.asarray(y)
norm = trapz(y, x)
return y/norm
def pdf(xin):
xin = np.asarray(xin)
result = np.zeros_like(xin)
x = xin[(xin>=1)&(xin<3)]
result[(xin>=1)&(xin<3)] = x + 2*x**2 + x**3/2.
return result
x = np.linspace(-2, 5, 1001)
y = pdf(x)
inv_cdf = get_inv_cdf(pdf, x_range=[-3, 6, 1001])
rvs = get_rvs(pdf, x_range = [-3, 6, 1001])
rvs2 = get_rvs(pdf, x = np.logspace(-2, 1.2, 1001))
plt.figure()
plt.hist(rvs(100000), bins=100, alpha=0.5, density=True)
plt.plot(x, normalize(x, y))
plt.figure()
plt.hist(rvs2(100000), bins=100, alpha=0.5, density=True)
plt.plot(x, normalize(x, y))
plt.figure()
plt.hist(inv_cdf(np.random.rand(100000)), bins=100, alpha=0.5, density=True)
plt.plot(x, normalize(x, y))
plt.show()
def gaus(x, scale, mean):
return np.exp(-(x-mean)**2/2./scale**2)/np.sqrt(2*np.pi*scale**2)
s1 = np.random.normal(scale=1.23, loc=0.32, size=100000)
s2 = get_rvs(lambda x:gaus(x, 1.23, 0.32), x_range=[-10,10,1001])(100000)
x = np.linspace(-12, 12, 1001)
plt.plot(x, gaus(x, 1.23, 0.32))
plt.hist(s1, bins=200, density=True, alpha=0.5)
plt.hist(s2, bins=200, density=True, alpha=0.5)
plt.show()