Python编程--利用ENGS求最佳样本容量n--Bayes-5.18

最新推荐文章于 2022-05-31 21:49:09 发布

小果一粒沙

最新推荐文章于 2022-05-31 21:49:09 发布

阅读量1k

点赞数 1

分类专栏： python 统计学文章标签：贝叶斯统计

本文链接：https://blog.csdn.net/qq_35167821/article/details/109830478

版权

python 同时被 2 个专栏收录

26 篇文章 3 订阅

订阅专栏

统计学

18 篇文章 1 订阅

订阅专栏

（参考课本：《贝叶斯统计》-茆诗松-第一版-5.5最佳样本量的确定
例题5.18-5.19

题目：

某上考虑是否向一县办厂订购一种家用电器（以下简称电器）。该厂生产的电器有一等品和二等品两个等级，一等品与二等品的数量之比有1:1和2:1两种可能，其概率分别为0.45和0.55。

如果买到的是一等品，与一般市场价格相比较，每只可赚10元。
如果买到二等品，每只要亏15元。

假如该厂允许在一批电器中抽取若干只进行检验，根据抽样结果决定是否订购该批（900只）电器。但抽样总的费用为每只20元。这时商店必须考虑多少只最合算？

求上界

$n^* \le \frac{\text{先验}EVPI-C_f}{C_v}$
此题求得为41

求最佳样本量

若对每一个 $\cdots, n^*)$ 值，都算得到 $E N S I (n)$ 和抽样成本 $C (n)$ 。
由1计算得到所有的 $E N G S (n)$ 。
找到2中最大的 $E N G S (n)$ 所对应的 $n$ 即为最佳的样本容量 $n^*$ 。

详细步骤

Step1:计算 $\theta$ 的后验分布

求 $p(x_i|\theta)$ ; $p(x_i|\theta)=\theta^{x_i}(1-\theta)^{1-x_i}$ ，计算出 $x_i$ 的分布。
求 $m(x_i)$ , $m(x_i)=\sum_j p(x_i|\theta_j)\pi(\theta_j)$ 。
求 $p(\theta|x_i)$ , $p(\theta|x_i)=\frac{p(x_i|\theta)\pi(\theta)}{m(x_i)}$

Step2:计算后验信息期望值(后验EVSI)

对每一个行动 $a(a_1, a_2)$ 计算其后验期望损失值 $E^{\theta | x}[L(\theta, a)]$ 。
计算在抽样信息值 $x_1$ 下的后验最优决策函数 $\delta'(x).$
计算后验EVSI的期望值 $E^x\{E^{\theta|x}[L(\theta,a(x))]\}$

Step3:计算抽样信息期望值EVSI

先验EVPI
后验EVPI
计算抽样信息期望值EVSI

$\text{先验}EVPI - \text{后验}EVPI$

Step4: 计算抽样净益ENGS(n)

给出抽样成本 $C (n)$ .
由 $E N G S (n) = E V S I (n) - C (n)$ 计算得到抽样净益ENGS.

python编程实现

import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt



# ==================基本参数==================
PI_THETA = np.array([0.45, 0.55])

THETA = [[1/2, 1/2], [2/3, 1/3]]
THETA = np.array(THETA)

# 固定成本、一等品和二等品的收益亏损情况
C_F = 0
COST = np.array([10, -15])

# sample cost
C_V = 20
# 购进的电器数量
N = 900
# ==================一、计算上界==================

# 收益矩阵
def revenue_mat(COST=COST, THETA=THETA):
    return COST.T @ THETA.T

# Q矩阵
def calc_Qmat(N=N):
    R_mat = revenue_mat()
    R_mat = np.concatenate((R_mat, [0, 0]), axis=0).reshape(2,2).T
    Q_mat = N * R_mat
    return Q_mat

# L矩阵--损失矩阵
def clac_Lmat(Q_mat:np.array):
    col_max = np.max(Q_mat, axis=1, keepdims=True)
    print(col_max)
    L_mat = col_max - Q_mat
    return L_mat

# 先验EVPI
def clac_afore_EVPI(L_mat, PI_THETA=PI_THETA):
    afore_EVPI_mat = PI_THETA.T @ L_mat
    return np.min(afore_EVPI_mat)

# 计算上界
def calc_bound(afore_EVPI, C_V=C_V):
    return np.floor(afore_EVPI / C_V)

# ==================二、计算最佳样本量==================
class ENGS(object):
    def __init__(self, n, L_mat, afore_EVPI, THETA=THETA, PI_THETA=PI_THETA, C_V=C_V):
        self.n = n
        self.L_mat = L_mat
        self.afore_EVPI = afore_EVPI
        self.THETA = THETA
        self.PI_THETA = PI_THETA
        self.C_V = C_V

    # 计算一个分布列
    def calc_pmf(self, p):
        p_li = []
        for i in range(self.n+1):
            p_li.append(stats.binom.pmf(i, self.n, p))
        return p_li

    # x的条件分布|\theta
    def calc_xi_condition_theta(self):
        a_li = []
        for pi in self.THETA:
            a_li.append(self.calc_pmf(pi[1]))
        self.xi_condition_theta = np.array(a_li).T
        return np.array(a_li).T

    # x_i 每个取值的边缘分布
    def calc_mx(self):
        xi_condition_theta = self.calc_xi_condition_theta()
        mx = self.xi_condition_theta @ self.PI_THETA
        self.mx = mx
        return mx

    # \theta的后验分布
    def calc_theta_posterior(self):
        xi_condition_theta = self.calc_xi_condition_theta()
        part_up = xi_condition_theta @ np.diag(self.PI_THETA)
        part_down = self.calc_mx()
        theta_posterior = part_up / part_down.reshape(self.n+1, 1)
        self.theta_posterior = theta_posterior
        return theta_posterior

    # 后验期望损失
    def calc_post_expect_loss(self):
        theta_posterior = self.calc_theta_posterior()
        post_expect_loss = theta_posterior @ self.L_mat
        self.post_expect_loss = post_expect_loss
        return post_expect_loss

    # 后验EVPI期望
    def calc_post_EVPI_expect(self):
        cols = ['theta_1', 'theta_2']
        post_expect_loss = self.calc_post_expect_loss()
        df = pd.DataFrame(data=post_expect_loss)
        df['min_loss_expect'] = np.min(post_expect_loss, axis=1)
        df['delta_x'] = np.argmin(post_expect_loss, axis=1)

        mx = self.calc_mx()
        dada= df['min_loss_expect'] @ mx
        return dada

    # 抽样信息净益
    def calc_EVSI(self, afore_EVPI, post_EVPI):
        self.EVSI = afore_EVPI - post_EVPI
        return afore_EVPI - post_EVPI

    # 抽样净益
    def calc_ENGS(self):
        # return EVSI - C_V*n
        post_expect_loss = self.calc_post_expect_loss()
        post_EVPI_expect = self.calc_post_EVPI_expect()
        EVSI = self.calc_EVSI(self.afore_EVPI, post_EVPI_expect)
        self.ENGS_value = EVSI - C_V*self.n

# 收益矩阵和损失矩阵
Q_mat = calc_Qmat()
L_mat = clac_Lmat(Q_mat)
# 先验EVI
afore_EVPI = clac_afore_EVPI(L_mat)
# 上界
n_star = calc_bound(afore_EVPI)
print(Q_mat)
print(L_mat)
print(afore_EVPI)
print(n_star)

# 可以由这几行，输出你想检查的数据、结果
# sample = ENGS(7, L_mat, afore_EVPI)
# sample.calc_ENGS()

# ==================二、计算最佳样本量--输出==================
def find_best_n(n_star=n_star):
    lala = []
    for i in range(1, int(n_star)+1):
        sample = ENGS(i, L_mat, afore_EVPI)
        sample.calc_ENGS()
        lala.append([i, sample.ENGS_value])
    df = pd.DataFrame(lala, columns=['n', 'ENGS'])
    best_n, best_ENGS = df.sort_values(by='ENGS').iloc[-1]
    # best_sample = ENGS(int(best_n), L_mat, afore_EVPI)
    # best_sample.calc_ENGS()
    # 均检查用 可输出最优ENGS类的所有属性
    # print(best_sample.mx)
    # print(best_sample.theta_posterior)
    # print(best_sample.post_expect_loss)
    return best_n, best_ENGS


best_n, best_ENGS = find_best_n()
print('best_n, best_ENGS:\n',best_n, best_ENGS)

# ==================三、绘图==================
# 绘图数据
data = []
for i in range(1, int(n_star)+1):
    sample = ENGS(i, L_mat, afore_EVPI)
    sample.calc_ENGS()
    # print('ENGS', sample.ENGS_value) # 打印输出检查
    data.append([i, sample.ENGS_value, sample.afore_EVPI, sample.EVSI])
# print(data)
df = pd.DataFrame(data, columns=['n', 'ENGS', 'EVPI', 'EVSI'])

x_data = list(range(1, int(n_star)+1))
y_data = df['ENGS']
y_data2 = [n*C_V for n in x_data]
y_data3 = df['EVPI']
y_data4 = df['EVSI']

# 设置字体、正负号
plt.rcParams['font.family'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# 画图
plt.plot(x_data,y_data,label='ENGS',color='r',linewidth=1.0,linestyle='-')
plt.plot(x_data,y_data2,label='C',color='k',linewidth=2.0,linestyle='-')
plt.plot(x_data,y_data3,label='EVPI',color='y',linewidth=1.0,linestyle='-')
plt.plot(x_data,y_data4,label='EVSI',color='b',linewidth=1.0,linestyle='-')

plt.legend(loc="upper right")
plt.xlabel('n')
plt.ylabel('y轴')

# 标记一个点
plt.plot(best_n, best_ENGS,'o')
plt.annotate('(7.0, 101.38736175411486)', xy=(best_n, best_ENGS), xytext=(best_n+2, best_ENGS-250),
             arrowprops=dict(facecolor='black', shrink=0.05)
             )
plt.axhline(y=best_ENGS,ls=":",c="k")#添加水平直线
plt.title("抽样净益期望值曲线") #设置标题及字体

plt.show()
plt.savefig('ENGS_line.jpg')

结果

# Q_mat
[[-2250.     0.]
 [ 1500.     0.]]
# L_mat
[[2250.    0.]
 [   0. 1500.]]
# afore_EVPI 先验EVPI
824.9999999999998
# n_star 样本量的上界
41.0
# best_mx X的边缘分布
[0.03570584 0.13727513 0.24282675 0.26387907 0.19346297 0.09495295
 0.02813018 0.00376711]
# $\theta|x$ theta的后验分布
[[0.09846078 0.90153922]
 [0.17927046 0.82072954]
 [0.30403621 0.69596379]
 [0.46630025 0.53369975]
 [0.63602288 0.36397712]
 [0.77752321 0.22247679]
 [0.87483888 0.12516112]
 [0.93324167 0.06675833]]
# post_expect_loss 后验期望损失
[[ 221.5367646  1352.3088236 ]
 [ 403.35853181 1231.09431213]
 [ 684.08146558 1043.94568962]
 [1049.17557035  800.54961977]
 [1431.0514751   545.96568326]
 [1749.42721706  333.71518862]
 [1968.38748389  187.74167741]
 [2099.79375089  100.13749941]]
# 最佳样本量、ENGS
best_n, best_ENGS:
 7.0 101.38736175411486

在这里插入图片描述

小果一粒沙

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
2
评论
Python编程--利用ENGS求最佳样本容量n--Bayes-5.18

题目：某上考虑是否向一县办厂订购一种家用电器（以下简称电器）。该厂生产的电器有一等品和二等品两个等级，一等品与二等品的数量之比有1:1和2:1两种可能，其概率分别为0.45和0.55。如果买到的是一等品，与一般市场价格相比较，每只可赚10元。如果买到二等品，每只要亏15元。假如该厂允许在一批电器中抽取若干只进行检验，根据抽样结果决定是否订购该批（900只）电器。但抽样总的费用为每只20元。这时商店必须考虑多少只最合算？求上界n∗≤先验EVPI−CfCvn^* \le \frac{\text{
复制链接

扫一扫

专栏目录