题目
解:
准备好数据文件 sample.csv,
其各列与题目中的表格一一对应。
读取数据,并将两层的数据分开。
import pandas as pd
import numpy as np
# Load the sample and replace every missing cell with 0.
data = pd.read_csv("sample.csv").fillna(0)
# Stratum 1 takes columns 1-2 of every row; stratum 2 takes columns 3-4 of
# the first 10 rows only.
data1 = data.iloc[:, [1, 2]]
data2 = data.iloc[:10, [3, 4]]

# Horizontal rule printed between the sections of the report.
_SEPARATOR = "______________________________________________________"

# Legend: subscript 1 is the general-staff stratum, subscript 2 the executives.
print(
    _SEPARATOR,
    "下标为1的表示一般职员层(h=1)",
    "下标为2的表示高管(h=2)",
    _SEPARATOR,
    sep="\n",
)

# Population size of each stratum (N_h) and of the whole population (N).
N1, N2 = 390, 84
print('N1:{} \nN2:{}'.format(N1,N2))
N = N1 + N2
print("N:",N)
print(_SEPARATOR)

# Sample size of each stratum (n_h) and sampling fraction f_h = n_h / N_h.
n1, n2 = len(data1), len(data2)
f1, f2 = n1 / N1, n2 / N2
print("f1:{} \nf2:{}".format(f1,f2))
print(_SEPARATOR)

# Total sample size and stratum weights W_h = N_h / N.
n = n1 + n2
W1, W2 = N1 / N, N2 / N
print("W1:{} \nW2:{}".format(W1,W2))
print(_SEPARATOR)

# Population totals of X per stratum and overall.
X1 = 5523965  # stratum 1 total (staff stratum)
X2 = 2541660  # stratum 2 total (executive stratum)
X = X1 + X2
print("X1:{} \nX2:{}".format(X1,X2))
print("X",X)
print(_SEPARATOR)

# Population means of X: per stratum (X_h / N_h) and overall (X / N).
X1_mean, X2_mean, X_mean = X1 / N1, X2 / N2, X / N
print("X1_mean:{} \nX2_mean:{}".format(X1_mean,X2_mean))
print("X_mean:",X_mean)
print(_SEPARATOR)
# Per-stratum sample columns: within each stratum frame the first column is
# x and the second column is y.
x1 = data1.iloc[:, 0]
y1 = data1.iloc[:, 1]
x2 = data2.iloc[:, 0]
y2 = data2.iloc[:, 1]

# Sample means of x and y in each stratum.
x1_mean = x1.mean()  # general-staff stratum
y1_mean = y1.mean()
x2_mean = x2.mean()  # executive stratum
y2_mean = y2.mean()
print("x1_mean:{} \ny1_mean:{} \nx2_mean:{} \ny2_mean:{}".format(x1_mean,y1_mean,x2_mean,y2_mean))
print("______________________________________________________")

# Sample variances with the n - 1 divisor (ddof=1).
var_y1 = np.var(y1, ddof=1)
var_y2 = np.var(y2, ddof=1)
var_x1 = np.var(x1, ddof=1)
var_x2 = np.var(x2, ddof=1)

# Sample covariance of x and y: the off-diagonal entry of the 2x2 matrix
# returned by np.cov.
cov_xy1 = np.cov(x1, y1)[0][1]
cov_xy2 = np.cov(x2, y2)[0][1]

# The second entry previously repeated var_y1 (label and value), so var_y2
# was never reported; it now shows var_y2.
print("var_y1:{} \nvar_y2:{} \nvar_x1:{} \nvar_x2:{}".format(var_y1,var_y2,var_x1,var_x2))
print("cov_xy1:{} \ncov_xy2:{}".format(cov_xy1,cov_xy2))
print("______________________________________________________")
# Per-stratum sample ratio R_h = (mean of y) / (mean of x).
R1, R2 = y1_mean / x1_mean, y2_mean / x2_mean
print("R1:{} \nR2:{}".format(R1,R2))
print("______________________________________________________")

# Squared sample correlation coefficient between x and y in each stratum.
r1_squre = np.corrcoef(x1, y1)[0, 1] ** 2
r2_squre = np.corrcoef(x2, y2)[0, 1] ** 2
print("r1_squre:{} \nr2_squre:{}".format(r1_squre,r2_squre))
print("______________________________________________________")
分别比估计
def rs_result():
    """Compute the separate ratio estimate of the population total of Y.

    Reads the module-level stratum quantities (R1/R2, X1/X2, W1/W2, f1/f2,
    n1/n2, the sample variances and covariances, and N).

    Returns:
        A dict with three entries, in this order: the estimated total of Y,
        the estimated variance of the mean estimator, and N times the square
        root of that variance (the standard deviation of the total).
    """

    def stratum_term(W, f, n_h, R, s2_y, s2_x, s_xy):
        # W_h^2 (1 - f_h) / n_h * (s_y^2 + R_h^2 s_x^2 - 2 R_h s_xy)
        return ((W ** 2) * (1 - f)) / n_h * (s2_y + (R ** 2) * s2_x - 2 * R * s_xy)

    # Each stratum's own ratio is applied to that stratum's X total.
    total_estimate = R1 * X1 + R2 * X2
    mean_variance = (
        stratum_term(W1, f1, n1, R1, var_y1, var_x1, cov_xy1)
        + stratum_term(W2, f2, n2, R2, var_y2, var_x2, cov_xy2)
    )
    return {
        "Y总值的估计": total_estimate,
        "Y均值方差的估计": mean_variance,
        "Y标准差的估计": N * np.sqrt(mean_variance),
    }


rs_result()  # run the separate ratio estimation
联合比估计
def rc_result():
    """Compute the combined ratio estimate of the population total of Y.

    A single ratio is formed from the stratified sample means of y and x and
    applied to the overall X total. Reads the module-level stratum quantities
    (W1/W2, f1/f2, n1/n2, sample means, variances, covariances, X and N).

    Returns:
        A dict with three entries, in this order: the estimated total of Y,
        the estimated variance of the mean estimator, and N times the square
        root of that variance (the standard deviation of the total).
    """

    def stratum_term(W, f, n_h, R, s2_y, s2_x, s_xy):
        # W_h^2 (1 - f_h) / n_h * (s_y^2 + R^2 s_x^2 - 2 R s_xy)
        return ((W ** 2) * (1 - f)) / n_h * (s2_y + (R ** 2) * s2_x - 2 * R * s_xy)

    # Stratified (weighted) sample means of y and x.
    y_st = W1 * y1_mean + W2 * y2_mean
    x_st = W1 * x1_mean + W2 * x2_mean

    combined_ratio = y_st / x_st
    total_estimate = combined_ratio * X

    # Both strata use the same combined ratio in the variance formula.
    mean_variance = (
        stratum_term(W1, f1, n1, combined_ratio, var_y1, var_x1, cov_xy1)
        + stratum_term(W2, f2, n2, combined_ratio, var_y2, var_x2, cov_xy2)
    )
    return {
        "Y总值的估计": total_estimate,
        "Y均值方差的估计": mean_variance,
        "Y标准差的估计": N * np.sqrt(mean_variance),
    }


rc_result()
分别回归估计
def lrs_result():
    """Compute the separate regression estimate of the population total of Y.

    A regression slope is fitted within each stratum and used to adjust that
    stratum's sample mean of y towards the known stratum mean of X. Reads the
    module-level samples (x1/y1, x2/y2) and stratum quantities.

    Returns:
        A dict with three entries, in this order: the estimated total of Y,
        the estimated variance of the mean estimator, and N times the square
        root of that variance (the standard deviation of the total).
    """

    def slope(x, y, x_bar, y_bar):
        # Slope of y on x: sum of cross-deviations over sum of squared x-deviations.
        return np.sum((y - y_bar) * (x - x_bar)) / np.sum((x - x_bar) ** 2)

    def stratum_term(W, f, n_h, s2_y, r_sq):
        # W_h^2 (1 - f_h) / n_h * (n_h - 1) / (n_h - 2) * s_y^2 * (1 - r_h^2)
        return (W ** 2) * (1 - f) / (n_h * (n_h - 2)) * (n_h - 1) * s2_y * (1 - r_sq)

    slope_1 = slope(x1, y1, x1_mean, y1_mean)
    slope_2 = slope(x2, y2, x2_mean, y2_mean)

    # Regression-adjusted stratum means, scaled up by the stratum sizes.
    total_estimate = (
        N1 * (y1_mean + slope_1 * (X1_mean - x1_mean))
        + N2 * (y2_mean + slope_2 * (X2_mean - x2_mean))
    )
    mean_variance = (
        stratum_term(W1, f1, n1, var_y1, r1_squre)
        + stratum_term(W2, f2, n2, var_y2, r2_squre)
    )
    return {
        "Y总值的估计": total_estimate,
        "Y均值方差的估计": mean_variance,
        "Y标准差的估计": N * np.sqrt(mean_variance),
    }


lrs_result()
联合回归估计
def lrc_result():
    """Compute the combined regression estimate of the population total of Y.

    One pooled slope is built from the weighted stratum covariances and
    x-variances, then used to adjust the stratified sample mean of y towards
    the known overall mean of X. Reads the module-level stratum quantities.

    Returns:
        A dict with three entries, in this order: the estimated total of Y,
        the estimated variance of the mean estimator, and N times the square
        root of that variance (the standard deviation of the total).
    """

    def weighted(W, f, n_h, s):
        # W_h^2 (1 - f_h) * s / n_h
        return (W ** 2) * (1 - f) * s / n_h

    def stratum_term(W, f, n_h, b, s2_y, s2_x, s_xy):
        # W_h^2 (1 - f_h) / n_h * (s_y^2 + b^2 s_x^2 - 2 b s_xy)
        return ((W ** 2) * (1 - f)) / n_h * (s2_y + (b ** 2) * s2_x - 2 * b * s_xy)

    # Stratified (weighted) sample means of y and x.
    y_st = W1 * y1_mean + W2 * y2_mean
    x_st = W1 * x1_mean + W2 * x2_mean

    # Pooled slope: weighted covariances over weighted x-variances.
    pooled_slope = (
        weighted(W1, f1, n1, cov_xy1) + weighted(W2, f2, n2, cov_xy2)
    ) / (
        weighted(W1, f1, n1, var_x1) + weighted(W2, f2, n2, var_x2)
    )

    total_estimate = N * (y_st + pooled_slope * (X_mean - x_st))
    mean_variance = (
        stratum_term(W1, f1, n1, pooled_slope, var_y1, var_x1, cov_xy1)
        + stratum_term(W2, f2, n2, pooled_slope, var_y2, var_x2, cov_xy2)
    )
    return {
        "Y总值的估计": total_estimate,
        "Y均值方差的估计": mean_variance,
        "Y标准差的估计": N * np.sqrt(mean_variance),
    }


lrc_result()
差估计(β或βh设定为常数1)
def d_result():
    """Compute the difference estimate of the population total of Y.

    Same form as the combined regression estimate, but with the slope fixed
    at the constant 1. Reads the module-level stratum quantities.

    Returns:
        A dict with three entries, in this order: the estimated total of Y,
        the estimated variance of the mean estimator, and N times the square
        root of that variance (the standard deviation of the total).
    """

    def stratum_term(W, f, n_h, s2_y, s2_x, s_xy):
        # W_h^2 (1 - f_h) / n_h * (s_y^2 + s_x^2 - 2 s_xy)
        return ((W ** 2) * (1 - f) / n_h) * (s2_y + s2_x - 2 * s_xy)

    # Stratified (weighted) sample means of y and x.
    y_st = W1 * y1_mean + W2 * y2_mean
    x_st = W1 * x1_mean + W2 * x2_mean

    total_estimate = N * (y_st + (X_mean - x_st))
    mean_variance = (
        stratum_term(W1, f1, n1, var_y1, var_x1, cov_xy1)
        + stratum_term(W2, f2, n2, var_y2, var_x2, cov_xy2)
    )
    return {
        "Y总值的估计": total_estimate,
        "Y均值方差的估计": mean_variance,
        "Y标准差的估计": N * np.sqrt(mean_variance),
    }


d_result()