一个Bin的信号强度上限的假设检验(代码)
如有错误,欢迎指正!
此篇基础在一个Bin的信号强度上限的假设检验(上)
本篇主要是其中代码的实现。
问题
一个Bin中模型在单位时间内给出S=10.5,B=100.5的结果但是不能确定S是否真实存在,而且目前模型对于B来说有1%不确定度,通过分析确定多长时间能够达到1倍信号强度上限,以及给定数据Nobs=99确定上限。
代码
以下代码均在Python
,PyROOT
下。
模型建立
需要建立最大似然函数为
L
(
n
o
b
s
;
μ
,
B
)
=
exp
[
−
(
100
−
B
)
2
2
(
B
×
1
%
)
2
]
×
(
10.5
μ
+
B
)
n
o
b
s
e
−
10.5
S
−
B
n
o
b
s
!
L(n_{obs};\mu,B)=\exp{[-\frac{(100-B)^2}{2(B\times1\%)^2}]}\times\frac{(10.5\mu+B)^{n_{obs}}e^{-10.5S-B}}{n_{obs}!}
L(nobs;μ,B)=exp[−2(B×1%)2(100−B)2]×nobs!(10.5μ+B)nobse−10.5S−B
Sstr="s[10.5]*mu[1.0,0.,100.]"
Bstr="b[100.5]*ratioBkgEff[1.,0.,3.]"
obsstr="obs[111,0,200]"
wspace = ROOT.RooWorkspace()
wspace.factory("mutime[1.0,0.,10]")
wspace.factory("Poisson::countingModel("+obsstr+",prod::Numberofcount(mutime,sum("+Sstr+","+Bstr+")))") # counting model
wspace.factory("Gaussian::bkgConstraint(gSigBkg[1,0,3],ratioBkgEff,0.01)") #1%
wspace.factory("PROD::modelWithConstraints(countingModel,bkgConstraint)") # product of terms
wspace["gSigBkg"].setConstant(True)#让bkg理论中心为1#一定要有"True"
wspace["mutime"].setConstant(True)
wspace.SetName("w")
#wspace.Print()
上述方法是最方便定义变量、函数、模型的一种方法。后续要取任一元素只需要wspcae["mu"]
即可。
假设检验模型建立
modelConfig_b = ROOT.RooStats.ModelConfig("ModelConfig_b",wspace)
modelConfig_b.SetPdf(wspace["modelWithConstraints"])
modelConfig_b.SetObservables({wspace["obs"]})
wspace["mu"].setVal(0.0)#设置这个mode特殊的mu值
modelConfig_b.SetParametersOfInterest({wspace["mu"]})
modelConfig_b.SetSnapshot({wspace["mu"]})#合并
modelConfig_b.SetGlobalObservables({wspace["gSigBkg"],wspace["mutime"]})
modelConfig_b.SetNuisanceParameters({wspace["ratioBkgEff"]})
modelConfig = ROOT.RooStats.ModelConfig("ModelConfig",wspace)
modelConfig.SetPdf(wspace["modelWithConstraints"])
modelConfig.SetObservables({wspace["obs"]})
modelConfig.SetGlobalObservables({wspace["gSigBkg"],wspace["mutime"]})
modelConfig.SetNuisanceParameters({wspace["ratioBkgEff"]})
modelConfig.SetParametersOfInterest({wspace["mu"]})
modelConfig_sb = ROOT.RooStats.ModelConfig("ModelConfig_sb",wspace)
modelConfig_sb.SetPdf(wspace["modelWithConstraints"])
modelConfig_sb.SetObservables({wspace["obs"]})
modelConfig_sb.SetGlobalObservables({wspace["gSigBkg"],wspace["mutime"]})
modelConfig_sb.SetNuisanceParameters({wspace["ratioBkgEff"]})
wspace["mu"].setVal(1.0)#设置这个mode特殊的mu值
modelConfig_sb.SetSnapshot({wspace["mu"]})#合并
modelConfig_sb.SetParametersOfInterest({wspace["mu"]})
wspace.Import(modelConfig)
wspace.Import(modelConfig_b)
wspace.Import(modelConfig_sb)
这些定义了纯本底模型和带信号的模型。其实拟合过程也跟这个差不多。
数据导入
obs=wspace["obs"]
if Nobs>-0.01:
obs.setVal(Nobs)
else:
obs.setVal(time*(1*S+B))
data=ROOT.RooDataSet('data_','data_',{obs})
data.add(obs)
#data=wspace["modelWithConstraints"].generate(wspace["obs"], 10)#产生数据集
wspace.Import(data)
计算器选择
利用AsymptoticCalculator计算器
AC=ROOT.RooStats.AsymptoticCalculator(data,modelConfig_b, modelConfig_sb)#,nominalAsimov=True
AC.SetOneSided(True)#Discovery
# muhat=AC.GetMuHat()
# ACHT=AC.GetHypoTest()
# #ACHT.SetPValueIsRightTail(True)
# CLb=ACHT.CLb();CLsb=ACHT.CLsplusb()
calc = ROOT.RooStats.HypoTestInverter(AC)
calc.SetVerbose(False)
calc.SetConfidenceLevel(1-α)
calc.UseCLs(True)
HypoTestInverter方便后续扫描参数值。
检验统计量选择
利用SimpleLikelihoodRatioTestStat统计量
slrts=ROOT.RooStats.SimpleLikelihoodRatioTestStat(modelConfig_sb.GetPdf(), modelConfig_b.GetPdf())
nullParams=ROOT.RooArgSet(modelConfig_sb.GetSnapshot())
nullParams.add(modelConfig_sb.GetNuisanceParameters())
slrts.SetNullParameters(nullParams)
altParams=ROOT.RooArgSet(modelConfig_b.GetSnapshot())
altParams.add(modelConfig_b.GetNuisanceParameters())
slrts.SetAltParameters(altParams)
calc.SetTestStatistic(slrts)
# ropl=ROOT.RooStats.RatioOfProfiledLikelihoodsTestStat(modelConfig_sb.GetPdf(), modelConfig_b.GetPdf(), modelConfig_b.GetSnapshot())#def
# ropl.SetSubtractMLE(False)
# calc.SetTestStatistic(ropl)
# min and max for scan (better to choose smaller intervals)
# poimin = wspace.var("mu").getMin()
# poimax = wspace.var("mu").getMax()
计算输出
calc.SetFixedScan(*mu区间)
r = calc.GetInterval()
upperLimit = r.UpperLimit()
ulError = r.UpperLimitEstimatedError()
print( "The computed upper limit is: ", upperLimit," +/- ",ulError)
# compute expected limit
print("Expected upper limits using b-only model : ")
print("median limit = " , r.GetExpectedUpperLimit(0) )
print("med. limit (-1 sig) " , r.GetExpectedUpperLimit(-1))
print("med. limit (+1 sig) " , r.GetExpectedUpperLimit(1))
print("med. limit (-2 sig) " , r.GetExpectedUpperLimit(-2))
print("med. limit (+2 sig) " , r.GetExpectedUpperLimit(2))
obsUL=upperLimit;obsULerr=ulError
ExpUL=r.GetExpectedUpperLimit(0)
ExpULUp1=r.GetExpectedUpperLimit(1)
ExpULUp2=r.GetExpectedUpperLimit(2)
ExpULDn1=r.GetExpectedUpperLimit(-1)
ExpULDn2=r.GetExpectedUpperLimit(-2)
绘图
用的RooStats的HypoTestInverterPlot绘图,非常方便。
if os.path.exists(savepath):
plot=ROOT.RooStats.HypoTestInverterPlot("HypoTestInverterPlot_"+title,"HypoTestInverterPlot"+title,r)
c1=ROOT.TCanvas("cfortest"+title,"cfortest"+title,800,800)
plot.Draw("CLb 2CL")
c1.Update()
c1.SaveAs(os.path.join(savepath,"HypoTestInverterPlot_"+title+".png"))
完整代码
'''
利用root本身自带函数进行分析
'''
import ROOT
import os
def binCLs(S,B,bkgConstraint,mu区间=[100,0,100],Nobs=-1,time=1.0,α=0.05,savepath='',title=''):
'''
一个bin的假设检验,默认mu=1
输入
bkgConstraint:高斯限制的sigma,如果输入5%则应该是+-5%
mu区间:[N,mumin,mumax]
Nobs:>=0表示数据,<0的时候(默认)输出s+b模型的上限结果
time:几倍的时间
α:一般为0.05
'''
# fsig=mu*ratioSigEff*fsigExpected,假设除mu外均确定
# fbkg=边带区限制*fsigExpected
Sstr="s["+str(S)+"]*mu[1.0,0.,"+str(float(mu区间[2]))+"]"
Bstr="b["+str(B)+"]*ratioBkgEff[1.,0.,3.]"
obsstr="obs["+str(S+B)+",0,"+str(30*(S+B))+"]"
wspace = ROOT.RooWorkspace()
wspace.factory("mutime["+str(time)+",0.,"+str(2*time)+"]")
wspace.factory("Poisson::countingModel("+obsstr+",prod::Numberofcount(mutime,sum("+Sstr+","+Bstr+")))") # counting model
wspace.factory("Gaussian::bkgConstraint(gSigBkg[1,0,3],ratioBkgEff,"+str(2*bkgConstraint)+")") # 10% background efficiency uncertainty
wspace.factory("PROD::modelWithConstraints(countingModel,bkgConstraint)") # product of terms
wspace["gSigBkg"].setConstant(True)#让bkg理论中心为1#一定要有"True"
wspace["mutime"].setConstant(True)
wspace.SetName("w")
#wspace.Print()
modelConfig_b = ROOT.RooStats.ModelConfig("ModelConfig_b",wspace)
modelConfig_b.SetPdf(wspace["modelWithConstraints"])
modelConfig_b.SetObservables({wspace["obs"]})
wspace["mu"].setVal(0.0)#设置这个mode特殊的mu值
modelConfig_b.SetParametersOfInterest({wspace["mu"]})
modelConfig_b.SetSnapshot({wspace["mu"]})#合并
modelConfig_b.SetGlobalObservables({wspace["gSigBkg"],wspace["mutime"]})
modelConfig_b.SetNuisanceParameters({wspace["ratioBkgEff"]})
modelConfig = ROOT.RooStats.ModelConfig("ModelConfig",wspace)
modelConfig.SetPdf(wspace["modelWithConstraints"])
modelConfig.SetObservables({wspace["obs"]})
modelConfig.SetGlobalObservables({wspace["gSigBkg"],wspace["mutime"]})
modelConfig.SetNuisanceParameters({wspace["ratioBkgEff"]})
modelConfig.SetParametersOfInterest({wspace["mu"]})
modelConfig_sb = ROOT.RooStats.ModelConfig("ModelConfig_sb",wspace)
modelConfig_sb.SetPdf(wspace["modelWithConstraints"])
modelConfig_sb.SetObservables({wspace["obs"]})
modelConfig_sb.SetGlobalObservables({wspace["gSigBkg"],wspace["mutime"]})
modelConfig_sb.SetNuisanceParameters({wspace["ratioBkgEff"]})
wspace["mu"].setVal(1.0)#设置这个mode特殊的mu值
modelConfig_sb.SetSnapshot({wspace["mu"]})#合并
modelConfig_sb.SetParametersOfInterest({wspace["mu"]})
wspace.Import(modelConfig)
wspace.Import(modelConfig_b)
wspace.Import(modelConfig_sb)
obs=wspace["obs"]
if Nobs>-0.01:
obs.setVal(Nobs)
else:
obs.setVal(time*(1*S+B))
data=ROOT.RooDataSet('data_','data_',{obs})
data.add(obs)
#data=wspace["modelWithConstraints"].generate(wspace["obs"], 10)#产生数据集
wspace.Import(data)
wspace["mutime"].setVal(time)
wspace["mutime"].setConstant(True)
AC=ROOT.RooStats.AsymptoticCalculator(data,modelConfig_b, modelConfig_sb)#,nominalAsimov=True
AC.SetOneSided(True)#Discovery
# muhat=AC.GetMuHat()
# ACHT=AC.GetHypoTest()
# #ACHT.SetPValueIsRightTail(True)
# CLb=ACHT.CLb();CLsb=ACHT.CLsplusb()
calc = ROOT.RooStats.HypoTestInverter(AC)
calc.SetVerbose(False)
calc.SetConfidenceLevel(1-α)
calc.UseCLs(True)
slrts=ROOT.RooStats.SimpleLikelihoodRatioTestStat(modelConfig_sb.GetPdf(), modelConfig_b.GetPdf())
nullParams=ROOT.RooArgSet(modelConfig_sb.GetSnapshot())
nullParams.add(modelConfig_sb.GetNuisanceParameters())
slrts.SetNullParameters(nullParams)
altParams=ROOT.RooArgSet(modelConfig_b.GetSnapshot())
altParams.add(modelConfig_b.GetNuisanceParameters())
slrts.SetAltParameters(altParams)
calc.SetTestStatistic(slrts)
# ropl=ROOT.RooStats.RatioOfProfiledLikelihoodsTestStat(modelConfig_sb.GetPdf(), modelConfig_b.GetPdf(), modelConfig_b.GetSnapshot())#def
# ropl.SetSubtractMLE(False)
# calc.SetTestStatistic(ropl)
# min and max for scan (better to choose smaller intervals)
# poimin = wspace.var("mu").getMin()
# poimax = wspace.var("mu").getMax()
calc.SetFixedScan(*mu区间)
r = calc.GetInterval()
upperLimit = r.UpperLimit()
ulError = r.UpperLimitEstimatedError()
print( "The computed upper limit is: ", upperLimit," +/- ",ulError)
# compute expected limit
print("Expected upper limits using b-only model : ")
print("median limit = " , r.GetExpectedUpperLimit(0) )
print("med. limit (-1 sig) " , r.GetExpectedUpperLimit(-1))
print("med. limit (+1 sig) " , r.GetExpectedUpperLimit(1))
print("med. limit (-2 sig) " , r.GetExpectedUpperLimit(-2))
print("med. limit (+2 sig) " , r.GetExpectedUpperLimit(2))
obsUL=upperLimit;obsULerr=ulError
ExpUL=r.GetExpectedUpperLimit(0)
ExpULUp1=r.GetExpectedUpperLimit(1)
ExpULUp2=r.GetExpectedUpperLimit(2)
ExpULDn1=r.GetExpectedUpperLimit(-1)
ExpULDn2=r.GetExpectedUpperLimit(-2)
if os.path.exists(savepath):
plot=ROOT.RooStats.HypoTestInverterPlot("HypoTestInverterPlot_"+title,"HypoTestInverterPlot"+title,r)
c1=ROOT.TCanvas("cfortest"+title,"cfortest"+title,800,800)
plot.Draw("CLb 2CL")
c1.Update()
c1.SaveAs(os.path.join(savepath,"HypoTestInverterPlot_"+title+".png"))
return obsUL,ExpUL,ExpULUp1,ExpULUp2,ExpULDn1,ExpULDn2
if __name__ == '__main__':
print()
obsUL,ExpUL,ExpULUp1,ExpULUp2,ExpULDn1,ExpULDn2=binCLs(
10.5,100.5,1e-2,mu区间=[100,0,5],Nobs=99,savepath='./',
title='_time=1',time=1.0
)
结果
结果理解
输出的结果如下:
The computed upper limit is: 1.9034098214552804 +/- 0.0
Expected upper limits using b-only model :
median limit = 2.0038483172727086
med. limit (-1 sig) 1.4184354442004783
med. limit (+1 sig) 2.8563863739302864
med. limit (-2 sig) 1.0450678586432423
med. limit (+2 sig) 3.9409640786392015
看一下结果图能够加深理解。
图的横坐标是
μ
t
e
s
t
\mu_{test}
μtest,纵坐标是p值.
由前一篇的介绍,对于每一个检验的
μ
t
e
s
t
\mu_{test}
μtest,都有一个中心卡方分布(
H
0
:
μ
=
μ
t
e
s
t
H_0:\mu=\mu_{test}
H0:μ=μtest)和非中心卡方分布(
H
1
:
μ
=
0
H_1:\mu=0
H1:μ=0).
而
C
L
s
+
b
CL_{s+b}
CLs+b和
C
L
b
CL_b
CLb是观测
t
μ
,
t
e
s
t
,
o
b
s
t_{\mu,test,obs}
tμ,test,obs分别对
H
0
H_0
H0和
H
1
H_1
H1分布得到的“截断角”的面积(概率)。对应
C
L
s
+
b
CL_{s+b}
CLs+b是蓝色的点,对应
C
L
b
CL_{b}
CLb是黑色的点。所以对每一个
μ
t
e
s
t
\mu_{test}
μtest都有对应的
C
L
s
+
b
,
C
L
s
CL_{s+b},CL_s
CLs+b,CLs.
- 可以看到在 μ t e s t = 0 \mu_{test}=0 μtest=0时候 C L s + b = C L b CL_{s+b}=CL_{b} CLs+b=CLb,意味着这两个是同一个分布。
- 图中有黑色虚线是纯本底假设下的 C L s CL_s CLs的中心值,这个获取是可以从Asimov数据得到的,包括绿色和黄色的误差带。
- 大约在
μ
=
1.9
\mu=1.9
μ=1.9的时候红色点与红色水平线(
p
v
a
l
u
e
=
0.05
p_{value}=0.05
pvalue=0.05)相交而输出的结果中
The computed upper limit is: 1.9034098214552804
也说了这个是在 N o b s = 99 N_{obs}=99 Nobs=99情况下的观测上限,而Expected upper limits
则是黑色虚线的交点。
随时间变化
修改if __name__ == '__main__':
后面的代码为
if __name__ == '__main__':
print()
# obsUL,ExpUL,ExpULUp1,ExpULUp2,ExpULDn1,ExpULDn2=binCLs(
# 10.5,100.5,1e-2,mu区间=[100,0,5],Nobs=99,savepath='./',
# title='_time=1',time=1.0
# )
import numpy
import matplotlib.pyplot as plt
Xtimes=numpy.array([1,2,3,5,8,13])
UL=numpy.zeros((len(Xtimes),7))
UL[:,0]=Xtimes[:]
for i,times in enumerate(Xtimes):
obsUL,ExpUL,ExpULUp1,ExpULUp2,ExpULDn1,ExpULDn2=binCLs(
10.5,100.5,1e-2,mu区间=[100,0,5],
title='_time=1',time=times
)
UL[i,1]=obsUL
UL[i,2]=ExpUL
UL[i,3]=ExpULUp1
UL[i,4]=ExpULUp2
UL[i,5]=ExpULDn1
UL[i,6]=ExpULDn2
plt.figure(1)
# plt.yscale('log')
plt.fill_between(UL[:,0],UL[:,4],UL[:,6],where=(UL[:,4]>UL[:,6])|(UL[:,0]>0),facecolor='yellow',label='$\pm 2 \sigma$')
plt.fill_between(UL[:,0],UL[:,3],UL[:,5],where=(UL[:,3]>UL[:,5])|(UL[:,0]>0),facecolor='greenyellow',label='$\pm 1 \sigma$')
plt.plot(UL[:,0],UL[:,2],'k:',label='exp_upperlimit')
plt.axhline(1,color='red',linestyle='dotted',label='k=1.0')
plt.legend()
plt.xlabel('time(day)')
plt.ylabel('k times')
plt.title(' UL vs time')
plt.savefig('./Hyplot2.png')
plt.close()
再次运行,可以在当前文件夹下获得./Hyplot2.png
可以看到在纯本底假设下灵敏度在大约4天达到。