说明
此笔记是 cs229-notes2 讲义中的第二部分学习内容,与B站上的“05 生成学习方法”视频对应,主要是对讲义中一些推理的补充以及一些重点内容的记录,另外还会附加该部分相对应的习题解答和算法的C++实现。
课程相关视频、讲义等资料可参照《斯坦福CS229(吴恩达授)学习笔记(1)》 获取。
正文
Problem Set #1: Supervised learning
4. Naive Bayes
原文题目如下:
解答:
(a)
只考虑一个样本点时($y$ 已知,两项中只有一项非零;若不带指示函数 $I\{\cdot\}$,右端求和得到的是边缘概率 $p(x)$)
$$p(x,y)=p(x|y)p(y)=I\{y=0\}\,p(x|y=0)p(y=0)+I\{y=1\}\,p(x|y=1)p(y=1)$$
考虑 $m$ 个样本点时(每个 $i$ 只能有一项存在,用指示函数 $I\{\cdot\}$ 表示)
$$\begin{aligned}
\ell(\varphi)=&\log\prod_{i=1}^m p(x^{(i)},y^{(i)};\varphi)\\
=&\sum_{i=1}^m\log p(x^{(i)},y^{(i)};\varphi)\\
=&\sum_{i=1}^m\log\Big(I\{y^{(i)}=0\}\,p(x^{(i)}|y^{(i)}=0)p(y^{(i)}=0)+I\{y^{(i)}=1\}\,p(x^{(i)}|y^{(i)}=1)p(y^{(i)}=1)\Big)\\
=&\sum_{i=1}^m\log\Big(I\{y^{(i)}=0\}\,(1-\phi_{y^{(i)}=0})\prod_{j=1}^n(\phi_{j|y^{(i)}=0})^{x_j^{(i)}}(1-\phi_{j|y^{(i)}=0})^{1-x_j^{(i)}}+\\
&I\{y^{(i)}=1\}\,\phi_{y^{(i)}=1}\prod_{j=1}^n(\phi_{j|y^{(i)}=1})^{x_j^{(i)}}(1-\phi_{j|y^{(i)}=1})^{1-x_j^{(i)}}\Big)
\end{aligned}$$
(b)每个 $j$ 都是唯一的(对某个确定的 $\phi_{j|y=0}$ 求偏导时,对 $j$ 的求和中只剩下这一项;且只有 $y^{(i)}=0$ 的样本含有 $\phi_{j|y=0}$,其余样本对该偏导的贡献为 0)
$$\begin{aligned}
\frac{\partial\ell(\varphi)}{\partial\phi_{j|y=0}}=&\frac{\partial}{\partial\phi_{j|y=0}}\sum_{i=1}^m\log\Big((1-\phi_{y^{(i)}=0})\prod_{j=1}^n(\phi_{j|y^{(i)}=0})^{x_j^{(i)}}(1-\phi_{j|y^{(i)}=0})^{1-x_j^{(i)}}\Big)\\
=&\frac{\partial}{\partial\phi_{j|y=0}}\sum_{i=1}^m\Big(\log(1-\phi_{y^{(i)}=0})+\\
&\sum_{j=1}^n\big(x_j^{(i)}\log(\phi_{j|y^{(i)}=0})+(1-x_j^{(i)})\log(1-\phi_{j|y^{(i)}=0})\big)\Big)\\
=&\sum_{i=1}^m\Big(x_j^{(i)}\frac{1}{\phi_{j|y^{(i)}=0}}-(1-x_j^{(i)})\frac{1}{1-\phi_{j|y^{(i)}=0}}\Big)
\end{aligned}$$
令 $\frac{\partial\ell(\varphi)}{\partial\phi_{j|y=0}}=0$ 可得
$$\begin{aligned}
\sum_{i=1}^m x_j^{(i)}\frac{1}{\phi_{j|y^{(i)}=0}}=&\sum_{i=1}^m(1-x_j^{(i)})\frac{1}{1-\phi_{j|y^{(i)}=0}}\\
(1-\phi_{j|y^{(i)}=0})\sum_{i=1}^m x_j^{(i)}=&\phi_{j|y^{(i)}=0}\sum_{i=1}^m(1-x_j^{(i)})\\
\sum_{i=1}^m x_j^{(i)}=&\sum_{i=1}^m\phi_{j|y^{(i)}=0}
\end{aligned}$$
因为仅考虑 $y^{(i)}=0$ 的情况,且 $x_j^{(i)}\in\{0,1\}$,所以
$$\begin{aligned}
\sum_{i=1}^m x_j^{(i)}=&\sum_{i=1}^m\phi_{j|y^{(i)}=0}\\
\sum_{i=1}^m I\{x_j^{(i)}=1\wedge y^{(i)}=0\}=&\phi_{j|y^{(i)}=0}\sum_{i=1}^m I\{y^{(i)}=0\}
\end{aligned}$$
所以
$$\phi_{j|y=0}=\frac{\sum_{i=1}^m I\{x_j^{(i)}=1\wedge y^{(i)}=0\}}{\sum_{i=1}^m I\{y^{(i)}=0\}}$$
而 $\phi_{j|y=1}$ 同理可得
$$\phi_{j|y=1}=\frac{\sum_{i=1}^m I\{x_j^{(i)}=1\wedge y^{(i)}=1\}}{\sum_{i=1}^m I\{y^{(i)}=1\}}$$
而(每个样本只有与其标签对应的一项存在,因此可以按 $y^{(i)}=0$ 和 $y^{(i)}=1$ 把求和拆成两部分)
$$\begin{aligned}
\frac{\partial\ell(\varphi)}{\partial\phi_y}=&\frac{\partial}{\partial\phi_y}\sum_{i=1}^m\log\Big(I\{y^{(i)}=0\}\,(1-\phi_{y^{(i)}=0})\prod_{j=1}^n(\phi_{j|y^{(i)}=0})^{x_j^{(i)}}(1-\phi_{j|y^{(i)}=0})^{1-x_j^{(i)}}+\\
&I\{y^{(i)}=1\}\,\phi_{y^{(i)}=1}\prod_{j=1}^n(\phi_{j|y^{(i)}=1})^{x_j^{(i)}}(1-\phi_{j|y^{(i)}=1})^{1-x_j^{(i)}}\Big)\\
=&\frac{\partial}{\partial\phi_{y^{(i)}=0}}\sum_{i:y^{(i)}=0}\log\Big((1-\phi_{y^{(i)}=0})\prod_{j=1}^n(\phi_{j|y^{(i)}=0})^{x_j^{(i)}}(1-\phi_{j|y^{(i)}=0})^{1-x_j^{(i)}}\Big)+\\
&\frac{\partial}{\partial\phi_{y^{(i)}=1}}\sum_{i:y^{(i)}=1}\log\Big(\phi_{y^{(i)}=1}\prod_{j=1}^n(\phi_{j|y^{(i)}=1})^{x_j^{(i)}}(1-\phi_{j|y^{(i)}=1})^{1-x_j^{(i)}}\Big)\\
=&\frac{\partial}{\partial\phi_{y^{(i)}=0}}\sum_{i:y^{(i)}=0}\log(1-\phi_{y^{(i)}=0})+\frac{\partial}{\partial\phi_{y^{(i)}=1}}\sum_{i:y^{(i)}=1}\log\phi_{y^{(i)}=1}\\
=&\sum_{i:y^{(i)}=0}\frac{-1}{1-\phi_{y^{(i)}=0}}+\sum_{i:y^{(i)}=1}\frac{1}{\phi_{y^{(i)}=1}}
\end{aligned}$$
令 $\frac{\partial\ell(\varphi)}{\partial\phi_y}=0$ 得
$$\begin{aligned}
\sum_{i:y^{(i)}=0}\frac{1}{1-\phi_{y^{(i)}=0}}=&\sum_{i:y^{(i)}=1}\frac{1}{\phi_{y^{(i)}=1}}\\
\frac{1}{1-\phi_{y^{(i)}=0}}\sum_{i=1}^m I\{y^{(i)}=0\}=&\frac{1}{\phi_{y^{(i)}=1}}\sum_{i=1}^m I\{y^{(i)}=1\}\\
\phi_{y^{(i)}=1}\sum_{i=1}^m I\{y^{(i)}=0\}=&(1-\phi_{y^{(i)}=0})\sum_{i=1}^m I\{y^{(i)}=1\}\\
\phi_{y^{(i)}=1}\sum_{i=1}^m I\{y^{(i)}=0\}+\phi_{y^{(i)}=0}\sum_{i=1}^m I\{y^{(i)}=1\}=&\sum_{i=1}^m I\{y^{(i)}=1\}
\end{aligned}$$
因为 $\phi_{y^{(i)}=1}$ 和 $\phi_{y^{(i)}=0}$ 就是同一个 $\phi_y$,所以上式
$$\begin{aligned}
\phi_{y^{(i)}=1}\sum_{i=1}^m I\{y^{(i)}=0\}+\phi_{y^{(i)}=0}\sum_{i=1}^m I\{y^{(i)}=1\}=&\sum_{i=1}^m I\{y^{(i)}=1\}\\
\phi_y\sum_{i=1}^m I\{y^{(i)}=0\}+\phi_y\sum_{i=1}^m I\{y^{(i)}=1\}=&\sum_{i=1}^m I\{y^{(i)}=1\}\\
\phi_y\Big(\sum_{i=1}^m I\{y^{(i)}=0\}+\sum_{i=1}^m I\{y^{(i)}=1\}\Big)=&\sum_{i=1}^m I\{y^{(i)}=1\}\\
\phi_y m=&\sum_{i=1}^m I\{y^{(i)}=1\}\\
\phi_y=&\frac{\sum_{i=1}^m I\{y^{(i)}=1\}}{m}
\end{aligned}$$
(c)
线性分类器的判别条件是特征的线性函数,比如在二维平面上,判为 $y=1$ 的条件形如
$$\theta_0+\theta_1x_1+\theta_2x_2 \geq 0$$
根据贝叶斯公式
$$\begin{aligned}
p(y=1|x)=&\frac{p(x|y=1)p(y=1)}{p(x)}\\
p(y=0|x)=&\frac{p(x|y=0)p(y=0)}{p(x)}=1-p(y=1|x)
\end{aligned}$$
两式分母同为 $p(x)$,所以比较分子即可,即 $p(y=1|x)\geq p(y=0|x)$ 等价于
$$\frac{p(y=1|x)}{p(y=0|x)}=\frac{p(x|y=1)p(y=1)}{p(x|y=0)p(y=0)}\geq1$$
其中
$$\begin{aligned}
p(x|y=1)p(y=1)=&\phi_y\prod_{j=1}^n(\phi_{j|y=1})^{x_j}(1-\phi_{j|y=1})^{1-x_j}\\
=&\exp\Big\{\log\Big(\phi_y\prod_{j=1}^n(\phi_{j|y=1})^{x_j}(1-\phi_{j|y=1})^{1-x_j}\Big)\Big\}\\
=&\exp\Big\{\log\phi_y+\sum_{j=1}^n\log\Big((\phi_{j|y=1})^{x_j}(1-\phi_{j|y=1})^{1-x_j}\Big)\Big\}\\
=&\exp\Big\{\log\phi_y+\sum_{j=1}^n\big(x_j\log(\phi_{j|y=1})+(1-x_j)\log(1-\phi_{j|y=1})\big)\Big\}
\end{aligned}$$
同理
$$p(x|y=0)p(y=0)=\exp\Big\{\log(1-\phi_y)+\sum_{j=1}^n\big(x_j\log(\phi_{j|y=0})+(1-x_j)\log(1-\phi_{j|y=0})\big)\Big\}$$
那么
$$\begin{aligned}
\frac{p(x|y=1)p(y=1)}{p(x|y=0)p(y=0)}=&\frac{\exp\big\{\log\phi_y+\sum_{j=1}^n\big(x_j\log(\phi_{j|y=1})+(1-x_j)\log(1-\phi_{j|y=1})\big)\big\}}{\exp\big\{\log(1-\phi_y)+\sum_{j=1}^n\big(x_j\log(\phi_{j|y=0})+(1-x_j)\log(1-\phi_{j|y=0})\big)\big\}}\\
=&\exp\Big\{\log\phi_y+\sum_{j=1}^n\big(x_j\log(\phi_{j|y=1})+(1-x_j)\log(1-\phi_{j|y=1})\big)-\\
&\Big(\log(1-\phi_y)+\sum_{j=1}^n\big(x_j\log(\phi_{j|y=0})+(1-x_j)\log(1-\phi_{j|y=0})\big)\Big)\Big\}\\
=&\exp\Big\{\big(\log\phi_y-\log(1-\phi_y)\big)+\sum_{j=1}^n\Big(x_j\big(\log(\phi_{j|y=1})-\log(\phi_{j|y=0})\big)+\\
&(1-x_j)\big(\log(1-\phi_{j|y=1})-\log(1-\phi_{j|y=0})\big)\Big)\Big\}\\
=&\exp\Big\{\log\frac{\phi_y}{1-\phi_y}+\sum_{j=1}^n\Big(x_j\log\frac{\phi_{j|y=1}}{\phi_{j|y=0}}+(1-x_j)\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}\Big)\Big\}\\
=&\exp\Big\{\log\frac{\phi_y}{1-\phi_y}+\sum_{j=1}^n\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}+\sum_{j=1}^n\Big(x_j\log\frac{\phi_{j|y=1}}{\phi_{j|y=0}}-x_j\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}\Big)\Big\}\\
=&\exp\Big\{\log\frac{\phi_y}{1-\phi_y}+\sum_{j=1}^n\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}+\sum_{j=1}^n x_j\Big(\log\frac{\phi_{j|y=1}}{\phi_{j|y=0}}-\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}\Big)\Big\}\\
=&\exp\Big\{\log\frac{\phi_y}{1-\phi_y}+\sum_{j=1}^n\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}+\sum_{j=1}^n x_j\log\Big(\frac{\phi_{j|y=1}}{\phi_{j|y=0}}\cdot\frac{1-\phi_{j|y=0}}{1-\phi_{j|y=1}}\Big)\Big\}\\
=&\exp\Big\{\log\frac{\phi_y}{1-\phi_y}+\sum_{j=1}^n\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}+\sum_{j=1}^n x_j\log\Big(\frac{\phi_{j|y=1}}{1-\phi_{j|y=1}}\cdot\frac{1-\phi_{j|y=0}}{\phi_{j|y=0}}\Big)\Big\}
\end{aligned}$$
令 $\theta_0=\log\frac{\phi_y}{1-\phi_y}+\sum_{j=1}^n\log\frac{1-\phi_{j|y=1}}{1-\phi_{j|y=0}}$,$\theta_j=\log\Big(\frac{\phi_{j|y=1}}{1-\phi_{j|y=1}}\cdot\frac{1-\phi_{j|y=0}}{\phi_{j|y=0}}\Big)$($j=1,\dots,n$),则
$$\frac{p(x|y=1)p(y=1)}{p(x|y=0)p(y=0)}=\exp\Big\{\theta_0+\sum_{j=1}^n x_j\theta_j\Big\}$$
设 $\theta^T=[\theta_0,\theta_1,\dots,\theta_j,\dots,\theta_n]$,$x^T=[x_1,\dots,x_j,\dots,x_n]$,则
$$\frac{p(x|y=1)p(y=1)}{p(x|y=0)p(y=0)}=\exp\Big\{\theta^T\begin{bmatrix}1\\x\end{bmatrix}\Big\}$$
因此 $\frac{p(x|y=1)p(y=1)}{p(x|y=0)p(y=0)}\geq1$ 成立,当且仅当
$$\theta^T\begin{bmatrix}1\\x\end{bmatrix}\geq0$$