$\frac{1}{1+e^{-\theta^\mathrm{T}x}}$
$h_\theta(x)=P(y=1\mid x;\theta)$
3 决策界限
若 $h_\theta(x)\geqslant0.5$,预测 $y=1$,即 $\theta^\mathrm{T}x\geqslant0$
若 $h_\theta(x)<0.5$,预测 $y=0$,即 $\theta^\mathrm{T}x<0$
边界:$\theta^\mathrm{T}x=0$
4 代价函数
-
表达式
训练集
$\{(x^{(1)},y^{(1)}),(x^{(2)},y^{(2)}),\cdots,(x^{(m)},y^{(m)})\}$
$m$ 个样本
$x\in\left[\begin{matrix}x_0\\x_1\\\vdots\\x_n\end{matrix}\right]\quad x_0=1,\ y\in\{0,1\}$
$h_\theta(x)$
$\frac{1}{1+e^{-\theta^\mathrm{T}x}}$
$J(\theta)$
$\frac{1}{m}\sum\limits^m_{i=1}\mathop{Cost}(h_\theta(x^{(i)}),y^{(i)})$
$\sout{\mathop{Cost}(h_\theta(x),y)}$
$\sout{\frac{1}{2}(h_\theta(x)-y)^2\text{:导致 }J(\theta)\text{ 为非凸函数}}$
$\mathop{Cost}(h_\theta(x),y)$
$\begin{cases}-\log(h_\theta(x)) & \text{若 }y=1\\-\log(1-h_\theta(x)) & \text{若 }y=0\end{cases}$
$\frac{1}{m}\sum\limits^m_{i=1}\mathop{Cost}(h_\theta(x^{(i)}),y^{(i)})$
$\mathop{Cost}(h_\theta(x),y)$
$\begin{cases}-\log(h_\theta(x)) & \text{若 }y=1\\-\log(1-h_\theta(x)) & \text{若 }y=0\end{cases}$
$\mathop{Cost}(h_\theta(x),y)$
$-y\log(h_\theta(x))-(1-y)\log(1-h_\theta(x))$
$J(\theta)$
$-\frac{1}{m}\left[\sum\limits^m_{i=1}y^{(i)}\log h_\theta(x^{(i)})+(1-y^{(i)})\log(1-h_\theta(x^{(i)}))\right]$
$\min_\theta J(\theta)$
$$
\begin{aligned}
&\text{重复}\ \{\\
&\qquad\theta_j:=\theta_j-\alpha\frac{1}{m}\sum\limits^m_{i=1}\left(h_\theta(x^{(i)})-y^{(i)}\right)x^{(i)}_j\\
&\}\qquad(j=0,\dots,n)
\end{aligned}
$$