# 逻辑回归

原文链接：<http://blog.csdn.net/u013719780?viewmode=contents>

$$ f(x) = \frac{1}{1 + e^{-x}} $$

## 模型原理

• 假设因变量服从Bernoulli distribution, 即

$$ p(y|x) = p(y=1|x)^{y}\,\bigl(1 - p(y=1|x)\bigr)^{1-y}, \qquad y \in \{0, 1\} $$

• 假设训练样本服从钟形分布，例如高斯分布：

$$ p(x_i \mid y=y_k) \sim N(\mu_{ik}, \sigma_i) $$

$$ p(y=1 \mid x) = \frac{1}{1 + e^{-xw}}, $$

$$ p(y=0 \mid x) = 1 - p(y=1 \mid x) = \frac{1}{1 + e^{xw}} $$

$$ \begin{aligned} p(y=1|x) &= \frac{p(x|y=1)\,p(y=1)}{p(x)} \\ &= \frac{p(x|y=1)\,p(y=1)}{p(x|y=1)\,p(y=1)+p(x|y=0)\,p(y=0)} \\ &= \frac{1}{1+\frac{p(x|y=0)\,p(y=0)}{p(x|y=1)\,p(y=1)}} \\ &= \frac{1}{1+\frac{p(x|y=0)\,(1-p(y=1))}{p(x|y=1)\,p(y=1)}} \\ &= \frac{1}{1+\frac{p(x|y=0)\,(1-\pi)}{p(x|y=1)\,\pi}} \qquad (\text{令 } p(y=1)=\pi) \\ &= \frac{1}{1+e^{\ln\frac{1-\pi}{\pi} + \ln\frac{p(x|y=0)}{p(x|y=1)}}} \\ &= \frac{1}{1+e^{\ln\frac{1-\pi}{\pi} + \sum_i \ln\frac{p(x_i|y=0)}{p(x_i|y=1)}}} \end{aligned} $$

$$ p(x_i \mid y_k) = \frac{1}{\sigma_{ik}\sqrt{2\pi}}\, e^{-\frac{(x_i-\mu_{ik})^2}{2\sigma_{ik}^2}} $$

$$ \begin{aligned} p(y=1|x) &= \frac{1}{1+e^{-\left(\ln\frac{\pi}{1-\pi}+\sum_i\left(\frac{\mu_{i1}-\mu_{i0}}{\sigma_i^2}x_i + \frac{\mu_{i0}^2-\mu_{i1}^2}{2\sigma_i^2}\right)\right)}} \\ &= \frac{1}{1+e^{-\left(\sum_i w_i x_i + b\right)}}, \qquad i = 1, 2, 3, \dots, n\_features \end{aligned} $$

（注：由上面的推导，指数中的常数项是 $\ln\frac{\pi}{1-\pi}$，而非 $\ln\frac{\pi-1}{\pi}$。）

$$ b = \ln\frac{\pi}{1-\pi} + \sum_i \frac{\mu_{i0}^2-\mu_{i1}^2}{2\sigma_i^2}, \qquad w_i = \frac{\mu_{i1}-\mu_{i0}}{\sigma_i^2} $$

## 损失函数

$$ w = \arg\max_w \sum_{i=1}^{n} \ln p(y_i \mid x_i) $$

$$ w = \arg\min_w \left( -\sum_{i=1}^{n} \Bigl[ y_i \ln p(y_i=1 \mid x_i) + (1-y_i) \ln p(y_i=0 \mid x_i) \Bigr] \right) $$

## 逻辑回归模型的另一面

from __future__ import print_function, division
import sys
import os
import math
from sklearn import datasets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def shuffle_data(X, y, seed=None):
    """Return a random permutation of (X, y), keeping rows aligned.

    Parameters
    ----------
    X : ndarray, shape (n_samples, ...)
    y : ndarray, shape (n_samples, ...)
    seed : int or None
        Optional RNG seed for a reproducible shuffle.
    """
    # `is not None` rather than truthiness, so that seed=0 is honored.
    if seed is not None:
        np.random.seed(seed)

    idx = np.arange(X.shape[0])
    np.random.shuffle(idx)

    return X[idx], y[idx]

# 正规化数据集 X
# Scale each slice of X along `axis` to unit Lp norm.
def normalize(X, axis=-1, p=2):
    """Normalize X so every vector along `axis` has Lp norm 1.

    Slices whose norm is zero are left unchanged (divided by 1).
    """
    norms = np.atleast_1d(np.linalg.norm(X, p, axis))
    # Replace zero norms with 1 to avoid division by zero.
    safe_norms = np.where(norms == 0, 1, norms)
    return X / np.expand_dims(safe_norms, axis)

# 标准化数据集 X
# Standardize dataset X column-wise: zero mean, unit variance.
def standardize(X):
    """Return a standardized copy of X: (X - mean) / std per column.

    Columns with zero standard deviation are left at 0, which equals
    their centered value (X - mean == 0 for a constant column), so no
    division by zero can occur.
    """
    X_std = np.zeros(X.shape)
    mean = X.mean(axis=0)
    std = X.std(axis=0)

    for col in range(np.shape(X)[1]):
        if std[col]:
            # Bug fix: center the ORIGINAL data `X`, not the all-zeros
            # buffer `X_std` (the original code always returned zeros).
            X_std[:, col] = (X[:, col] - mean[col]) / std[col]

    return X_std

# 划分数据集为训练集和测试集
# Split a dataset into training and test partitions.
def train_test_split(X, y, test_size=0.2, shuffle=True, seed=None):
    """Split (X, y) into train/test sets.

    After an optional shuffle, the first (1 - test_size) fraction of
    the samples becomes the training set and the rest the test set.

    Returns
    -------
    (x_train, x_test, y_train, y_test)
    """
    if shuffle:
        X, y = shuffle_data(X, y, seed)

    split_at = int(X.shape[0] * (1 - test_size))
    return X[:split_at], X[split_at:], y[:split_at], y[split_at:]

# 将一个向量转换成对角阵，其中对角阵上的元素就是向量中元素
# Turn a vector into a diagonal matrix carrying its elements on the diagonal.
def vec2diagonal(vec):
    """Return an n x n float matrix with `vec` on the main diagonal."""
    size = len(vec)
    out = np.zeros((size, size))
    # Vectorized diagonal assignment; float64 buffer matches the original.
    idx = np.arange(size)
    out[idx, idx] = vec
    return out

def accuracy(y, y_pred):
    """Fraction of samples where `y` and `y_pred` agree element-wise.

    Both arrays are reshaped to (n_samples, -1) before comparison, so
    a flat vector and a column vector compare correctly.
    """
    truth = np.reshape(y, (np.shape(y)[0], -1))
    pred = np.reshape(y_pred, (np.shape(y_pred)[0], -1))
    return np.sum(truth == pred) / len(truth)

class Sigmoid:
    """Logistic activation: squashes any real input into (0, 1)."""

    def function(self, x):
        """Return sigma(x) = 1 / (1 + exp(-x))."""
        return 1.0 / (1.0 + np.exp(-x))

    def derivative(self, x):
        """Return sigma'(x) = sigma(x) * (1 - sigma(x))."""
        # Compute sigma(x) once instead of twice.
        s = self.function(x)
        return s * (1 - s)

class LogisticRegression():
    """Logistic regression classifier trained by batch gradient descent.

    Parameters:
    -----------
    learning_rate: float
        Step size for the gradient-descent updates.
    """
    def __init__(self, learning_rate=.1):
        self.w = None  # learned weights, shape (n_features + 1, 1); w[0] is the bias
        self.learning_rate = learning_rate
        self.sigmoid = Sigmoid()

    def fit(self, X, y, n_iterations=4000):
        """Fit the weights by gradient descent on the cross-entropy loss.

        X : ndarray, shape (n_samples, n_features)
        y : ndarray of 0/1 labels, shape (n_samples,) or (n_samples, 1)
        """
        # Prepend a bias column of ones so w[0] acts as the intercept.
        X = np.insert(X, 0, 1, axis=1)
        X = X.reshape(X.shape[0], -1)
        y = y.reshape(y.shape[0], -1)

        n_samples, n_features = np.shape(X)

        # Initialize weights uniformly in [-1/sqrt(n_features), 1/sqrt(n_features)].
        limit = 1 / math.sqrt(n_features)
        self.w = np.random.uniform(-limit, limit, (n_features, 1))

        for _ in range(n_iterations):
            # Forward pass with the current weights (computed once per step).
            y_pred = self.sigmoid.function(X.dot(self.w))
            # Bug fix: the gradient of the cross-entropy loss defined in the
            # "loss function" section above is X^T (y_pred - y).  The original
            # update additionally multiplied by sigma'(Xw), which is the
            # gradient of the squared error, not of the log-likelihood, and
            # slows convergence when predictions saturate.
            self.w -= self.learning_rate * X.T.dot(y_pred - y)

    def predict(self, X):
        """Return hard 0/1 predictions, shape (n_samples, 1)."""
        # The model was trained with a bias column; add the same one here.
        X = X.reshape(X.shape[0], -1)
        X = np.insert(X, 0, 1, axis=1)
        # Threshold the predicted probabilities at 0.5 via rounding.
        y_pred = np.round(self.sigmoid.function(X.dot(self.w))).astype(int)
        return y_pred

def main():
    """Train and evaluate logistic regression on two iris classes."""
    # Bug fix: `data` was never defined, which raised a NameError.
    # Load the iris dataset explicitly (datasets is imported at the top).
    data = datasets.load_iris()
    # Drop class 0 and relabel classes {1, 2} as {0, 1} for binary classification.
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accu = accuracy(y_test, y_pred)
    print("Accuracy:", accu)

    # Scatter the first two (normalized) features, one color per class.
    plt.figure(figsize=(12, 8))
    plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1])
    plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1])

    plt.show()

if __name__ == "__main__":
main()

Accuracy: 0.939393939394

08-20 1万+
11-12 2万+
10-15 3998
08-03 244