# 神经网络基础概念

## 激活函数实现去线性化

TensorFlow中提供了7种非线性激活函数：
tf.nn.relu, tf.sigmoid, tf.tanh是常用的三个

Activation Functions.

The activation ops provide different types of nonlinearities for use in neural networks. These include smooth nonlinearities (sigmoid, tanh, elu, softplus, and softsign), continuous but not everywhere differentiable functions (relu, relu6, crelu and relu_x), and random regularization (dropout).

All activation ops apply componentwise, and produce a tensor of the same shape as the input tensor.

### 经典的损失函数：

# tensorflow 实现交叉熵
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# y_ : 正确结果
# y  : 预测结果
# tf.clip_by_value(y, 1e-10, 1.0): 将张量 y 限制在 1e-10 和 1.0 之间，避免 log(0)

TensorFlow对交叉熵和softmax回归进行了统一封装

cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
# 获取使用了softmax回归之后的交叉熵

mse = tf.reduce_mean(tf.square(y_ - y))
# y : 输出答案
# y_: 标准答案

# coding: utf-8
import tensorflow as tf
from numpy.random import RandomState

# #### 1. Define the network parameters and variables.
batch_size = 8
x = tf.placeholder(tf.float32, shape=(None, 2), name="x-input")
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w1)

# #### 2. Define a custom asymmetric loss function.
# Under-prediction is penalized 10x more than over-prediction, so the
# trained model is biased toward predicting on the high side.
loss_less = 10
loss_more = 1
loss = tf.reduce_sum(tf.where(tf.greater(y, y_),
                              (y - y_) * loss_more,
                              (y_ - y) * loss_less))
# BUG FIX: train_step was used in the training loop below but never
# defined anywhere in the script.
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

# #### 3. Generate the simulated data set.
rdm = RandomState(1)
X = rdm.rand(128, 2)
# Label = x1 + x2 plus uniform noise in [-0.05, 0.05).
Y = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X]

# #### 4. Train the model.
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 5000
    for i in range(STEPS):
        start = (i * batch_size) % 128
        end = start + batch_size
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
        if i % 1000 == 0:
            print("After %d training step(s), w1 is: " % (i))
            print(sess.run(w1), "\n")
    print("Final w1 is: \n", sess.run(w1))

# #### 6. Redefine the loss function as MSE and retrain.
loss = tf.contrib.losses.mean_squared_error(y, y_)
# BUG FIX: the optimizer must be rebuilt here, otherwise train_step
# would still minimize the previous (custom) loss — or, as in the
# original snippet, be undefined entirely.
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 5000
    for i in range(STEPS):
        start = (i * batch_size) % 128
        end = start + batch_size
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
        if i % 1000 == 0:
            print("After %d training step(s), w1 is: " % (i))
            print(sess.run(w1), "\n")
    print("Final w1 is: \n", sess.run(w1))

### 学习率的设置

# 指数衰减法
decayed_learning_rate = learning_rate * decay_rate ** (global_step / decay_steps)  # 注意：Python 中乘方是 **（^ 是按位异或）

# decayed_learning_rate 为每一轮优化时使用的学习率
# learning_rate 为事先设定的初始学习率
# decay_rate 为衰减系数
# decay_steps 为衰减速度

TensorFlow实现指数衰减法

global_step = tf.Variable(0)

# 通过 exponential_decay函数生成学习率
learning_rate = tf.train.exponential_decay(
0.1, global_step, 100, 0.96, staircase=True)
# 使用指数衰减法的学习率，在minimize函数中传入global_step将自动更新
# global_step 参数，从而使得学习率也得到相应更新
learning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(...my_loss..., global_step=global_step)

### 过拟合

http://www.cnblogs.com/jianxinzhou/p/4083921.html

w = tf.Variable(tf.random_normal([2,1], stddev=1, seed=1))
y = tf.matmul(x, w)

# 损失函数中加入L2 正则化的值（注意：lambda 是 Python 保留字，不能作变量名；
# 且加号必须与前一行同属一个表达式，否则第二行是无效的独立语句）
loss = (tf.reduce_mean(tf.square(y_ - y))
        + tf.contrib.layers.l2_regularizer(lambda_)(w))

TensorFlow正则化示例：

# 1. Generate the simulated data set.
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

data = []
label = []
np.random.seed(0)

# Points are split into two classes by the unit circle centred at the
# origin; Gaussian noise (sigma=0.1) is added to every sample.
for i in range(150):
    x1 = np.random.uniform(-1, 1)
    x2 = np.random.uniform(0, 2)
    if x1**2 + x2**2 <= 1:
        data.append([np.random.normal(x1, 0.1), np.random.normal(x2, 0.1)])
        label.append(0)
    else:
        data.append([np.random.normal(x1, 0.1), np.random.normal(x2, 0.1)])
        label.append(1)

data = np.hstack(data).reshape(-1, 2)
label = np.hstack(label).reshape(-1, 1)
plt.scatter(data[:, 0], data[:, 1], c=label,
            cmap="RdBu", vmin=-.2, vmax=1.2, edgecolor="white")
plt.show()

# 2. Weight factory that also registers the L2 penalty on a collection.
def get_weight(shape, lambda1):
    """Create a weight variable of the given shape and add its L2
    regularization term (weighted by lambda1) to the 'losses' collection.

    shape   -- list giving the variable's shape, e.g. [in_dim, out_dim]
    lambda1 -- regularization coefficient for this weight
    """
    var = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # BUG FIX: the original function never registered the penalty, so
    # despite the section comment the final loss had no regularization.
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(lambda1)(var))
    return var

# 3. Define the neural network.
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
sample_size = len(data)

# Number of nodes in each layer.
layer_dimension = [2, 10, 5, 3, 1]

n_layers = len(layer_dimension)

cur_layer = x
in_dimension = layer_dimension[0]

# Build the 5-layer fully connected network in a loop.
for i in range(1, n_layers):
    out_dimension = layer_dimension[i]
    weight = get_weight([in_dimension, out_dimension], 0.003)
    bias = tf.Variable(tf.constant(0.1, shape=[out_dimension]))
    cur_layer = tf.nn.elu(tf.matmul(cur_layer, weight) + bias)
    in_dimension = layer_dimension[i]

y = cur_layer  # the last layer is the output layer

# MSE term of the loss (L2 penalties live on the 'losses' collection).
mse_loss = tf.reduce_sum(tf.pow(y_ - y, 2)) / sample_size

# 5. Train with the regularized loss.
# BUG FIX: loss and train_op were used below but never defined.
# Total loss = MSE + any L2 terms registered on the 'losses' collection
# (list concatenation keeps this valid even if the collection is empty).
loss = tf.add_n(tf.get_collection('losses') + [mse_loss])
train_op = tf.train.AdamOptimizer(0.001).minimize(loss)
TRAINING_STEPS = 40000

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for i in range(TRAINING_STEPS):
        sess.run(train_op, feed_dict={x: data, y_: label})
        if i % 2000 == 0:
            print("After %d steps, loss: %f" % (i, sess.run(loss, feed_dict={x: data, y_: label})))

    # Evaluate the network on a dense grid to draw the decision boundary.
    xx, yy = np.mgrid[-1:1:.01, 0:2:.01]
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = sess.run(y, feed_dict={x: grid})
    probs = probs.reshape(xx.shape)

plt.scatter(data[:, 0], data[:, 1], c=label,
            cmap="RdBu", vmin=-.2, vmax=1.2, edgecolor="white")
plt.contour(xx, yy, probs, levels=[.5], cmap="Greys", vmin=0, vmax=.1)
plt.show()

### 滑动平均模型

tensorflow 下的 tf.train.ExponentialMovingAverage 需要提供一个衰减率（decay），用于控制模型更新的速度。ExponentialMovingAverage 对每一个（待更新训练学习的）变量（variable）都会维护一个影子变量（shadow variable），影子变量的初始值就是这个变量的初始值。

import tensorflow as tf

# 1. Define the variable and the moving-average class.
v1 = tf.Variable(0, dtype=tf.float32)
step = tf.Variable(0, trainable=False)  # stands in for global_step
ema = tf.train.ExponentialMovingAverage(0.99, step)
# ema.apply returns an op that updates the shadow variable of v1.
maintain_averages_op = ema.apply([v1])

# 2. Watch how the shadow value evolves across iterations.
with tf.Session() as sess:

    # Initialization: shadow value starts equal to v1's initial value.
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run([v1, ema.average(v1)]))

    # Update the value of v1.
    sess.run(tf.assign(v1, 5))
    sess.run(maintain_averages_op)
    print(sess.run([v1, ema.average(v1)]))

    # Update step and v1; a large step raises the effective decay.
    sess.run(tf.assign(step, 10000))
    sess.run(tf.assign(v1, 10))
    sess.run(maintain_averages_op)
    print(sess.run([v1, ema.average(v1)]))

    # Apply the moving average once more.
    sess.run(maintain_averages_op)
    print(sess.run([v1, ema.average(v1)]))

#### Deep Learning 学习系列001 —— 神经网络基本概念

2016-09-06 20:01:58

#### 神经网络基本概念

2018-04-19 13:31:56

#### 神经网络概念理解

2017-03-26 00:21:23

#### TensorFlow学习笔记（4）——深层神经网络

2017-07-16 22:18:14

#### Keras实现卷积神经网络

2017-08-22 13:58:05

#### 180304 keras中图像化查看模型训练过程中的acc+loss+val_acc+val_loss

2018-03-04 20:51:42

2017-09-28 18:16:48

#### 深度学习之损失函数

2018-01-04 10:27:01

#### tensorflow学习笔记（十）：sess.run()

2016-10-24 09:04:08

#### 神经网络入门之bp算法，梯度下降

2017-07-23 14:21:20