1. Basic Syntax
- tf.where(condition, A, B): evaluates the condition elementwise and returns A where it is true, B where it is false, e.g.:
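A minimal example (the tensor values are made up for illustration):

```python
import tensorflow as tf

a = tf.constant([1, 2, 3, 1, 1])
b = tf.constant([0, 1, 3, 4, 5])
c = tf.where(tf.greater(a, b), a, b)  # take a where a > b, otherwise b
print(c)  # tf.Tensor([1 2 3 4 5], shape=(5,), dtype=int32)
```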
- np.random.RandomState.rand(): returns random floats in [0, 1); with shape arguments it returns an array of that shape:
```python
import numpy as np

rdm = np.random.RandomState()  # pass a seed here for reproducible draws
a = rdm.rand(3, 4)             # 3x4 array of floats in [0, 1)
print(a)
```

```
[[0.18780617 0.01291692 0.95263602 0.85058282]
 [0.29071402 0.87537937 0.75541398 0.75589398]
 [0.43554328 0.30366511 0.63423509 0.28484491]]
```
- np.vstack((array1, array2)): stacks the two arrays vertically (note that the arrays are passed as a tuple)
- np.mgrid[start:stop:step, ...]: returns dense coordinate grids
- x.ravel(): flattens x into a 1-D array
- np.c_[...]: pairs up the returned grid values point by point (the four helpers are combined in the sketch below)
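A minimal sketch combining these helpers (the array values are arbitrary):

```python
import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(np.vstack((a, b)))  # [[1 2 3]
                          #  [4 5 6]]

# build coordinate grids, flatten them, and pair the points
x, y = np.mgrid[1:3:1, 2:4:0.5]
grid = np.c_[x.ravel(), y.ravel()]
print(grid)  # 8 (x, y) pairs covering the 2x4 grid
```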
2. Network Complexity
- Space complexity: total parameters = total weights + total biases
- Time complexity: number of multiply-accumulate operations (a worked example follows this list)
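For example, a hypothetical fully connected network with 2 inputs, one hidden layer of 3 neurons, and 1 output has 2×3 + 3×1 = 9 weights and 3 + 1 = 4 biases, so 13 parameters in total; each forward pass performs 2×3 + 3×1 = 9 multiply-accumulate operations.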
(1) Exponentially decaying learning rate
$\text{decayed\_lr} = \text{initial\_lr} \times \text{decay\_rate}^{(\text{current epoch} \,/\, \text{decay interval})}$
The initial learning rate, decay rate, and decay interval are all hyperparameters.
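A minimal sketch of this schedule inside a toy training loop (the hyperparameter values are made up for illustration):

```python
import tensorflow as tf

LR_BASE = 0.2    # initial learning rate (illustrative)
LR_DECAY = 0.99  # decay rate
LR_STEP = 1      # decay once every LR_STEP epochs

w = tf.Variable(tf.constant(5, dtype=tf.float32))
for epoch in range(40):
    lr = LR_BASE * LR_DECAY ** (epoch / LR_STEP)  # the formula above
    with tf.GradientTape() as tape:
        loss = tf.square(w + 1)  # toy loss, minimum at w = -1
    grads = tape.gradient(loss, w)
    w.assign_sub(lr * grads)
    print("epoch %2d: w = %f, lr = %f" % (epoch, w.numpy(), lr))
```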
3. Activation Functions (first proposed with the MP / McCulloch-Pitts neuron model)
- Sigmoid
  $f(x)=\frac{1}{1+e^{-x}}$
- Tanh
  $f(x)=\frac{1-e^{-2x}}{1+e^{-2x}}$
- ReLU
  $f(x)=\max(x,0)$
- Leaky ReLU
  $f(x)=\max(ax,x)$, where $a$ is a small slope applied to negative inputs
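All four activations above are available in tf.nn; a quick sketch evaluating each on arbitrary sample inputs:

```python
import tensorflow as tf

x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
print(tf.nn.sigmoid(x))                # 1 / (1 + e^-x)
print(tf.nn.tanh(x))                   # (1 - e^-2x) / (1 + e^-2x)
print(tf.nn.relu(x))                   # max(x, 0)
print(tf.nn.leaky_relu(x, alpha=0.2))  # max(ax, x) with a = 0.2
```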
4. Loss Functions
A loss function measures the gap between the predicted value y and the known answer y_.
- Mean squared error (MSE): $MSE(y\_,y)=\frac{1}{n}\sum_{i=1}^{n}(y-y\_)^2$, i.e. `loss_mse = tf.reduce_mean(tf.square(y_ - y))`
```python
import numpy as np
import tensorflow as tf

SEED = 23455
rdm = np.random.RandomState(seed=SEED)
x = rdm.rand(32, 2)
# labels: y = x1 + x2 plus uniform noise in [-0.05, 0.05)
y_ = [[x1 + x2 + (rdm.rand() / 10.0 - 0.05)] for (x1, x2) in x]
x = tf.cast(x, dtype=tf.float32)

w1 = tf.Variable(tf.random.normal([2, 1], stddev=1, seed=1))

epochs = 15000
lr = 0.002

for epoch in range(epochs):
    with tf.GradientTape() as tape:
        y = tf.matmul(x, w1)
        loss_mse = tf.reduce_mean(tf.square(y_ - y))
    grads = tape.gradient(loss_mse, w1)
    w1.assign_sub(lr * grads)  # w1 -= lr * grads
    if epoch % 500 == 0:
        print("after %d training steps, w1 is" % epoch)
        print(w1.numpy(), "\n")

print("the final w1 is", w1.numpy())
```
```
after 0 training steps, w1 is
[[-0.8096241]
 [ 1.4855157]]

after 500 training steps, w1 is
[[-0.21934733]
 [ 1.6984866 ]]

........................

after 14000 training steps, w1 is
[[0.9993659]
 [0.999166 ]]

after 14500 training steps, w1 is
[[1.0002553 ]
 [0.99838644]]
```
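This matches the data generation above: the labels were built as x1 + x2 plus small noise, so the ideal weights are exactly [1, 1].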
If over- and under-prediction carry different cost and profit consequences, use a custom loss function:

```python
# over-predicting wastes COST per unit; under-predicting forgoes PROFIT per unit
COST = 99
PROFIT = 1
# charge COST where y > y_, PROFIT where y < y_
loss = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))
```
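To see how tf.where branches elementwise, here is a toy evaluation with made-up predictions, assuming the same COST and PROFIT values:

```python
import tensorflow as tf

COST = 99
PROFIT = 1
y = tf.constant([1.2, 0.8, 1.5])   # hypothetical predictions
y_ = tf.constant([1.0, 1.0, 1.0])  # hypothetical targets
loss = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))
print(loss.numpy())  # ≈ 0.2*99 + 0.2*1 + 0.5*99 = 69.5
```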
- Cross-entropy loss (CE): characterizes the distance between two probability distributions; the smaller the cross-entropy, the closer the distributions.
  $H(y\_,y)=-\sum y\_ \cdot \ln y$
  `tf.losses.categorical_crossentropy(y_, y)`
```python
import tensorflow as tf

loss_ce1 = tf.losses.categorical_crossentropy([1, 0], [0.8, 0.2])
print(loss_ce1)
```

```
# result
tf.Tensor(0.22314353, shape=(), dtype=float32)
```
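Sanity check: the label puts all probability on the first class, so the loss is $-\ln 0.8 \approx 0.2231$, matching the printed tensor.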
- Softmax combined with cross-entropy
  √ Pass the output through the softmax function first, then compute the cross-entropy between y and y_.
  ★ Be sure to convert the data type with tf.cast().
```python
import tensorflow as tf
import numpy as np

y_ = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
y = np.array([[12, 3, 2], [3, 10, 1], [1, 2, 5]])
y_f = tf.cast(y_, tf.float32)  # labels and logits must share a float dtype
yf = tf.cast(y, tf.float32)
loss_se = tf.nn.softmax_cross_entropy_with_logits(labels=y_f, logits=yf)
print("softmax combined with cross-entropy:\n", loss_se)
```

```
# result
softmax combined with cross-entropy:
 tf.Tensor([0.00016879 0.00103468 0.06588391], shape=(3,), dtype=float32)
```
5. Mitigating Overfitting
Remedies: clean the data, enlarge the dataset, apply regularization, increase the regularization parameter.
- Regularization: add a model-complexity term to the loss function that penalizes the weights w, which weakens the effect of noise in the training data.
$loss = loss(y, y\_) + REGULARIZER \cdot loss(w)$
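The example below builds loss(w) with tf.nn.l2_loss, which computes half the sum of squared entries, sum(w**2) / 2:

```python
import tensorflow as tf

w = tf.constant([1.0, -2.0, 3.0])
print(tf.nn.l2_loss(w).numpy())  # (1 + 4 + 9) / 2 = 7.0
```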
```python
import tensorflow as tf
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# read features x1, x2 and binary labels y_c
df = pd.read_csv('dot.csv')
x_data = np.array(df[['x1', 'x2']])
y_data = np.array(df['y_c'])
x_train = np.vstack(x_data).reshape(-1, 2)
y_train = np.vstack(y_data).reshape(-1, 1)
Y_c = [['red' if y else 'blue'] for y in y_train]

x_train = tf.cast(x_train, tf.float32)
y_train = tf.cast(y_train, tf.float32)
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)

# one hidden layer with 11 neurons
w1 = tf.Variable(tf.random.normal([2, 11]), dtype=tf.float32)
b1 = tf.Variable(tf.constant(0.01, shape=[11]))
w2 = tf.Variable(tf.random.normal([11, 1]), dtype=tf.float32)
b2 = tf.Variable(tf.constant(0.01, shape=[1]))

lr = 0.005
epochs = 800

for epoch in range(epochs):
    for step, (x_train, y_train) in enumerate(train_db):
        with tf.GradientTape() as tape:
            h1 = tf.matmul(x_train, w1) + b1
            h1 = tf.nn.relu(h1)
            y = tf.matmul(h1, w2) + b2
            loss_mse = tf.reduce_mean(tf.square(y_train - y))
            # L2-regularize the weights (not the biases)
            loss_regu = []
            loss_regu.append(tf.nn.l2_loss(w1))
            loss_regu.append(tf.nn.l2_loss(w2))
            loss_regu = tf.reduce_sum(loss_regu)
            loss = loss_mse + 0.03 * loss_regu  # REGULARIZER = 0.03
        variables = [w1, b1, w2, b2]
        grads = tape.gradient(loss, variables)  # partial derivatives
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
    if epoch % 20 == 0:
        print('epoch:', epoch, 'loss:', float(loss))

print("******predict*****")
xx, yy = np.mgrid[-3:3:.1, -3:3:.1]  # grid of test points with step .1 over [-3, 3)
grid = np.c_[xx.ravel(), yy.ravel()]
grid = tf.cast(grid, tf.float32)
probs = []
for x_test in grid:
    h1 = tf.matmul([x_test], w1) + b1
    h1 = tf.nn.relu(h1)
    y = tf.matmul(h1, w2) + b2
    probs.append(y)

x1 = x_data[:, 0]
x2 = x_data[:, 1]
probs = np.array(probs).reshape(xx.shape)
plt.scatter(x1, x2, color=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[.5])  # decision boundary where the output is 0.5
plt.show()
```
```
epoch: 0 loss: 4.97540283203125
epoch: 20 loss: 0.743660569190979
epoch: 40 loss: 0.5566228032112122
.........
epoch: 760 loss: 0.10121501982212067
epoch: 780 loss: 0.0988234132528305
```