文章目录
1 Dense Layer
目的:构建Dense层,对比两种方法实现的区别
2 对比原始的add layer方法和继承方法的不同
2.1 global config
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, LSTM, Dense
import numpy as np
np.random.seed(1)  # seed NumPy's global RNG so the synthetic data is reproducible

rows = 10000  # number of samples
columns = 100  # number of features

# Class 0: features drawn uniformly from [0, 1); every label is 0.
train_x1 = np.random.random((rows // 2, columns))
train_y1 = np.random.choice([0], size=(rows // 2, 1))

# Class 1: the same kind of draw shifted by +1; every label is 1.
train_x2 = 1 + np.random.random((rows // 2, columns))
train_y2 = np.random.choice([1], size=(rows // 2, 1))

# Stack both classes row-wise: class 0 first, class 1 second.
train_x = np.concatenate([train_x1, train_x2], axis=0)
train_y = np.concatenate([train_y1, train_y2], axis=0)

units = 5  # number of units (cells) in the layer under test
2.1 用add实现
tf.random.set_seed(1)  # fix TensorFlow's random seed

# A Sequential model holding one built-in Dense layer with default settings.
model1 = keras.Sequential(
    [
        Input(shape=(columns,)),
        Dense(units=units),
    ]
)
model1.compile(loss="mse", optimizer="adam", metrics=["accuracy"])
model1.fit(train_x, train_y, epochs=10)
model1.predict(train_x)[-1][-1]  # last output of the last sample; the value is not stored

# Keep the Dense layer and its trained kernel/bias.
l1 = model1.layers[0]
w1, b1 = l1.get_weights()
Dense 层 API 中与权重相关的参数分为三类:
- initializer 初始化项:指定参数的初始化方式
- regularizer 正则项:选择不同的正则化方式(如 L1、L2)
- constraint 约束项:对参数施加约束(如非负约束、最大范数约束)
2.2 用继承实现
tf.random.set_seed(1)  # fix TensorFlow's random seed


class MyDenseLayer(keras.layers.Layer):
    """Minimal fully connected layer computing ``input @ kernel + bias``.

    Args:
        num_outputs: Number of output units, i.e. the second dimension of
            the kernel and the length of the bias.
    """

    def __init__(self, num_outputs):
        super().__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Create the kernel and bias once the input's last dimension is known."""
        # No initializer is passed for the kernel, so the add_weight default applies.
        self.kernel = self.add_weight(
            name="kernel",
            shape=[int(input_shape[-1]), self.num_outputs],
        )
        self.bias = self.add_weight(
            name="bias",
            shape=[self.num_outputs],
            initializer=keras.initializers.zeros,
        )
        # Flag the layer as built. This must be the `built` attribute:
        # assigning to `self.build` would shadow this method with a bool.
        self.built = True

    def call(self, input):
        """Apply the affine transform to `input`."""
        return tf.matmul(input, self.kernel) + self.bias
# Same pipeline as the built-in Dense model, but using the subclassed layer.
model2 = keras.Sequential(
    [
        Input(shape=(columns,)),
        MyDenseLayer(units),
    ]
)
model2.compile(optimizer="adam", loss="mse", metrics=["accuracy"])
model2.fit(train_x, train_y, epochs=10)
model2.predict(train_x)[-1][-1]  # last output of the last sample; the value is not stored

# Keep the custom layer and its trained kernel/bias.
l2 = model2.layers[0]
w2, b2 = l2.get_weights()
3 有权重的对比
3.1 用自带add_weight方法自定义权重
这里自定义权重和偏移项的初始化函数。为什么要定义这两个函数?因为它们可以作为 initializer 传给 add_weight(以及 Dense 的 kernel_initializer、bias_initializer),用来初始化权重和偏移项。
def w_init(shape, dtype=tf.float32):
    """Kernel initializer: draw a tensor of `shape` via ``tf.random.normal``."""
    sampled = tf.random.normal(shape, dtype=dtype)
    return sampled
def b_init(shape, dtype=tf.float32):
    """Bias initializer: return an all-zeros tensor of `shape` and `dtype`."""
    zeros = tf.zeros(shape, dtype=dtype)
    return zeros
3.2 用add实现
tf.random.set_seed(1)  # fix TensorFlow's random seed

# Built-in Dense layer whose kernel and bias come from the custom initializers.
model3 = keras.Sequential(
    [
        Input(shape=(columns,)),
        Dense(
            units=units,
            kernel_initializer=w_init,  # the weights need to be pinned
            bias_initializer=b_init,
        ),
    ]
)
model3.compile(loss="mse", optimizer="adam", metrics=["accuracy"])
model3.fit(train_x, train_y, epochs=10)
model3.predict(train_x)[-1][-1]  # last output of the last sample; the value is not stored

# Keep the Dense layer and its trained kernel/bias.
l3 = model3.layers[0]
w3, b3 = l3.get_weights()
3.3 用继承实现
tf.random.set_seed(1)  # fix TensorFlow's random seed


class MyDenseLayer(keras.layers.Layer):
    """Fully connected layer whose weights are created with custom initializers.

    Args:
        num_outputs: Number of output units.
    """

    def __init__(self, num_outputs):
        super().__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Create kernel and bias through ``add_weight`` using `w_init` / `b_init`."""
        kernel_shape = (input_shape[-1], self.num_outputs)
        bias_shape = (self.num_outputs,)
        # Custom kernel.
        self.kernel = self.add_weight(
            shape=kernel_shape, dtype=tf.float32, initializer=w_init
        )
        # Custom bias.
        self.bias = self.add_weight(
            shape=bias_shape, dtype=tf.float32, initializer=b_init
        )

    def call(self, input):
        """Return ``input @ kernel + bias``."""
        projected = tf.matmul(input, self.kernel)
        return projected + self.bias
# Train the subclassed layer that uses the custom initializers.
model4 = keras.Sequential(
    [
        Input(shape=(columns,)),
        MyDenseLayer(units),
    ]
)
model4.compile(optimizer="adam", loss="mse", metrics=["accuracy"])
model4.fit(train_x, train_y, epochs=10)
model4.predict(train_x)[-1][-1]  # last output of the last sample; the value is not stored

# Keep the custom layer and its trained kernel/bias.
l4 = model4.layers[0]
w4, b4 = l4.get_weights()
4 用自定义矩阵为权重矩阵
4.1 初始化权重和偏移项矩阵
tf.random.set_seed(1)  # fix the seed before drawing `w`

# Explicit initial parameters: a (columns, units) kernel and a zero bias.
w = tf.random.normal((columns, units), dtype=tf.float32)
b = tf.zeros((units,), dtype=tf.float32)


def w_init(shape, dtype=tf.float32):
    """Initializer that ignores its arguments and returns the module-level `w`."""
    return w


def b_init(shape, dtype=tf.float32):
    """Initializer that ignores its arguments and returns the module-level `b`."""
    return b
4.2 用add实现
tf.random.set_seed(1)  # fix TensorFlow's random seed

# Built-in Dense layer initialised from the fixed `w` / `b` matrices.
model5 = keras.Sequential(
    [
        Input(shape=(columns,)),
        Dense(
            units=units,
            kernel_initializer=w_init,
            bias_initializer=b_init,
        ),
    ]
)
model5.compile(optimizer="adam", loss="mse", metrics=["accuracy"])
model5.fit(train_x, train_y, epochs=10)
model5.predict(train_x)[-1][-1]  # last output of the last sample; the value is not stored
4.3 用继承实现
不用Layer.add_weight方法,自己实现一个权重矩阵,然后用权重矩阵作为初始化,进行训练
tf.random.set_seed(1)  # fix TensorFlow's random seed


class MyDenseLayer(keras.layers.Layer):
    """Fully connected layer whose parameters are hand-made ``tf.Variable`` objects.

    The variables are initialised from the module-level `w` and `b` rather
    than being created through ``add_weight``.

    Args:
        num_outputs: Stored on the instance; `build` does not read it because
            the parameter shapes come from `w` and `b`.
    """

    def __init__(self, num_outputs):
        super().__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Wrap the module-level `w` and `b` in trainable variables."""
        self.kernel = tf.Variable(initial_value=w, trainable=True)
        self.bias = tf.Variable(initial_value=b, trainable=True)

    def call(self, input):
        """Return ``input @ kernel + bias``."""
        projected = tf.matmul(input, self.kernel)
        return projected + self.bias
# Train the subclassed layer that owns its own tf.Variable parameters.
model6 = keras.Sequential(
    [
        Input(shape=(columns,)),
        MyDenseLayer(units),
    ]
)
model6.compile(optimizer="adam", loss="mse", metrics=["accuracy"])
model6.fit(train_x, train_y, epochs=10)
model6.predict(train_x)[-1][-1]  # last output of the last sample; the value is not stored
5 用numpy实现
5.1 初始化权重和偏移项矩阵
tf.random.set_seed(1)  # fix the seed before drawing `w`

# Step size for the gradient-descent update. The Keras SGD, GradientTape and
# NumPy versions in this section all read this name, so it has to exist here.
learning_rate = 0.01

# The dtype must be the same everywhere, otherwise the NumPy and Keras results
# differ; float32 is used throughout.
train_x = np.ones(shape=(rows, columns), dtype="float32")
train_y = np.vstack(
    [
        np.ones(shape=(rows // 2, 1), dtype="float32"),
        np.zeros(shape=(rows // 2, 1), dtype="float32"),
    ]
)

# Initial parameters of a single output unit.
w = tf.random.normal(shape=(columns, 1), dtype=tf.float32)
b = tf.zeros(shape=(1,), dtype=tf.float32)


def w_init(shape, dtype=tf.float32):
    """Initializer that ignores its arguments and returns `w` as a float32 tensor."""
    return tf.convert_to_tensor(w, dtype=tf.float32)


def b_init(shape, dtype=tf.float32):
    """Initializer that ignores its arguments and returns `b` as a float32 tensor."""
    return tf.convert_to_tensor(b, dtype=tf.float32)
5.2 add实现,有激活函数
tf.random.set_seed(1)  # fix TensorFlow's random seed

# One sigmoid unit initialised from the fixed `w` / `b`.
model7 = keras.Sequential(
    [
        Input(shape=(columns,)),
        Dense(
            units=1,
            activation="sigmoid",
            kernel_initializer=w_init,
            bias_initializer=b_init,
        ),
    ]
)
h1 = model7.predict(train_x)  # outputs before any training step
model7.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
    loss="mse",
    metrics=["accuracy"],
)
# batch_size must cover the whole dataset (batch gradient descent): the NumPy
# implementation updates on the full data set, not on mini-batches.
model7.fit(train_x, train_y, batch_size=rows, epochs=1)
w1, b1 = model7.layers[0].weights
5.3 tf 实现
tf.random.set_seed(1)  # fix TensorFlow's random seed

x = tf.Variable(train_x, dtype=tf.float32)
w2 = w
b2 = b

# persistent=True lets the tape be queried twice below (once per parameter).
with tf.GradientTape(persistent=True) as tape:
    # w2 and b2 are plain tensors here, so they must be watched explicitly.
    tape.watch([w2, b2])
    # sigmoid(z) with z = x @ w2 + b2. The minus sign has to cover the whole
    # pre-activation, bias included; negating only the matmul term flips the
    # sign of the bias gradient.
    y_pred = 1 / (1 + tf.math.exp(-(tf.matmul(x, w2) + b2)))
    # Mean squared error against the labels.
    loss = tf.math.reduce_mean(tf.math.square(tf.subtract(y_pred, train_y)))

dw2 = tape.gradient(target=loss, sources=w2)
db2 = tape.gradient(target=loss, sources=b2)

# One plain gradient-descent update.
w2 = w2 - dw2 * learning_rate
b2 = b2 - db2 * learning_rate
5.4 numpy实现
import numpy as np
class MyModel:
    """Sigmoid layer trained with full-batch gradient descent on MSE, in NumPy.

    Args:
        w: Initial weights, array-like of shape (n_features, n_outputs).
        b: Initial bias, array-like broadcastable against the layer output.
        learning_rate: Step size applied to every gradient update.
    """

    def __init__(self, w, b, learning_rate):
        # Copy into NumPy arrays so training never mutates the caller's objects.
        self.w = np.array(w)
        self.b = np.array(b)
        self.learning_rate = learning_rate

    def fit(self, train_x, train_y, epochs, batch_size):
        """Run `epochs` full-batch updates on (`train_x`, `train_y`).

        `batch_size` is accepted for signature parity with Keras ``fit`` but is
        not used: every update is computed on the complete data set.
        """
        self.x = np.asarray(train_x)
        self.y = np.asarray(train_y)
        for epoch in range(epochs):
            print(f"epoch {epoch}")
            self.forward()  # forward pass
            self.get_loss()
            self.backward()

    def forward(self):
        """Compute the layer output for the stored inputs and keep it in `h3`."""
        self.h3 = self.sigmoid(np.dot(self.x, self.w) + self.b)

    def backward(self):
        """Apply one gradient-descent update to `w` and `b`."""
        n_samples = self.x.shape[0]
        # d(loss)/d(z) for loss = mean((h - y)^2) and h = sigmoid(z).
        dz = 2 * (self.h3 - self.y) * self.h3 * (1 - self.h3) / n_samples
        dw3 = np.dot(self.x.T, dz)  # derivative of the loss w.r.t. w
        # Derivative w.r.t. b: sum over samples, shaped like b so b keeps its shape.
        db3 = np.reshape(dz.sum(axis=0), self.b.shape)
        # Rebind instead of updating in place, using the configured step size.
        self.w = self.w - dw3 * self.learning_rate
        self.b = self.b - db3 * self.learning_rate

    def sigmoid(self, x):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-x))

    def get_loss(self):
        """Print and return the per-output mean squared error of the last forward pass."""
        loss = np.sum(np.square(self.h3 - self.y), axis=0) / self.x.shape[0]
        print(f"loss {loss}")
        return loss

    def predict(self):
        """Placeholder; prediction is not implemented."""
        pass
# Run a single full-batch update with the NumPy model and keep its parameters.
model8 = MyModel(w=w, b=b, learning_rate=learning_rate)
model8.fit(train_x, train_y, batch_size=rows, epochs=1)
w3, b3 = model8.w, model8.b