BERT fine-tuning
TF2 BERT checkpoints
The corresponding checkpoint, since I'm using TF 2.4.0 here:
gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12"
tf.io.gfile.listdir(gs_folder_bert)
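A quick sketch of poking at that checkpoint folder. Per the TF fine-tuning tutorial, this directory should contain bert_config.json, vocab.txt, and the bert_model.ckpt files (an assumption here; verify against the listdir output), and tf.io.gfile.GFile can read them straight from GCS:

import json
import tensorflow as tf

gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12"

# Inspect what the checkpoint folder actually contains.
print(tf.io.gfile.listdir(gs_folder_bert))

# Read the model config directly from GCS (assumes bert_config.json is
# present, as in the TF fine-tuning tutorial).
with tf.io.gfile.GFile(gs_folder_bert + "/bert_config.json") as f:
    bert_config = json.load(f)
print(bert_config["hidden_size"], bert_config["num_hidden_layers"])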
history = model.fit(train_images,
                    train_labels,
                    batch_size=1024,
                    epochs=max_epochs,
                    validation_data=(test_images, test_labels))
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
87916544/87910968 [==============================] - 1s 0us/step
Python list.extend() usage
#!/usr/bin/python3
aList = [123, 'xyz', 'zara', 'abc', 123]
bList = [2009, 'manni']
aList.extend(bList)  # appends every element of bList to aList, in place
print("Extended List :", aList)
Extended List : [123, 'xyz', 'zara', 'abc', 123, 2009, 'manni']
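For contrast, extend() splices the elements in one by one, while append() adds the whole list as a single nested element:

aList = [1, 2, 3]
aList.extend([4, 5])
print(aList)   # [1, 2, 3, 4, 5]

bList = [1, 2, 3]
bList.append([4, 5])
print(bList)   # [1, 2, 3, [4, 5]]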
In this example, you extract the features from the last convolutional layer of InceptionV3, giving a tensor of shape (8, 8, 2048) per image.
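A minimal sketch of that extractor, following the TF image-captioning tutorial: build InceptionV3 without its classification head and wrap the last layer's output in a new Model (the weights download logged above comes from exactly this call).

import tensorflow as tf

# InceptionV3 without the top classification layer; this triggers the
# inception_v3_weights_..._notop.h5 download shown above.
image_model = tf.keras.applications.InceptionV3(include_top=False,
                                                weights='imagenet')
new_input = image_model.input
hidden_layer = image_model.layers[-1].output  # (batch, 8, 8, 2048)

image_features_extract_model = tf.keras.Model(new_input, hidden_layer)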
Some common arguments (a usage example follows this list):
split=: which split to read (e.g. 'train', ['train', 'test'], 'train[80%:]', ...). See the split API guide.
shuffle_files=: whether to shuffle the files between each epoch (TFDS stores big datasets in multiple smaller files).
data_dir=: location where the dataset is saved (defaults to ~/tensorflow_datasets/).
with_info=True: also return the tfds.core.DatasetInfo containing the dataset metadata.
download=False: disable the download.
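A short example putting those arguments together ('mnist' is just an illustrative dataset name):

import tensorflow_datasets as tfds

# Load train/test splits plus metadata; shuffle_files matters mainly for
# large datasets stored across many files.
(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    data_dir='~/tensorflow_datasets/',
    with_info=True,
)
print(ds_info.features)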
Manually downloading TFDS datasets
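A sketch of the manual-download flow, assuming TFDS defaults: drop the downloaded archives into the manual directory (by default ~/tensorflow_datasets/downloads/manual/), then load as usual; for datasets that require manual download, TFDS picks the files up from there instead of fetching them.

import tensorflow_datasets as tfds

# After placing the archives under ~/tensorflow_datasets/downloads/manual/
# (the default manual_dir), TFDS reads them during download_and_prepare.
# 'imagenet2012' is one such manual-download dataset (illustrative).
ds = tfds.load('imagenet2012', split='train')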
PIL img.show()
from PIL import Image

img = Image.open('d:/dog.png')
img.show()
import math

# Choose a near-square grid: an x1-by-x1 grid has room for len_result plots.
x1 = math.ceil(len_result ** 0.5)
# Inside the plotting loop, l is the 0-based subplot index.
ax = fig.add_subplot(x1, x1, l + 1)
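A self-contained sketch of that grid logic (len_result and the random "attention maps" are illustrative stand-ins):

import math
import matplotlib.pyplot as plt
import numpy as np

len_result = 10  # e.g. number of words in the predicted caption
x1 = math.ceil(len_result ** 0.5)  # side of a near-square grid

fig = plt.figure(figsize=(10, 10))
for l in range(len_result):
    ax = fig.add_subplot(x1, x1, l + 1)
    ax.imshow(np.random.rand(8, 8))  # stand-in for an (8, 8) attention map
plt.tight_layout()
plt.show()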
Printing the model's output shapes. The attention computation still isn't very clear to me, so today I want to work it out properly.
In particular, how the attention score is computed:
why the two projections are added and then passed through tanh.
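For reference, the additive (Bahdanau) attention that the class below implements; W_1, W_2, and v match the Dense layers self.W1, self.W2, self.V in the code:

e_{t,i} = v^{\top} \tanh(W_1 h_i + W_2 s_t), \qquad
\alpha_{t,i} = \operatorname{softmax}_i(e_{t,i}), \qquad
c_t = \sum_i \alpha_{t,i} h_i

Here h_i are the 64 image-feature vectors (features) and s_t is the decoder hidden state (hidden). Adding the two projections lets the score depend jointly on image location and decoder state, and tanh is simply the bounded nonlinearity of this one-hidden-layer scoring MLP before V collapses it to a scalar.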
import tensorflow as tf

class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        self.isprint = True  # print shapes only on the first call

    def call(self, features, hidden):
        # features (CNN_encoder output) shape == (batch_size, 64, embedding_dim)
        #   here: (64, 64, 256)
        # hidden shape == (batch_size, hidden_size), here: (64, 512)

        # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
        #   here: (64, 1, 512)
        hidden_with_time_axis = tf.expand_dims(hidden, 1)

        # attention_hidden_layer shape == (batch_size, 64, units)
        #   here: (64, 64, 512)
        attention_hidden_layer = (tf.nn.tanh(self.W1(features) +
                                             self.W2(hidden_with_time_axis)))
        if self.isprint:
            print(f'self.W1(features).shape,{self.W1(features).shape}')
            print(f'self.W2(hidden_with_time_axis).shape,{self.W2(hidden_with_time_axis).shape}')
            print(f'attention_hidden_layer.shape,{attention_hidden_layer.shape}')

        # score shape == (batch_size, 64, 1), here: (64, 64, 1)
        # This gives an unnormalized score for each image feature.
        score = self.V(attention_hidden_layer)

        # attention_weights shape == (batch_size, 64, 1), here: (64, 64, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, embedding_dim)
        #   here: (64, 256)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)

        self.isprint = False
        return context_vector, attention_weights
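A quick shape check with random tensors, using the sizes from the notes below (batch 64, 64 spatial locations, embedding_dim 256, units/hidden 512):

attention = BahdanauAttention(units=512)
features = tf.random.normal((64, 64, 256))
hidden = tf.random.normal((64, 512))
context_vector, attention_weights = attention(features, hidden)
print(context_vector.shape)     # (64, 256)
print(attention_weights.shape)  # (64, 64, 1)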
Shapes observed during one training step (batch_size = 64):

img_tensor.shape   (64, 64, 2048)
features.shape     (64, 64, 256)
hidden.shape       (64, 512)
target.shape       (64, 51)
dec_input.shape    (64, 1)

Inside the decoder call:

context_vector.shape     (64, 256)
attention_weights.shape  (64, 64, 1)
self.embedding(x)        (64, 1, 256)
tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)  (64, 1, 512)
output.shape             (64, 1, 512)
state.shape              (64, 512)
self.fc1(output)         (64, 1, 512)
tf.reshape(x, (-1, x.shape[2]))  (64, 512)
self.fc2(x)              (64, 5001)
return x, state, attention_weights

In the training loop:

target[:, i].shape  (64,)
# predictions (64, 5001)
# hidden (64, 512)
loss += loss_function(target[:, i], predictions)
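For context, a sketch of the decoder these shapes come from, following the TF image-captioning tutorial; embedding_dim=256, units=512, and vocab_size=5001 are read off the printed shapes above:

class RNN_Decoder(tf.keras.Model):
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.units)

    def call(self, x, features, hidden):
        # context_vector: (64, 256), attention_weights: (64, 64, 1)
        context_vector, attention_weights = self.attention(features, hidden)
        # (64, 1) -> (64, 1, 256) after embedding
        x = self.embedding(x)
        # concat context with the word embedding: (64, 1, 512)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        # output: (64, 1, 512), state: (64, 512)
        output, state = self.gru(x)
        x = self.fc1(output)                 # (64, 1, 512)
        x = tf.reshape(x, (-1, x.shape[2]))  # (64, 512)
        x = self.fc2(x)                      # (64, 5001) -> predictions
        return x, state, attention_weights

decoder = RNN_Decoder(embedding_dim=256, units=512, vocab_size=5001)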