import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Dropout, LayerNormalization
from tensorflow.keras import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Positional encoding
class PositionEmbedding(Layer):
    def __init__(self, max_len, embedding_dim, **kwargs):
        super(PositionEmbedding, self).__init__(**kwargs)
        self.pos_encoding = self.positional_encoding(max_len, embedding_dim)

    def get_angles(self, pos, i, embedding_dim):
        # Angle rates from the original Transformer paper: 1 / 10000^(2i/d)
        angles = 1 / np.power(10000, (2 * (i // 2)) / np.float32(embedding_dim))
        return pos * angles

    def positional_encoding(self, max_len, embedding_dim):
        angle_rads = self.get_angles(np.arange(max_len)[:, np.newaxis],
                                     np.arange(embedding_dim)[np.newaxis, :],
                                     embedding_dim)
        # Sine on even indices, cosine on odd indices; concatenating the two
        # halves (rather than interleaving them) is a common simplification
        sines = np.sin(angle_rads[:, 0::2])
        cosines = np.cos(angle_rads[:, 1::2])
        pos_encoding = np.concatenate([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)

    def call(self, inputs):
        pos_encoding = tf.cast(self.pos_encoding, dtype=inputs.dtype)
        # Add the encoding for the first seq_len positions to the inputs
        return inputs + pos_encoding[:, :tf.shape(inputs)[1], :]
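# Quick shape sanity check for the layer above (illustrative only; the
# dimensions here are arbitrary and independent of the model built later).
# The encoding has a leading batch axis of 1, so it broadcasts over the batch.
_pe = PositionEmbedding(max_len=10, embedding_dim=32)
print(_pe(tf.zeros((2, 10, 32))).shape)  # (2, 10, 32)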
# Multi-head attention layer
class MultiHeadAttentionLayer(Layer):
    def __init__(self, embedding_dim, num_heads, **kwargs):
        super(MultiHeadAttentionLayer, self).__init__(**kwargs)
        # Head splitting requires the embedding to divide evenly across heads
        assert embedding_dim % num_heads == 0, \
            "embedding_dim must be divisible by num_heads"
        self.num_heads = num_heads
        self.embedding_dim = embedding_dim
        self.query_dense = Dense(embedding_dim)
        self.key_dense = Dense(embedding_dim)
        self.value_dense = Dense(embedding_dim)
        self.combine_heads = Dense(embedding_dim)

    def attention(self, query, key, value):
        # Scaled dot-product attention: softmax(QK^T / sqrt(d_k)) V
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        # (batch, seq, dim) -> (batch, heads, seq, dim // heads)
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.embedding_dim // self.num_heads))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention_output, _ = self.attention(query, key, value)
        # Merge heads back: (batch, heads, seq, head_dim) -> (batch, seq, dim)
        attention_output = tf.transpose(attention_output, perm=[0, 2, 1, 3])
        attention_output = tf.reshape(attention_output, (batch_size, -1, self.embedding_dim))
        attention_output = self.combine_heads(attention_output)
        return attention_output
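# Illustrative check of the head arithmetic above (standalone; dimensions are
# arbitrary): 32 dims split across 2 heads gives 16 dims per head, and after
# merging and the final Dense the output shape matches the input shape.
_mha = MultiHeadAttentionLayer(embedding_dim=32, num_heads=2)
print(_mha(tf.zeros((2, 5, 32))).shape)  # (2, 5, 32)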
# Position-wise feed-forward network
class FeedForwardNetwork(Layer):
    def __init__(self, embedding_dim, ff_dim, **kwargs):
        super(FeedForwardNetwork, self).__init__(**kwargs)
        # Expand to ff_dim with ReLU, then project back to embedding_dim
        self.dense1 = Dense(ff_dim, activation='relu')
        self.dense2 = Dense(embedding_dim)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        return x
# Encoder layer
class EncoderLayer(Layer):
    def __init__(self, embedding_dim, num_heads, ff_dim, **kwargs):
        super(EncoderLayer, self).__init__(**kwargs)
        self.attention = MultiHeadAttentionLayer(embedding_dim, num_heads)
        self.ffn = FeedForwardNetwork(embedding_dim, ff_dim)
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(0.1)
        self.dropout2 = Dropout(0.1)

    def call(self, inputs, training=False):
        # Attention sub-layer with residual connection and layer norm
        attn_output = self.attention(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Feed-forward sub-layer, again with residual + layer norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2
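# The residual connections require each sub-layer to preserve the input
# shape; a quick standalone check with arbitrary dimensions:
_enc = EncoderLayer(embedding_dim=32, num_heads=2, ff_dim=64)
print(_enc(tf.zeros((2, 5, 32)), training=False).shape)  # (2, 5, 32)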
# The full Transformer model
class Transformer(Model):
    def __init__(self, num_layers, embedding_dim, num_heads, ff_dim, input_dim, max_len, num_classes, **kwargs):
        super(Transformer, self).__init__(**kwargs)
        # A Dense projection stands in for a token embedding, since the
        # inputs here are continuous features rather than token ids
        self.embedding = Dense(embedding_dim)
        self.pos_encoding = PositionEmbedding(max_len, embedding_dim)
        self.enc_layers = [EncoderLayer(embedding_dim, num_heads, ff_dim) for _ in range(num_layers)]
        self.dropout = Dropout(0.1)
        self.classifier = Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        x = self.embedding(inputs)
        x = self.pos_encoding(x)
        x = self.dropout(x, training=training)
        for enc_layer in self.enc_layers:
            x = enc_layer(x, training=training)
        # Mean-pool over the sequence dimension, then classify
        x = tf.reduce_mean(x, axis=1)
        x = self.classifier(x)
        return x
# Load and preprocess the data
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
data = pd.read_csv(url)

def preprocess_data(data):
    # Impute missing values (the assignment form avoids the pandas
    # chained-assignment warning that inplace fillna on a column triggers)
    data['Age'] = data['Age'].fillna(data['Age'].median())
    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
    data['Fare'] = data['Fare'].fillna(data['Fare'].median())
    # Drop columns that are hard to use directly as features
    data.drop(columns=['Cabin', 'Ticket', 'Name', 'PassengerId'], inplace=True)
    # Encode categorical columns as integers
    le = LabelEncoder()
    data['Sex'] = le.fit_transform(data['Sex'])
    data['Embarked'] = le.fit_transform(data['Embarked'])
    return data
data = preprocess_data(data)
X = data.drop(columns=['Survived'])
y = data['Survived']
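# Purely diagnostic check before splitting (safe to remove): after the
# imputation and encoding above there should be no remaining nulls, and
# every column should be numeric
assert X.isnull().sum().sum() == 0
print(X.dtypes)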
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Reshape the data to 3-D: (samples, sequence_length, features); here each
# passenger is treated as a "sequence" of length 1
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
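# Confirm the 3-D layout the model expects; for this dataset the shapes
# should come out to roughly (712, 1, 7) and (179, 1, 7)
print(X_train.shape, X_test.shape)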
# Model hyperparameters
num_layers = 2
embedding_dim = 32
num_heads = 2
ff_dim = 64
input_dim = X_train.shape[2]  # feature dimension of the input data (accepted but unused by the model)
max_len = X_train.shape[1]  # sequence length
num_classes = 2
# Build and compile the model
transformer_model = Transformer(num_layers, embedding_dim, num_heads, ff_dim, input_dim, max_len, num_classes)
transformer_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train the model
history = transformer_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)
# Evaluate the model
test_loss, test_accuracy = transformer_model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy:.4f}')
# Make predictions
predictions = transformer_model.predict(X_test)
predictions = np.argmax(predictions, axis=1)
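# Cross-check: accuracy recomputed from the argmax predictions should match
# the evaluate() metric printed above
print('Recomputed accuracy:', np.mean(predictions == y_test.values))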