环境配置:
python 3.10
Tensorflow Federated 0.61.0
%load_ext tensorboard
1 导入库
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
from matplotlib import pyplot as plt
# Fix the NumPy RNG so any randomized plotting/sampling below is reproducible.
np.random.seed(0)
# Sanity check: build and invoke a trivial federated computation to verify
# that TensorFlow Federated is correctly installed.
tff.federated_computation(lambda: 'Hello, World!')()
2 准备输入数据
# Load the federated EMNIST dataset; the data is already partitioned by
# client id (one client per original writer).
emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()
# Build a tf.data.Dataset holding the first client's examples.
example_dataset = emnist_train.create_tf_dataset_for_client(
    emnist_train.client_ids[0])
# Peek at one element; each example is a dict with 'pixels' and 'label'.
example_element = next(iter(example_dataset))
example_element['label'].numpy()
from matplotlib import pyplot as plt
# Show the example's 28x28 grayscale image.
plt.imshow(example_element['pixels'].numpy(), cmap='gray', aspect='equal')
plt.grid(False)
_ = plt.show()
可视化联邦数据的异质性
# Number of examples per label for a sample of clients, visualizing the
# label heterogeneity (non-IID-ness) of the federated data.
f = plt.figure(figsize=(12, 7))
f.suptitle('Label Counts for a Sample of Clients')
for i in range(6):
    client_dataset = emnist_train.create_tf_dataset_for_client(
        emnist_train.client_ids[i])
    plot_data = collections.defaultdict(list)
    for example in client_dataset:
        # Append the label once per occurrence so the histogram counts it.
        label = example['label'].numpy()
        plot_data[label].append(label)
    plt.subplot(2, 3, i+1)
    plt.title('Client {}'.format(i))
    for j in range(10):
        plt.hist(
            plot_data[j],
            density=False,
            bins=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Each client has a different mean image per label, which means each client
# will push the model in its own direction during local training.
for i in range(5):
    client_dataset = emnist_train.create_tf_dataset_for_client(
        emnist_train.client_ids[i])
    plot_data = collections.defaultdict(list)
    for example in client_dataset:
        plot_data[example['label'].numpy()].append(example['pixels'].numpy())
    f = plt.figure(i, figsize=(12, 5))
    f.suptitle("Client #{}'s Mean Image Per Label".format(i))
    for j in range(10):
        # Average over all of this client's examples with label j.
        mean_img = np.mean(plot_data[j], 0)
        plt.subplot(2, 5, j+1)
        plt.imshow(mean_img.reshape((28, 28)))
        plt.axis('off')
3 数据集的预处理
在这里,我们将 28x28 图像平展成 784 元素数组,对各个示例进行洗牌,将其组织成批,并将特征从"像素"和"标签"重命名为"x"和"y",以便与Keras配合使用。
NUM_CLIENTS = 10      # number of clients sampled for training
NUM_EPOCHS = 5        # local epochs each client runs per round (dataset.repeat)
BATCH_SIZE = 20       # per-client minibatch size
SHUFFLE_BUFFER = 100  # buffer size for dataset shuffling
PREFETCH_BUFFER = 10  # batches to prefetch in the input pipeline
def preprocess(dataset):
    """Flatten each 28x28 image to a 784-vector, shuffle, batch, and rename
    the features from 'pixels'/'label' to 'x'/'y' for use with Keras."""

    def _to_xy(element):
        """Return one batch as an OrderedDict with flattened pixels."""
        flat_pixels = tf.reshape(element['pixels'], [-1, 784])
        labels = tf.reshape(element['label'], [-1, 1])
        return collections.OrderedDict(x=flat_pixels, y=labels)

    pipeline = dataset.repeat(NUM_EPOCHS)
    pipeline = pipeline.shuffle(SHUFFLE_BUFFER, seed=1)
    pipeline = pipeline.batch(BATCH_SIZE)
    pipeline = pipeline.map(_to_xy)
    return pipeline.prefetch(PREFETCH_BUFFER)
preprocessed_example_dataset = preprocess(example_dataset)
下面是一个简单的辅助函数,它将从给定的用户集合中构造一个数据集列表,作为一轮训练或评估的输入。
def make_federated_data(client_data, client_ids):
    """Build one preprocessed tf.data.Dataset per requested client id.

    The returned list serves as the input for a single round of federated
    training or evaluation.
    """
    make_client_dataset = client_data.create_tf_dataset_for_client
    return [preprocess(make_client_dataset(cid)) for cid in client_ids]
4 选择客户端
在一个典型的联邦训练场景中,我们要处理的是潜在的大量用户设备,其中只有一小部分可以在给定的时间点上进行训练。例如,当客户端设备为手机时,手机只有在插上电源、脱离计量网络并且处于空闲状态时才会参与训练。当然,我们是在一个模拟的环境中,所有的数据都是本地可用的。通常情况下,当运行仿真时,我们会简单地在每一轮训练中抽取一个随机的客户子集,一般在每一轮中不同。通过学习 Federated Averaging(https://arxiv.org/abs/1602.05629)算法的论文可以发现,在每轮随机采样客户子集的系统中实现收敛需要一段时间,而在本交互教程中运行数百轮是不现实的。
# Simulation shortcut: reuse the same first NUM_CLIENTS clients every round
# instead of resampling a random subset, so the demo converges quickly.
sample_clients = emnist_train.client_ids[0:NUM_CLIENTS]
federated_train_data = make_federated_data(emnist_train, sample_clients)
print(f'Number of client datasets: {len(federated_train_data)}')
print(f'First dataset: {federated_train_data[0]}')
Number of client datasets: 10
First dataset: <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 784), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])>
5 利用Keras创建模型
def create_keras_model():
    """Build an uncompiled Keras model: one dense layer over the flattened
    784-pixel input (weights initialized to zero) followed by a softmax
    over the 10 digit classes."""
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(784,)))
    model.add(tf.keras.layers.Dense(10, kernel_initializer='zeros'))
    model.add(tf.keras.layers.Softmax())
    return model
def model_fn():
    """Wrap a freshly built Keras model as a TFF learning model.

    A new Keras model must be constructed on every call rather than captured
    from the outer scope, because TFF invokes this factory inside different
    graph contexts.
    """
    return tff.learning.models.from_keras_model(
        create_keras_model(),
        input_spec=preprocessed_example_dataset.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
6 在联邦数据上训练模型
# Build the Federated Averaging process; client updates are weighted by the
# number of examples each client contributes. The client optimizer performs
# the local SGD steps; the server optimizer (SGD, learning_rate=1.0) applies
# the aggregated client update to the global model without rescaling it.
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))
调用"初始化"计算来构造服务器状态。
# Construct the initial server state.
train_state = training_process.initialize()
# Run the first round of federated training over all sampled clients.
result = training_process.next(train_state, federated_train_data)
train_state = result.state
train_metrics = result.metrics
print('round 1, metrics={}'.format(train_metrics))
NUM_ROUNDS = 11
# Rounds 2..10 (round 1 was executed above); each round feeds the updated
# server state back into the next call to `next`.
for round_num in range(2, NUM_ROUNDS):
    result = training_process.next(train_state, federated_train_data)
    train_state = result.state
    train_metrics = result.metrics
    print('round {:2d}, metrics={}'.format(round_num, train_metrics))
7 在TensorBoard中显示模型指标
接下来,使用Tensorboard将这些联邦计算的度量可视化。
logdir = "./logs/training/"
try:
tf.io.gfile.rmtree(logdir) # delete any previous results
except tf.errors.NotFoundError as e:
pass # Ignore if the directory didn't previously exist.
summary_writer = tf.summary.create_file_writer(logdir)
train_state = training_process.initialize()
!ls {logdir}
%tensorboard --logdir {logdir} --port=0