目前查找到的资料只有基于FATE 1.6版本训练MNIST数据集的教程,流程如下:
- 使用的数据集是AI入门的手写数字识别数据集MNIST,从kaggle上下载csv格式的数据集,拷贝到虚拟机中,由于fate训练需要id,故先将数据预处理,并将训练数据切分。
# Prepare the Kaggle MNIST CSV files for FATE training.
#
# FATE needs a sample-id column, so every split gets an ``idx`` column placed
# first and the ``label`` column is renamed to ``y`` and moved last. The
# training set is then shuffled and cut into two parts, one per party.
#
# NOTE(review): this script writes mnist_1_train.csv / mnist_2_train.csv while
# the upload conf examples reference mnist_train_3w_a.csv / mnist_train_3w_b.csv;
# rename the files or adjust the "file" paths so they agree.
import pandas as pd


def to_fate_format(df, id_col="idx", label_col="label", target_col="y"):
    """Return a copy of *df* laid out as ``id, features..., y``.

    Args:
        df: Raw MNIST frame containing ``label_col`` (or an already renamed
            ``target_col``) plus the pixel columns.
        id_col: Name of the sample-id column to create as the first column.
        label_col: Name of the label column in the raw data.
        target_col: Name the label column gets in the output.

    Returns:
        A new DataFrame; *df* itself is left unmodified.

    Raises:
        KeyError: If neither ``label_col`` nor ``target_col`` is present.
    """
    out = df.rename(columns={label_col: target_col})
    # A stale id column (e.g. from a previous run on the same file) is
    # replaced rather than duplicated.
    out = out.drop(columns=[id_col], errors="ignore")
    labels = out.pop(target_col)
    # Ids are generated per frame from its own length; the test split must
    # not borrow the id series of the training split.
    out.insert(0, id_col, range(len(out)))
    out[target_col] = labels
    return out


def main():
    """Convert the train/test CSV files and write the per-party splits."""
    train = to_fate_format(pd.read_csv("mnist_train.csv"))
    test = to_fate_format(pd.read_csv("mnist_test.csv"))

    # Shuffle before splitting so both parties get a random sample.
    train = train.sample(frac=1)
    train.iloc[:30000].to_csv("mnist_1_train.csv", index=False, header=True)
    train.iloc[30000:].to_csv("mnist_2_train.csv", index=False, header=True)

    # The test file is rewritten in place with the same column layout.
    test.to_csv("mnist_test.csv", index=False, header=True)


if __name__ == "__main__":
    main()
-
为提交数据配置upload的conf
{ "file": "/data/projects/fate/examples/data/mnist_train_3w_b.csv", "head": 1, "partition": 8, "work_mode": 0, "table_name": "homo_guest_mnist_train", "namespace": "experiment" }
{ "file": "/data/projects/fate/examples/data/mnist_train_3w_a.csv", "head": 1, "partition": 8, "work_mode": 0, "table_name": "homo_host_mnist_train", "namespace": "experiment" }
修改guest、host的table name和namespace
-
使用Keras,采用自定义模型的方法,自定义多层卷积神经网络来训练模型。
# Build the Keras model whose JSON description is pasted into the job conf.
#
# The layer stack follows the nn_define entry of the job conf: the flat
# 784-pixel input is reshaped to 28x28x1, passed through three Conv2D layers
# (32, 64, 64 filters, 3x3, relu) with 2x2 max pooling after the first two,
# then flattened into Dense(64, relu) and Dense(10, softmax).
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Define the model
model_nn = tf.keras.Sequential()
model_nn.add(layers.Reshape((28, 28, 1), input_shape=(784,)))
model_nn.add(layers.Conv2D(32, (3, 3), activation='relu'))
model_nn.add(layers.MaxPooling2D((2, 2)))
model_nn.add(layers.Conv2D(64, (3, 3), activation='relu'))
model_nn.add(layers.MaxPooling2D((2, 2)))
model_nn.add(layers.Conv2D(64, (3, 3), activation='relu'))
model_nn.add(layers.Flatten())
model_nn.add(layers.Dense(64, activation='relu'))
model_nn.add(layers.Dense(10, activation='softmax'))

# Print the model as JSON; this string goes into the conf's nn_define field.
# NOTE(review): incidental fields (layer names, keras_version) depend on the
# installed Keras version and session state - confirm against the target FATE.
print(model_nn.to_json())
-
将输出的json串放到对应的conf的nn_define项处,并修改host和guest处的表名和命名空间,与上传时对应
{ "dsl_version": 2, "initiator": { "role": "guest", "party_id": 9999 }, "role": { "guest": [ 9999 ], "host": [ 10000 ], "arbiter": [ 10000 ] }, "job_parameters": { "common": { "job_type": "train" } }, "component_parameters": { "role": { "guest": { "0": { "data_transform_0": { "with_label": true, "output_format": "dense" }, "reader_0": { "table": { "name": "homo_guest_mnist_train", "namespace": "experiment" } } } }, "host": { "0": { "data_transform_0": { "with_label": true, "output_format": "dense" }, "reader_0": { "table": { "name": "homo_host_mnist_train", "namespace": "experiment" } } } } }, "common": { "nn_0": { "encode_label": true , "loss": "categorical_crossentropy", "max_iter": 20, "batch_size": -1, "early_stop": { "early_stop": "diff", "eps": 0.0001 }, "optimizer": { "lr": 0.01, "betas": [ 0.9, 0.999 ], "eps": 1e-08, "weight_decay": 0, "amsgrad": false, "optimizer": "Adam" }, "trainer": { "trainer_name": "fedavg_trainer", "param": { "epochs": 20, "batch_size": 128, "validation_freqs": 1, "aggregate_every_n_epoch": 5 } }, "torch_seed": 100, "metrics": ["accuracy", "AUC"], "config_type": "keras", "nn_define": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"module": "keras.layers", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 784], "dtype": "float32", "sparse": false, "ragged": false, "name": "reshape_input"}, "registered_name": null}, {"module": "keras.layers", "class_name": "Reshape", "config": {"name": "reshape", "trainable": true, "dtype": "float32", "batch_input_shape": [null, 784], "target_shape": [28, 28, 1]}, "registered_name": null, "build_config": {"input_shape": [null, 784]}}, {"module": "keras.layers", "class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "dtype": "float32", "batch_input_shape": [null, 28, 28, 1], "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "groups": 1, "activation": "relu", 
"use_bias": true, "kernel_initializer": {"module": "keras.initializers", "class_name": "GlorotUniform", "config": {"seed": null}, "registered_name": null}, "bias_initializer": {"module": "keras.initializers", "class_name": "Zeros", "config": {}, "registered_name": null}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "registered_name": null, "build_config": {"input_shape": [null, 28, 28, 1]}}, {"module": "keras.layers", "class_name": "MaxPooling2D", "config": {"name": "max_pooling2d", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "registered_name": null, "build_config": {"input_shape": [null, 26, 26, 32]}}, {"module": "keras.layers", "class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "dtype": "float32", "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"module": "keras.initializers", "class_name": "GlorotUniform", "config": {"seed": null}, "registered_name": null}, "bias_initializer": {"module": "keras.initializers", "class_name": "Zeros", "config": {}, "registered_name": null}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "registered_name": null, "build_config": {"input_shape": [null, 13, 13, 32]}}, {"module": "keras.layers", "class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_1", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}, "registered_name": null, "build_config": {"input_shape": [null, 11, 11, 64]}}, {"module": "keras.layers", "class_name": "Conv2D", "config": {"name": "conv2d_2", "trainable": true, "dtype": "float32", 
"filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"module": "keras.initializers", "class_name": "GlorotUniform", "config": {"seed": null}, "registered_name": null}, "bias_initializer": {"module": "keras.initializers", "class_name": "Zeros", "config": {}, "registered_name": null}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "registered_name": null, "build_config": {"input_shape": [null, 5, 5, 64]}}, {"module": "keras.layers", "class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}, "registered_name": null, "build_config": {"input_shape": [null, 3, 3, 64]}}, {"module": "keras.layers", "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"module": "keras.initializers", "class_name": "GlorotUniform", "config": {"seed": null}, "registered_name": null}, "bias_initializer": {"module": "keras.initializers", "class_name": "Zeros", "config": {}, "registered_name": null}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "registered_name": null, "build_config": {"input_shape": [null, 576]}}, {"module": "keras.layers", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 10, "activation": "softmax", "use_bias": true, "kernel_initializer": {"module": "keras.initializers", "class_name": "GlorotUniform", "config": {"seed": null}, "registered_name": null}, "bias_initializer": {"module": "keras.initializers", "class_name": "Zeros", "config": {}, "registered_name": null}, "kernel_regularizer": null, "bias_regularizer": 
null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "registered_name": null, "build_config": {"input_shape": [null, 64]}}]}, "keras_version": "2.13.1", "backend": "tensorflow"} } } } }
-
提交任务即可
flow job submit -c homo_nn_mnist_aggregate_n_epoch_conf.json -d homo_nn_mnist_aggregate_n_epoch_dsl.json
但由于FATE跨版本差异较大,1.6版本的json文件在1.11版本中会报错。目前仍在针对版本差异排查并调整出现的问题,同时参考官方给出的样例编写自定义的pipeline,用于2.1版本的训练。