FATE —— 二.4.4 联邦任务,guest 使用图像数据,host使用文本数据

在本任务中,我们将向您展示如何在 Hetero NN 下构建联邦任务,其中各参与方使用不同结构的数据:guest 方拥有图像数据和标签,host 方拥有文本数据,双方共同完成一个二分类任务。教程数据集基于 flickr 8k 构建,标签 0 和 1 表示图像是在野外还是在城市中。您可以从这里下载处理过的数据集,并将其放在 examples/data 下。完整的数据集可以从这里下载。(请注意,原始数据集与本示例中的数据不同:出于演示目的,本示例数据集只对完整数据集中的一小部分进行了标注。)

获取示例数据集:

请从以下位置下载数据集:

并将其放在/examples/data文件夹下(或者自己存放文件的地址,代码中需要用到)。

此数据集的来源是flickr-8k数据集,来自:

from pipeline.component.nn import save_to_fate

Guest Bottom Model

%%save_to_fate model guest_bottom_image.py
from torch import nn
import torch as t
from torch.nn import functional as F

class ImgBottomNet(nn.Module):
    """Guest-side bottom model: a small CNN that turns an image batch into 8 features.

    The convolutional stack (``seq``) is followed by a two-layer MLP (``fc``).
    The first linear layer expects 1176 inputs, i.e. 6 channels * 14 * 14,
    which is what ``seq`` produces for 3 x 224 x 224 inputs.
    """

    def __init__(self):
        super().__init__()

        # Layers are instantiated in forward order: conv -> max-pool -> conv -> avg-pool.
        conv_layers = [
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
            nn.MaxPool2d(kernel_size=3),
            nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3),
            nn.AvgPool2d(kernel_size=5),
        ]
        self.seq = t.nn.Sequential(*conv_layers)

        # Projection head: 1176 -> 32 -> 8.
        head_layers = [
            nn.Linear(1176, 32),
            nn.ReLU(),
            nn.Linear(32, 8),
        ]
        self.fc = t.nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the CNN, flatten everything but the batch dimension, apply the head."""
        feature_maps = self.seq(x)
        return self.fc(feature_maps.flatten(start_dim=1))

Guest Top Model

%%save_to_fate model guest_top_image.py
from torch import nn
import torch as t
from torch.nn import functional as F

class ImgTopNet(nn.Module):
    """Guest-side top model: maps a 4-feature input to one sigmoid score per sample."""

    def __init__(self):
        super().__init__()

        # Single linear unit followed by a sigmoid.
        scorer = nn.Linear(4, 1)
        squash = nn.Sigmoid()
        self.fc = t.nn.Sequential(scorer, squash)

    def forward(self, x):
        """Return the scores flattened to a 1-D tensor."""
        scores = self.fc(x)
        return scores.flatten()

Host Bottom Model

%%save_to_fate model host_bottom_lstm.py
from torch import nn
import torch as t
from torch.nn import functional as F

class LSTMBottom(nn.Module):
    """Host-side bottom model: embeds token ids, runs a 2-layer LSTM, emits 8 features.

    Args:
        vocab_size: number of rows in the embedding table; index 0 is the padding index.
    """

    def __init__(self, vocab_size):
        super().__init__()
        self.word_embed = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=16,
            padding_idx=0,
        )
        # The LSTM is kept inside a Sequential container, so its parameters
        # are registered under the "lstm.0." prefix.
        recurrent = nn.LSTM(input_size=16, hidden_size=16, num_layers=2, batch_first=True)
        self.lstm = t.nn.Sequential(recurrent)
        self.act = nn.ReLU()
        self.linear = nn.Linear(16, 8)

    def forward(self, x):
        """Embed ``x``, sum the LSTM outputs over dim 1, then apply linear + ReLU."""
        token_vectors = self.word_embed(x)
        # The container passes through the LSTM's (outputs, state) tuple; the state is unused.
        step_outputs, _state = self.lstm(token_vectors)
        pooled = step_outputs.sum(dim=1)
        projected = self.linear(pooled)
        return self.act(projected)

本地测试数据集和底部模型

from federatedml.nn.dataset.image import ImageDataset
from federatedml.nn.dataset.nlp_tokenizer import TokenizerDataset
# Dataset locations -- adjust both to wherever you stored the files.
IMAGE_DIR = '/mnt/hgfs/examples/data/flicker_toy_data/flicker/images/'
TEXT_CSV = '/mnt/hgfs/examples/data/flicker_toy_data/flicker_toy_data/text.csv'

# Flickr images: centre-cropped to 224 x 224, each sample returned together with its label.
img_ds = ImageDataset(return_label=True, center_crop=True, center_crop_shape=(224, 224))
img_ds.load(IMAGE_DIR)

# Text: tokenized samples, returned without labels.
txt_ds = TokenizerDataset(return_label=False)
txt_ds.load(TEXT_CSV)
查看数据
# Image dataset: sample count, first sample, class list and the first ten sample ids.
print(len(img_ds))
print(img_ds[0])
print(img_ds.get_classes())
print(img_ds.get_sample_ids()[0: 10])
# Text dataset: sample count, first sample and vocabulary size.
print(len(txt_ds))
print(txt_ds[0]) # word idx
print(txt_ds.get_vocab_size()) # vocab size
# Build both bottom models locally; the LSTM's embedding table is sized from the dataset vocabulary.
img_bottom = ImgBottomNet()
lstm_bottom = LSTMBottom(vocab_size=txt_ds.get_vocab_size())
# Forward the first two text samples as one batch
# (vstack assumes both samples have the same length -- TODO confirm against TokenizerDataset).
lstm_bottom(t.vstack([txt_ds[0], txt_ds[1]]))  # test forward

tensor([[0.0000, 0.0000, 0.0000, 0.5858, 0.0000, 0.0000, 0.5437, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0511, 0.0000, 0.0000, 0.6965, 0.0000]],
       grad_fn=<ReluBackward0>)

# Forward the first two images (element 0 of each sample) as one batch of size 2.
img_bottom(t.stack([img_ds[idx][0] for idx in range(2)]))

tensor([[-0.0009, -0.2242,  0.0642,  0.1377, -0.0960, -0.2041, -0.0653, -0.0182],
        [ 0.0104, -0.2324,  0.0810,  0.1305, -0.1085, -0.1857, -0.0652, -0.0390]],
       grad_fn=<AddmmBackward0>)

Pipeline

import os
import torch as t
from torch import nn
from pipeline import fate_torch_hook
from pipeline.component import HeteroNN
from pipeline.component.hetero_nn import DatasetParam
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, Evaluation, DataTransform
from pipeline.interface import Data, Model
from pipeline.component.nn import save_to_fate

# Apply FATE's hook to the torch module (presumably what provides nn.CustModel and
# t.nn.InteractiveLayer used further down -- confirm in the FATE documentation).
fate_torch_hook(t)

# fate_project_path = os.path.abspath('../../../../')
guest, host = 10000, 9999

# The guest party initiates the job; one guest and one host take part.
pipeline_mix = (
    PipeLine()
    .set_initiator(role='guest', party_id=guest)
    .set_roles(guest=guest, host=host)
)

# Table identifiers (name + namespace) for each party's data.
guest_data = dict(name="flicker_guest", namespace="experiment")
host_data = dict(name="flicker_host", namespace="experiment")

# Adjust both paths to your own file locations.
guest_data_path = '/mnt/hgfs/examples/data/flicker_toy_data/flicker/images'
host_data_path = '/mnt/hgfs/examples/data/flicker_toy_data/flicker_toy_data/text.csv'

# Bind each table identifier to its path.
pipeline_mix.bind_table(path=guest_data_path, **guest_data)
pipeline_mix.bind_table(path=host_data_path, **host_data)

{'namespace': 'experiment', 'table_name': 'flicker_host'}

# Reader component: each party reads the table bound to it above.
reader_0 = Reader(name="reader_0")
reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_data)
reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_data)
# Hetero NN component: 5 epochs, batch size 64, classification task.
hetero_nn_0 = HeteroNN(name="hetero_nn_0", epochs=5,
                       interactive_layer_lr=0.001, batch_size=64, validation_freqs=1, task_type='classification')
# Per-party handles used to attach party-specific models and datasets.
guest_nn_0 = hetero_nn_0.get_party_instance(role='guest', party_id=guest)
host_nn_0 = hetero_nn_0.get_party_instance(role='host', party_id=host)
# Guest models are referenced by module and class name; the module names match the
# files written by the %%save_to_fate cells (guest_bottom_image.py, guest_top_image.py).
guest_bottom = t.nn.Sequential(
    nn.CustModel(module_name='guest_bottom_image', class_name='ImgBottomNet')
)

guest_top = t.nn.Sequential(
    nn.CustModel(module_name='guest_top_image', class_name='ImgTopNet')
)
# bottom model
# Host bottom model (host_bottom_lstm.py); vocab_size is taken from the locally loaded txt_ds.
host_bottom = nn.CustModel(module_name='host_bottom_lstm', class_name='LSTMBottom', vocab_size=txt_ds.get_vocab_size())

# Interactive layer dimensions line up with the models: both bottom models end in
# 8 output features (guest_dim / host_dim) and the top model's Linear takes 4 inputs (out_dim).
interactive_layer = t.nn.InteractiveLayer(out_dim=4, guest_dim=8, host_dim=8, host_num=1)
guest_nn_0.add_top_model(guest_top)
guest_nn_0.add_bottom_model(guest_bottom)
host_nn_0.add_bottom_model(host_bottom)
# Optimizer is created without a parameter list (relies on the hooked torch -- TODO confirm);
# BCELoss pairs with the Sigmoid output of the top model.
optimizer = t.optim.Adam(lr=0.001)
loss = t.nn.BCELoss()

hetero_nn_0.set_interactive_layer(interactive_layer)
hetero_nn_0.compile(optimizer=optimizer, loss=loss)
# Add datasets: same settings as the local ImageDataset / TokenizerDataset checks;
# label_dtype='float' is presumably required because BCELoss expects float targets -- confirm.
guest_nn_0.add_dataset(DatasetParam(dataset_name='image', return_label=True, center_crop=True, center_crop_shape=(224, 224), label_dtype='float'))
host_nn_0.add_dataset(DatasetParam(dataset_name='nlp_tokenizer', return_label=False))
# Wire reader -> hetero NN, then compile and run the training job.
pipeline_mix.add_component(reader_0)
pipeline_mix.add_component(hetero_nn_0, data=Data(train_data=reader_0.output.data))
pipeline_mix.compile()
pipeline_mix.fit()

写入并保存

# Fetch the output data of the trained hetero_nn_0 component.
df = pipeline_mix.get_component('hetero_nn_0').get_output_data()  # get result
# Bare expression: shown as cell output when run in a notebook.
df
# NOTE(review): `pd` is not referenced by name in the visible lines; to_csv is called on df itself.
import pandas as pd
# Write the output data to a CSV file in the current working directory.
df.to_csv('联邦任务,guest 使用图像数据,host使用文本数据.csv')
# Training summary of the component (its printed value follows below).
pipeline_mix.get_component('hetero_nn_0').get_summary()

{'best_iteration': -1,
 'history_loss': [0.6944547406462735,
                  0.6887847345928814,
                  0.6825250193130138,
                  0.6736582733864008,
                  0.6652656313984893],
 'is_converged': False,
 'validation_metrics': {'train': {'auc': [0.7946521287642783,
                                          0.8087573554863274,
                                          0.8114399446175147,
                                          0.8181896850121149,
                                          0.8222568362755278],
                                  'ks': [0.47836621668397367,
                                         0.45006922810661126,
                                         0.469539633091035,
                                         0.47836621668397367,
                                         0.4774143302180685]}}}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
FATE使用Pipeline运行预测任务可以分为以下几个步骤:

1. 定义pipeline:定义一个Pipeline对象,用于组织各个组件的执行顺序和参数配置。
2. 加载数据:使用DataLoader将需要预测的数据加载到内存中。
3. 加载模型:使用ModelLoader将训练好的模型加载到内存中。
4. 预测数据:使用PredictComponent进行预测操作,将加载的数据和模型作为参数传入进行预测。
5. 输出结果:将预测结果输出到指定的文件或者数据库中。

下面是一个简单的代码示例:

```python
from fate_arch.pipeline import Pipeline
from fate_arch.data import DataLoader
from fate_arch.model import ModelLoader
from fate_arch.predict import PredictComponent

# 定义pipeline
pipeline = Pipeline(order=["load_data", "load_model", "predict", "output"])

# 加载数据
data_loader = DataLoader()
data_loader.load(path="data")

# 加载模型
model_loader = ModelLoader()
model_loader.load(path="model")

# 预测数据
predict_component = PredictComponent()
predict_component.set_flowid("predict_task")
predict_component.set_taskid("predict_task")
predict_component.set_tracker("local")
predict_component.set_predict_dsl(predict_dsl=None)
predict_component.set_input_data(data_loader)
predict_component.set_predict_output_data_description({"name": "predict_output"})
predict_component.set_model(model_loader)
predict_component.run()

# 输出结果
predict_output = predict_component.get_output_data().get("predict_output")
predict_output.save_as_file("result")
```

在这个示例中,我们首先定义了一个Pipeline对象,然后使用DataLoader将需要预测的数据加载到内存中,使用ModelLoader将训练好的模型加载到内存中。接着,我们使用PredictComponent进行预测操作,将加载的数据和模型作为参数传入进行预测。最后,将预测结果保存到文件中。

需要注意的是,以上示例仅为演示Pipeline运行预测任务的基本流程,具体的实现方式还需根据具体业务需要进行调整。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值