Case 1 — pipeline-deploy-demo
from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import Evaluation
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data
def main():
    """Train a hetero logistic regression with checkpointing, then predict
    from a saved checkpoint on a deployed predict pipeline."""
    # Party configuration.
    guest = 9999
    host = 10000
    arbiter = 10000

    # Input table name & namespace as registered in the data storage.
    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}
    guest_eval_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # Build the training pipeline: set the job initiator and participants.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader fetches the uploaded tables; each party points at its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    # DataTransform: the guest side carries the label, the host side does not.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_data_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_data_transform.component_param(with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # PSI (private set intersection) between the parties.
    intersection_0 = Intersection(name="intersection_0")

    # Hetero LR with ModelCheckpoint enabled so prediction can later resume
    # from an intermediate iteration.
    hetero_lr_0 = HeteroLR(
        name="hetero_lr_0",
        early_stop="diff",
        learning_rate=0.15,
        optimizer="rmsprop",
        max_iter=10,
        callback_param={"callbacks": ["ModelCheckpoint"]},
    )

    # Wire the components in task-execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    # Compile (generates the job conf and dsl files), then train.
    pipeline.compile()
    pipeline.fit()

    # Print the training summary of the LR component.
    import json
    print(json.dumps(pipeline.get_component("hetero_lr_0").get_summary(), indent=4))

    # --- Prediction ---
    # Deploy the components required at predict time.
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0])

    # Fresh pipeline for prediction.
    predict_pipeline = PipeLine()

    # New reader pointing at the evaluation tables.
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_eval_data)

    # Evaluation runs on the guest only, with binary-classification metrics.
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_0.get_party_instance(role="guest", party_id=guest).component_param(need_run=True, eval_type="binary")
    evaluation_0.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    # Chain the deployed training pipeline after the new reader, feeding
    # reader_1's output into the deployed data_transform_0 input.
    predict_pipeline.add_component(reader_1)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}))
    predict_pipeline.add_component(evaluation_0, data=Data(data=pipeline.hetero_lr_0.output.data))

    # Predict from the checkpoint saved at step index 8.
    predict_pipeline.predict(components_checkpoint={"hetero_lr_0": {"step_index": 8}})
# Run the deploy demo when executed as a script.
if __name__ == "__main__":
    main()
Case 2 — pipeline-mini-demo
from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import Evaluation
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data
def main():
    """Train a hetero logistic regression end to end, then run prediction
    with an evaluation component on a deployed predict pipeline."""
    # Party configuration.
    guest = 9999
    host = 10000
    arbiter = 10000

    # Input table name & namespace as registered in the data storage.
    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}
    guest_eval_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # Build the training pipeline: set the job initiator and participants.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader fetches the uploaded tables; each party points at its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    # DataTransform: the guest side carries the label, the host side does not.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_data_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_data_transform.component_param(with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # PSI (private set intersection) between the parties.
    intersection_0 = Intersection(name="intersection_0")

    # Hetero logistic regression model.
    hetero_lr_0 = HeteroLR(
        name="hetero_lr_0",
        early_stop="diff",
        learning_rate=0.15,
        optimizer="rmsprop",
        max_iter=10,
    )

    # Wire the components in task-execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    # Compile (generates the job conf and dsl files), then train.
    pipeline.compile()
    pipeline.fit()

    # Print the training summary of the LR component.
    import json
    print(json.dumps(pipeline.get_component("hetero_lr_0").get_summary(), indent=4))

    # --- Prediction ---
    # Deploy the components required at predict time.
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0])

    # Fresh pipeline for prediction.
    predict_pipeline = PipeLine()

    # New reader pointing at the evaluation tables.
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_eval_data)

    # Evaluation runs on the guest only, with binary-classification metrics.
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_0.get_party_instance(role="guest", party_id=guest).component_param(need_run=True, eval_type="binary")
    evaluation_0.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    # Chain the deployed training pipeline after the new reader, feeding
    # reader_1's output into the deployed data_transform_0 input.
    predict_pipeline.add_component(reader_1)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}))
    predict_pipeline.add_component(evaluation_0, data=Data(data=pipeline.hetero_lr_0.output.data))

    # Run the prediction job.
    predict_pipeline.predict()
# Run the mini demo when executed as a script.
if __name__ == "__main__":
    main()
Case 3 — pipeline-quick-demo
import json
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, Intersection, HeteroSecureBoost, Evaluation
from pipeline.interface import Data
# Table name & namespace in data storage.
# The data must be uploaded before running this modeling task.
guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

# Initialize the pipeline. Party ids identify the parties involved in
# federated learning; in standalone mode any integers will do.
pipeline = PipeLine().set_initiator(role="guest", party_id=9999).set_roles(guest=9999, host=10000)

# Define components.
# Reader obtains the uploaded data; nearly every job needs one. Each party
# can be configured with different parameters via get_party_instance.
reader_0 = Reader(name="reader_0")
reader_0.get_party_instance(role="guest", party_id=9999).component_param(table=guest_train_data)
reader_0.get_party_instance(role="host", party_id=10000).component_param(table=host_train_data)

# DataTransform preprocesses the raw data: label extraction, data-format
# conversion, missing-value filling, etc. (see the algorithm list doc).
# Only the guest holds the label.
data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
data_transform_0.get_party_instance(role="host", party_id=10000).component_param(with_label=False)

# PSI for the hetero scenario.
intersect_0 = Intersection(name="intersection_0")

# Hetero SecureBoost; these parameters apply to all parties involved.
hetero_secureboost_0 = HeteroSecureBoost(
    name="hetero_secureboost_0",
    num_trees=5,
    bin_num=16,
    task_type="classification",
    objective_param={"objective": "cross_entropy"},
    encrypt_param={"method": "paillier"},
    tree_param={"max_depth": 3},
)

# Evaluation component to surface the metrics.
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

# Add components in task-execution order, connecting each one to the
# upstream output. Feature-engineering components take "data" while the
# modeling component takes "train_data" — note the difference on
# hetero_secureboost_0 below. For other components, see the examples under
# "example/pipeline/{component you are interested in}".
pipeline.add_component(reader_0) \
    .add_component(data_transform_0, data=Data(data=reader_0.output.data)) \
    .add_component(intersect_0, data=Data(data=data_transform_0.output.data)) \
    .add_component(hetero_secureboost_0, data=Data(train_data=intersect_0.output.data)) \
    .add_component(evaluation_0, data=Data(data=hetero_secureboost_0.output.data))

# Compile & fit the pipeline.
pipeline.compile().fit()

# Query the component summary.
print(f"Evaluation summary:\n{json.dumps(pipeline.get_component('evaluation_0').get_summary(), indent=4)}")
Case 4 — pipeline-upload
import os
import argparse
from pipeline.backend.pipeline import PipeLine
# Path to the directory that contains examples/data.
# Default FATE installation path; can be overridden via main(data_base=...).
DATA_BASE = "/data/projects/fate"
# Alternative for the site-package (pip-installed) version:
# import site
# DATA_BASE = site.getsitepackages()[0]
def main(data_base=DATA_BASE):
    """Upload the demo guest/host CSV tables into FATE data storage.

    Args:
        data_base: path to a directory containing ``examples/data`` with
            the breast_hetero CSV files.
    """
    # Parties config: a single guest is enough for an upload job.
    guest = 9999
    # Partition count used for data storage.
    partition = 4

    # Table name and namespace, referenced later in FATE job configuration.
    # (Fixed: the original used f-string prefixes with no placeholders.)
    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # Register the CSV file(s) to be uploaded; modify the paths to upload
    # designated data. This is the standalone-version flow — for the cluster
    # version each party uploads its own data separately.
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],
        namespace=dense_data["namespace"],
        head=1,  # first row is the header
        partition=partition)
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=1,
        partition=partition)

    # Upload, dropping any existing table with the same name/namespace.
    pipeline_upload.upload(drop=1)
if __name__ == "__main__":
    # Optional --base/-b flag overrides the default DATA_BASE directory.
    arg_parser = argparse.ArgumentParser("PIPELINE DEMO")
    arg_parser.add_argument("--base", "-b", type=str,
                            help="data base, path to directory that contains examples/data")
    parsed = arg_parser.parse_args()
    if parsed.base is None:
        main()
    else:
        main(parsed.base)