综述
将一个 job 中的各个 component 拆开,分别作为独立的 job 运行:只需要将上一步 component 输出数据表的 name、namespace 作为下一步 reader 的输入即可。
至于 component 输出的 name、namespace,可以通过 cli 中的 component_output_data_table 命令获取,或者直接查 db 获取。
在需要复用 component 输出的情况下(比如一次求交结束后,使用不同的模型分别训练、预测等),这样可以有效地减少重复操作的耗时(io 和求交的耗时都不小)。
准备upload 数据
- 生成up_guest.json 和up_host.json, 参考自 examples/dsl/v2/upload/upload_conf.json
- 执行上传命令
python /data/projects/fate/python/fate_flow/fate_flow_client.py -f upload -c up_host.json
python /data/projects/fate/python/fate_flow/fate_flow_client.py -f upload -c up_guest.json
执行dataio
- 生成data_io_dsl.json,参考examples/dsl/v2/hetero_logistic_regression/hetero_lr_normal_dsl.json 改动。 这里只需要reader 和 dataio 两个组件。
- 生成data_io_conf.json,参考examples/dsl/v2/hetero_logistic_regression/hetero_lr_normal_conf.json 改动。 hetero_lr_0 等组件的参数无需删除:dsl 中没有配置这些组件,这里的冗余参数不会被执行。 注意 reader_0.table 的 name 和 namespace 要和 upload 配置中的 table_name、namespace 保持一致。
- 执行submit_job
python /data/projects/fate/python/fate_flow/fate_flow_client.py -f submit_job -d data_io_dsl.json -c data_io_conf.json
submit 成功,jobid 为:20210910183852525373395,请记录下这个 id,后面查询输出表时会用到。
- 查询name,namespace(表名后缀 20210910 与 jobid 的前 8 位一致,即 job 的提交日期)
mysql> select f_table_name,f_table_namespace,f_role from t_tracking_output_data_info_20210910 where f_job_id ='20210910183852525373395';
+----------------------------------+------------------------------------------------+--------+
| f_table_name | f_table_namespace | f_role |
+----------------------------------+------------------------------------------------+--------+
| 71622c82122311ec8dcf36ab29097321 | output_data_20210910183852525373395_dataio_0_0 | guest |
| 71563620122311ec8b9a36ab29097321 | output_data_20210910183852525373395_dataio_0_0 | host |
| 4d509f54122311ec8fbf36ab29097321 | output_data_20210910183852525373395_reader_0_0 | guest |
| 4d5b55fc122311ecbdf336ab29097321 | output_data_20210910183852525373395_reader_0_0 | host |
+----------------------------------+------------------------------------------------+--------+
记录下 dataio_0 对应的 name、namespace(guest、host 各一条),在后面 intersection 的 inter_conf.json 中使用。
执行 intersection
- 生成inter_dsl.json, 参考examples/dsl/v2/hetero_logistic_regression/hetero_lr_normal_dsl.json 改动。 这里只需要reader 和 intersection 两个组件。
- 生成inter_conf.json,参考examples/dsl/v2/hetero_logistic_regression/hetero_lr_normal_conf.json 改动。 hetero_lr_0 等组件的参数无需删除:dsl 中没有配置这些组件,这里的冗余参数不会被执行。 注意,这里 reader_0.table 的 name 和 namespace 是上述 db 中查到的 dataio_0 输出表的值(guest、host 分别填各自的那一条)。
- 执行命令
python /data/projects/fate/python/fate_flow/fate_flow_client.py -f submit_job -d inter_dsl.json -c inter_conf.json
- 求交成功日志
(app-root) bash-4.2# cat /data/projects/fate/logs/20210910184753907046398/guest/9989/intersection_0/stat.log |grep count
[INFO] [2021-09-10 18:49:00,960] [377507:140147011696448] - intersect_model.py[line:168]: intersect_ids count:569
- 查看db结果
mysql> select f_table_name,f_table_namespace,f_role from t_tracking_output_data_info_20210910 where f_job_id ='20210910184753907046398';
+----------------------------------+------------------------------------------------------+--------+
| f_table_name | f_table_namespace | f_role |
+----------------------------------+------------------------------------------------------+--------+
| b53042ae122411ec93e436ab29097321 | output_data_20210910184753907046398_intersection_0_0 | guest |
| b51d8f2e122411ec837c36ab29097321 | output_data_20210910184753907046398_intersection_0_0 | host |
| 8fe0f0c0122411ec98e636ab29097321 | output_data_20210910184753907046398_reader_0_0 | guest |
| 8fee4734122411ec9a2b36ab29097321 | output_data_20210910184753907046398_reader_0_0 | host |
+----------------------------------+------------------------------------------------------+--------+
后面如果接 hetero_lr_0 等组件,操作相同:将 reader.table 的 name、namespace 调整为上面查到的 intersection_0 输出表的值即可。
附录,相关配置文件
- up_guest.json
{
"file": "/data/projects/fate/examples/data/breast_hetero_guest.csv",
"head": 1,
"partition": 1,
"work_mode": 1,
"table_name": "hetero_guest",
"namespace": "cl"
}
- up_host.json
{
"file": "/data/projects/fate/examples/data/breast_hetero_host.csv",
"head": 1,
"partition": 1,
"work_mode": 1,
"table_name": "hetero_host",
"namespace": "cl"
}
- data_io_dsl.json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"dataio_0": {
"module": "DataIO",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
}
}
}
- data_io_conf.json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9989
},
"role": {
"guest": [
9989
],
"host": [
9989
],
"arbiter": [
9989
]
},
"job_parameters": {
"common": {
"job_type": "train",
"backend": 0,
"work_mode": 1
}
},
"component_parameters": {
"common": {
"dataio_0": {
"output_format": "dense"
},
"hetero_lr_0": {
"penalty": "L2",
"tol": 0.0001,
"alpha": 0.01,
"optimizer": "rmsprop",
"batch_size": -1,
"learning_rate": 0.15,
"init_param": {
"init_method": "zeros"
},
"max_iter": 30,
"early_stop": "diff",
"cv_param": {
"n_splits": 5,
"shuffle": false,
"random_seed": 103,
"need_cv": false
},
"sqn_param": {
"update_interval_L": 3,
"memory_M": 5,
"sample_size": 5000,
"random_seed": null
}
},
"evaluation_0": {
"eval_type": "binary"
}
},
"role": {
"host": {
"0": {
"dataio_0": {
"with_label": false
},
"reader_0": {
"table": {
"name": "hetero_host",
"namespace": "cl"
}
}
}
},
"guest": {
"0": {
"dataio_0": {
"with_label": true
},
"reader_0": {
"table": {
"name": "hetero_guest",
"namespace": "cl"
}
}
}
}
}
}
}
- inter_dsl.json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"intersection_0": {
"module": "Intersection",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
}
}
}
- inter_conf.json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9989
},
"role": {
"guest": [
9989
],
"host": [
9989
],
"arbiter": [
9989
]
},
"job_parameters": {
"common": {
"job_type": "train",
"backend": 0,
"work_mode": 1
}
},
"component_parameters": {
"common": {
"dataio_0": {
"output_format": "dense"
},
"hetero_lr_0": {
"penalty": "L2",
"tol": 0.0001,
"alpha": 0.01,
"optimizer": "rmsprop",
"batch_size": -1,
"learning_rate": 0.15,
"init_param": {
"init_method": "zeros"
},
"max_iter": 30,
"early_stop": "diff",
"cv_param": {
"n_splits": 5,
"shuffle": false,
"random_seed": 103,
"need_cv": false
},
"sqn_param": {
"update_interval_L": 3,
"memory_M": 5,
"sample_size": 5000,
"random_seed": null
}
},
"evaluation_0": {
"eval_type": "binary"
}
},
"role": {
"host": {
"0": {
"dataio_0": {
"with_label": false
},
"reader_0": {
"table": {
"name": "71563620122311ec8b9a36ab29097321",
"namespace": "output_data_20210910183852525373395_dataio_0_0"
}
}
}
},
"guest": {
"0": {
"dataio_0": {
"with_label": true
},
"reader_0": {
"table": {
"name": "71622c82122311ec8dcf36ab29097321",
"namespace": "output_data_20210910183852525373395_dataio_0_0"
}
}
}
}
}
}
}