anatomy的目的,是为了把Parameter server从一个framework,改造为一个platform。
独立的PS server集群,包括PS scheduler(需要增加scheduler HA的实现,并且scheduler只管理和调度server,不再管理和调度worker)。
独立的PS worker集群,基于Spark实现。把PS-lite framework中的worker相关的代码,剥离出来,封装成Java class(通过JNI),供实现某种算法(比如FTRL)的Spark app使用。
下面代码的分析,目的是正确地剥离scheduler、server、worker的代码。
ps.h wraps class Postoffice.
base.h defines kScheduler, kServerGroup, kWorkerGroup using bitmask.
ps::IsScheduler()
Postoffice::Postoffice() (singleton)
Postoffice::is_scheduler()
ps::IsServer()
Postoffice::Postoffice() (singleton)
Postoffice::is_server()
ps::IsWorker()
Postoffice::Postoffice() (singleton)
Postoffice::is_worker()
ps::Start()
Postoffice::Postoffice() (singleton)
Postoffice::Start()
Postoffice::Postoffice()
Van::Create("zmq")
new ZMQVan() (NOOP)
some env vars
Postoffice::num_workers_
Postoffice::num_servers_
Postoffice::is_worker_
Postoffice::is_server_
Postoffice::is_scheduler_
Postoffice::Start()
log
Postoffice::node_ids_ (role (bitmask) <--> node IDs)
ZMQVan::Start()
ZMQVan::context_ = zmq_ctx_new()
zmq_ctx_set()
// must call it before calling Send
// it initializes all connections to other nodes and starts the receiving thread, which keeps receiving messages
Van::Start()
Van::scheduler_
Van::is_scheduler_
Van::my_node_
Node::SCHEDULER
Van::my_node_ = Van::scheduler_
Node::SERVER or Node::WORKER
hostname (IP), port, role, id
Van::Bind() (pure virtual)
ZMQVan::Bind()
zmq_socket()
zmq_bind()
Van::Connect(Van::scheduler_) (pure virtual)
ZMQVan::Connect()
ZMQVan::senders_
zmq_socket()
zmq_setsockopt()
zmq_connect()
Van::receiver_thread_ = std::unique_ptr<std::thread>(new std::thread(&Van::Receiving, this))
ZMQVan::RecvMsg()
zmq_msg_init()
zmq_msg_recv()
zmq_msg_close()
zmq_msg_more()
Van::resender_
operations that apply to every kind of node: the scheduler, servers, and workers (very important)
Connect()
Send()
// let the scheduler know myself
Van::Send()
ZMQVan::SendMsg()
Van::resender_
Van::resender_
Van::heartbeat_thread_ = std::unique_ptr<std::thread>(new std::thread(&Van::Heartbeat, this))
Postoffice::start_time_
Barrier()
Van::Send()
ZMQVan::SendMsg()
ZMQVan::SendMsg()
senders_.find() (for connected socket)
PackMeta()
zmq_msg_init_data()
zmq_msg_send()
zmq_msg_close()
// All nodes should call this function before exiting.
ps::Finalize()
Barrier()
ZMQVan::Stop()
Van::Stop()
zmq_setsockopt()
zmq_close()
zmq_ctx_destroy()
exit_callback_()
SimpleApp::SimpleApp()
SimpleApp::SimpleApp() (default constructor)
obj_ = new Customer()
Postoffice::Postoffice() (singleton)
Postoffice::AddCustomer()
Postoffice::customers_
Customer::recv_thread_ = std::unique_ptr<std::thread>(new std::thread(&Customer::Receiving, this))
Customer::recv_queue_.WaitAndPop()
Customer::recv_handle_()
Customer::tracker_
SimpleApp::Process()
SimpleApp::request_handle_()
SimpleApp::response_handle_()
SimpleApp::Request()
Customer::NewRequest()
Customer::tracker_
Van::Send()
SimpleApp::Wait()
Customer::WaitRequest()
Customer::tracker_
SimpleApp::Response()
Van::Send()
KVServer::KVServer()
SimpleApp::SimpleApp() (default constructor)
obj_ = new Customer()
Postoffice::Postoffice() (singleton)
Postoffice::AddCustomer()
Postoffice::customers_
Customer::recv_thread_ = std::unique_ptr<std::thread>(new std::thread(&Customer::Receiving, this))
Customer::recv_queue_.WaitAndPop()
Customer::recv_handle_()
Customer::tracker_
KVServer<Val>::Process()
SimpleApp::Process()
KVServer::request_handle_()
KVWorker::KVWorker()
SimpleApp::SimpleApp() (default constructor)
KVWorker::slicer_
obj_ = new Customer()
Postoffice::Postoffice() (singleton)
Postoffice::AddCustomer()
Postoffice::customers_
Customer::recv_thread_ = std::unique_ptr<std::thread>(new std::thread(&Customer::Receiving, this))
Customer::recv_queue_.WaitAndPop()
Customer::recv_handle_()
Customer::tracker_
KVWorker<Val>::Process()
SimpleApp::Process()
KVWorker::recv_kvs_
Customer::NumResponse()
Customer::tracker_
KVWorker::RunCallback()
KVWorker::callbacks_
// Pushes a list of key-value pairs to all server nodes.
KVWorker::Push()
KVWorker::ZPush()
Customer::NewRequest()
KVWorker::AddCallback()
KVWorker::Send()
KVWorker::slicer_()
Customer::AddResponse()
Customer::tracker_
KVWorker::RunCallback()
Van::Send()
KVWorker::Wait()
Customer::WaitRequest()
KVWorker::Pull()
KVWorker::Pull_()
Customer::NewRequest()
KVWorker::AddCallback()
KVWorker::recv_kvs_
ps::FindRange()
all kinds of operations and checks on data
cb()
KVWorker::Send()