只针对windows平台, linux平台比较容易,就不写了.
一、在单机上安装 zookeeper 的三个服务, 目录层次如下:
e:\
|-- zks
    |-- server1
    |   |-- data
    |   |-- zookeeper
    |       |-- conf
    |       |-- bin
    |       |-- lib
    |-- server2
    |   |-- data
    |   |-- zookeeper
    |       |-- conf
    |       |-- bin
    |       |-- lib
    |-- server3
    |   |-- data
    |   |-- zookeeper
    |       |-- conf
    |       |-- bin
    |       |-- lib
    |-- run.bat
其中run.bat文件的内容为:
rem Start the three ZooKeeper servers one after another.
rem Each "start" launches zkServer.cmd without waiting for it to finish,
rem and each "cd ..\..\.." returns to the folder that holds run.bat.
rem server1
cd server1\zookeeper\bin
start zkServer.cmd
cd ..\..\..
rem server2
cd server2\zookeeper\bin
start zkServer.cmd
cd ..\..\..
rem server3 (the script ends inside server3\zookeeper\bin)
cd server3\zookeeper\bin
start zkServer.cmd
二、打开zookeeper\src\c\下的工程文件并编译, 可得到zookeeper的c客户端(dll)
三、编写进程监控类, 内容如下:
ZKOperator.h
#ifndef _COGLINKZKOPERATORH_
#define _COGLINKZKOPERATORH_
#include <string>
#include <zookeeper.h>
#include <boost/thread/recursive_mutex.hpp>
#include <boost/thread.hpp>
#include <boost/thread/condition_variable.hpp>
// Signature of the business entry point that ZKOperator runs on a worker thread.
typedef void (* DoCore)();
namespace coglink{ namespace base{
// Process-wide helper around a ZooKeeper handle.
// It creates an ephemeral node for this process, runs the supplied callback
// on a worker thread when the node was created by this process, and reads
// configuration values stored in ZooKeeper.
class ZKOperator{
public:
// Returns the single instance, creating it on first use (no locking is done).
static ZKOperator *Instance();
// Opens the ZooKeeper handle for `hostport` with the given timeout value.
// Returns false when zookeeper_init() yields a null handle.
bool Init(const std::string &hostport, int timeouts);
// Monitoring-related API
// Stores the node path, node value and callback, calls StartIfNecessary(),
// then joins every worker thread that was started.
void StartZKThread(const std::string &ppath, const std::string &vvalue, DoCore doCore);
// Creates the ephemeral node if it does not exist and, on success, starts
// the worker thread; afterwards re-registers the child watch on the parent.
// When no worker thread exists, the first caller blocks on cond_.
void StartIfNecessary();
// True once this process has successfully created the node.
bool IsNeedStartAnCopy();
// Configuration-related API
// Reads the data of "/zookeeper/" + ppath, with every '.' in the path
// replaced by '/'. Returns an empty string when the read fails.
std::string GetValue(const std::string &ppath);
private:
ZKOperator();
// Thread body: invokes doCore_.
void MainCore();
// Requests the children of ppath_'s parent with the watch flag set.
void WaitChange();
private:
DoCore doCore_;                        // callback run by MainCore()
std::string ppath_;                    // full path of the node for this process
std::string value_;                    // data written into the node on creation
boost::thread_group threadGroup_;      // owns the worker threads
boost::condition_variable_any cond_;   // the waiting caller blocks here
boost::recursive_mutex mutex_;         // mutex used together with cond_
zhandle_t *zh_;                        // ZooKeeper handle set by Init()
bool waiting_;                         // set once a caller has started waiting on cond_
static ZKOperator * staticP_;          // the singleton pointer
bool startAnCopy_;                     // set after the node was created by this process
};
}}
#endif
ZKOperator.cpp
#include "ZKOperator.h"
namespace coglink{ namespace base{
// Storage for the singleton pointer; stays null until Instance() allocates the object.
ZKOperator *ZKOperator::staticP_ = 0;
// Watcher callback handed to zookeeper_init() in ZKOperator::Init().
// Only one (type, state) combination triggers StartIfNecessary(); every
// other notification is ignored.
// NOTE(review): 4 and 3 look like the values of ZOO_CHILD_EVENT and
// ZOO_CONNECTED_STATE from zookeeper.h - confirm and switch to those names.
void watcher(zhandle_t *zzh, int type, int state, const char *path, void* context)
{
    const int kHandledEventType = 4;
    const int kRequiredState = 3;
    if(type != kHandledEventType || state != kRequiredState){
        return;
    }
    ZKOperator::Instance()->StartIfNecessary();
}
// Puts the object into its idle state: no callback, no ZooKeeper handle,
// nobody waiting, node not yet created by this process.
// doCore_ is initialised as well so that it never holds an indeterminate
// function pointer before StartZKThread() assigns it.
// The initialiser order follows the member declaration order.
ZKOperator::ZKOperator()
    : doCore_(0),
      zh_(0),
      waiting_(false),
      startAnCopy_(false)
{
}
// Returns the process-wide ZKOperator, allocating it on the first call.
// The object is never deleted. No synchronisation is performed here.
ZKOperator *ZKOperator::Instance(){
    if(staticP_ != 0){
        return staticP_;
    }
    staticP_ = new ZKOperator();
    return staticP_;
}
// Reports whether this process created the node itself, which is the
// condition under which StartIfNecessary() sets startAnCopy_.
bool ZKOperator::IsNeedStartAnCopy(){
    const bool createdNode = startAnCopy_;
    return createdNode;
}
void ZKOperator::WaitChange(){
int pos = (int)ppath_.rfind("/");
if(pos < 0){
return;
}
std::string rootPath = ppath_.substr(0, pos);
String_vector vecs;
zoo_get_children(zh_, rootPath.c_str(), 1, &vecs);
}
// Body of the worker thread: calls the callback stored by StartZKThread().
void ZKOperator::MainCore(){
    (*doCore_)();
}
// Tries to become the active process for ppath_.
//  - If the node does not exist, create it as an ephemeral node. On success
//    mark startAnCopy_ and start a worker thread that runs MainCore().
//  - In every case re-register the child watch on the parent node.
//  - If no worker thread exists, the first caller parks on cond_ while later
//    callers (the watcher callback) return immediately.
void ZKOperator::StartIfNecessary(){
    int re = zoo_exists(zh_, ppath_.c_str(), 0, 0);
    if(re == ZNONODE){
        re = zoo_create(zh_, ppath_.c_str(), value_.c_str(), static_cast<int>(value_.length()), &ZOO_OPEN_ACL_UNSAFE, ZOO_EPHEMERAL, 0, 0);
        if(re == ZOK){
            startAnCopy_ = true;
            // thread_group takes ownership of the thread object.
            boost::thread * th = new boost::thread(boost::bind(&ZKOperator::MainCore, this));
            threadGroup_.add_thread(th);
        }else{
            printf("create node error!=====\r\n");
        }
    }
    WaitChange();
    if(threadGroup_.size() != 0){
        return;
    }
    // waiting_ is examined and set under the mutex so that two callers
    // cannot both decide to wait.
    boost::recursive_mutex::scoped_lock lock(mutex_);
    if(waiting_){
        return;
    }
    waiting_ = true;
    // Loop on the flag: a condition variable may wake up spuriously, and
    // returning here would let the standby process fall through and exit.
    while(waiting_){
        cond_.wait(lock);
    }
}
// Creates the ZooKeeper handle for `hostport`, installing `watcher` as the
// global watcher and passing `timeouts` through to zookeeper_init().
// Returns true when a handle was obtained; otherwise logs and returns false.
bool ZKOperator::Init(const std::string &hostport, int timeouts){
    zh_ = zookeeper_init(hostport.c_str(), watcher, timeouts, 0, 0, 0);
    const bool handleCreated = (zh_ != 0);
    if(!handleCreated){
        printf("init zk error!\r\n");
    }
    return handleCreated;
}
void ZKOperator::StartZKThread(const std::string &ppath, const std::string &vvalue, DoCore doCore){
ppath_ = ppath;
value_ = vvalue;
doCore_ = doCore;
StartIfNecessary();
threadGroup_.join_all();
}
// Reads a configuration value from ZooKeeper.
// `ppath` is a dotted key; it is turned into the node path
// "/zookeeper/<ppath>" with every '.' replaced by '/'.
// Returns the node data, or an empty string when the read fails or the
// node holds no data. At most 255 bytes are read.
std::string ZKOperator::GetValue(const std::string &ppath){
    std::string tmppath = "/zookeeper/" + ppath;
    for(std::string::size_type i = 0; i < tmppath.size(); ++i){
        if(tmppath[i] == '.'){
            tmppath[i] = '/';
        }
    }
    char buf[255];
    int len = static_cast<int>(sizeof(buf));
    const int ne = zoo_get(zh_, tmppath.c_str(), 0, buf, &len, 0);
    // zoo_get reports a node without data by setting len to -1; building a
    // string from a negative length must be avoided.
    if(ne != ZOK || len <= 0){
        return std::string();
    }
    return std::string(buf, static_cast<std::string::size_type>(len));
}
}}
四、调用处
// The first command-line argument is the logic-point number; it becomes the
// last component of the node path under /zookeeper/logicpoint/.
_logicNo = std::string(argv[1]);
std::string ppath = "/zookeeper/logicpoint/";
ppath += _logicNo;
// Hand control to ZKOperator with an empty node value and MainCore as the
// callback. StartZKThread() joins its worker threads before it returns.
coglink::base::ZKOperator::Instance()->StartZKThread(ppath, "", MainCore);
在MainCore中会启动主要业务代码, 同时会新启动一个进程(自己的名字), 如下:
// When this process created the node itself, launch a second copy of the
// program with the same logic number as its argument, minimised.
// NOTE(review): CogPath::GetAppWholePath() presumably returns the full path
// of the running executable - confirm.
if(coglink::base::ZKOperator::Instance()->IsNeedStartAnCopy()){
ShellExecute(NULL,"open", coglink::base::CogPath::GetAppWholePath().c_str(),_logicNo.c_str(),NULL,SW_MINIMIZE);
}
注意上方的ShellExecute, 我原来用的是system函数(之后据我测试可知此函数是启动一个子进程).
用system函数的结果是: 主进程退出后子进程代替其开始主要业务处理, 而由于主进程是意外退出的, 操作系统只知其还有子进程没退出, 因而就不会释放一些被占用的端口,
导致子进程再次listen时失败.