目录
说明
基于C语言开发的简易版supervisor,进程间的通信使用的是NNG通信库(nng-1.5.2),可根据自己需求自行优化开发。
- 支持supervisorctrl status、start、stop、restart等程序状态查看、启停控制
- 支持被管理程序异常退出后重启拉起(程序自己退出和10秒内退出3次的不会再次拉起)
- 被管理程序的标准输出和标准错误输出均已重定向到/dev/null
- 暂不支持管理进程所产生的子进程
supervisor服务的启动方式与原supervisor基本相同
Usage:
supervisor [-c <cfg filename>] [-d]
Param:
-c 后面跟上配置文件路径, 如没有-c选项, 默认配置文件为[./supervisord.json]
-d 后台运行,作为守护进程
-h, --help 显示此说明
supervisorctrl(进程控制程序)的用法与原supervisorctl基本类似
Usage:
supervisorctrl <status|start|stop|restart> [all|proname]
supervisorctrl <shutdown>
Param:
status 查看程序状态, 后接程序名, 默认all
<start|stop|restart> <all|proname> 启动|停止|重启 后接all或者程序名
shutdown 关闭所有程序并停止supervisor服务(请务必使用该命令停止supervisor服务)
-h, --help 显示此说明
配置
服务的配置文件(supervisord.json)如下:
{
"log_path":"./log/", // 日志路径
"log_level":4, // 日志等级4-debug
"program":[
{
"programName":"sampleProname1", // 程序显示名称
"directory":"/sampleProDir1", // 程序运行路径
"command":"sampleProCmd1" // 程序执行命令
},
{
"programName":"sampleProname2",
"directory":"/sampleProDir2",
"command":"sampleProCmd2"
}
]
}
supervisor服务端代码
supervisor服务代码如下
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <time.h>
#include <sys/time.h>
#include <signal.h>
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <pthread.h>
#include "cJSON.h"
#include "nng/nng.h"
#include "nng/protocol/reqrep0/rep.h"
#include "nng/protocol/reqrep0/req.h"
// NNG endpoint on which the control thread listens for supervisorctrl requests.
// NOTE(review): the ipc path is relative, so it presumably resolves against the
// working directory of whichever process opens it -- confirm against NNG docs.
#define NNGURL "ipc://.supervisor.key"
int isDaemon = 0; // set to 1 by -d; printLog skips console output when non-zero
#define LOGMAXLEN 10240 // size in bytes of the buffer holding one formatted log message
// Each LOG* macro expands to the first three printLog() arguments:
// source file, source line and numeric level.
#define LOGDEBUG __FILE__, __LINE__, 4
#define LOGINFO __FILE__, __LINE__, 3
#define LOGSYS __FILE__, __LINE__, 2
#define LOGWARN __FILE__, __LINE__, 1
#define LOGERROR __FILE__, __LINE__, 0
#define LOGNOLINE __FILE__, __LINE__, -1 // level -1: message is emitted without the prefix
/* Logging configuration consumed by printLog(). */
typedef struct
{
char log_path[100]; // directory that holds the log files
char log_name[100]; // log file base name; printLog appends ".log"
int logfile_num; // highest backup index examined when rotating
int logfile_size; // maximum size of a single log file, in MB
int log_level; // most verbose level that is still emitted (4 = debug)
int logfile_open; // 1 = also write messages to the log file
} logCfg;
logCfg logcfg_st = {"./", "supervisor", 1, 5, 4, 1}; // defaults; get_cfg() may override log_path and log_level
/* Configuration and runtime state of one managed program.
 * The control thread copies these structs byte-for-byte into "status" replies. */
typedef struct ProgramRunParm
{
char programName[64]; // display name of the program
char directory[256]; // working directory (absolute path)
char command[64]; // executable name
char absProgram[320]; // directory joined with command, built by checkProParam()
pid_t pid; // pid of the running child; 0 when no child is tracked
time_t beginTime; // time of the last start (or of the last state change)
int status; // 0-stopped 1-running 2-FATAL (exited too quickly) 3-paused 4-unknown
} ProRunParm;
ProRunParm *ProRunParm_ptr; // array of managed programs, allocated by get_cfg()
int ProRunParmSize = 0; // number of entries in ProRunParm_ptr
/* Per-program restart bookkeeping: when the current series of starts began
 * and how many starts have been counted in it. */
typedef struct ProgramRunFailTime
{
time_t ftime; // time of the first start in the current series
int times; // number of starts counted in the current series
} ProFailTime;
ProFailTime *ProFailTime_ptr; // parallel to ProRunParm_ptr, allocated by get_cfg()
#define PRORUNFAILTIMES 3 // start count within the window after which a program is given up on
#define PRORUNFAILTIMERANGE 10 // length of that window, in seconds
// Values stored in ProRunParm.status.
#define RUNNING 1
#define STOPPED 0
#define FATAL 2
#define PAUSED 3
#define UNKOWN 4
/* Rotate the log files once `logfile` has grown past the configured size:
 * the backup with the highest index is deleted, the others are shifted up by
 * one, and the live file becomes <name>1.log. */
static void log_rotate(const char *logfile)
{
    char from[256];
    char to[256];

    for (int i = logcfg_st.logfile_num; i > 0; i--)
    {
        snprintf(from, sizeof(from), "%s/%s%d.log", logcfg_st.log_path, logcfg_st.log_name, i);
        if (access(from, F_OK) != 0)
        {
            continue;
        }
        if (i == logcfg_st.logfile_num)
        {
            unlink(from); /* oldest backup: drop it */
        }
        else
        {
            snprintf(to, sizeof(to), "%s/%s%d.log", logcfg_st.log_path, logcfg_st.log_name, i + 1);
            rename(from, to);
        }
    }
    snprintf(to, sizeof(to), "%s/%s1.log", logcfg_st.log_path, logcfg_st.log_name);
    rename(logfile, to);
}

/*
 * Emit one log record.
 *
 * filename, line  source location (normally supplied by the LOG* macros)
 * lv              0..4 = ERROR, WARN, SYSTEM, INFO, DEBUG; -1 = raw message
 *                 without prefix and without trailing newline
 * arg, ...        printf-style format string and its arguments
 *
 * Records with lv above logcfg_st.log_level are dropped. The record goes to
 * stdout unless isDaemon is set, and is appended to
 * <log_path>/<log_name>.log when logcfg_st.logfile_open is set (the file is
 * rotated first if it reached logfile_size MB). If the log directory or file
 * cannot be accessed the record is dropped after a diagnostic.
 * Messages longer than LOGMAXLEN - 1 bytes are truncated instead of
 * overflowing the buffer.
 */
void printLog(const char *filename, int line, int lv, const char *arg, ...)
{
    if (lv > logcfg_st.log_level)
    {
        return;
    }

    const char *lv_str = "";
    switch (lv)
    {
    case 0:
        lv_str = "ERROR";
        break;
    case 1:
        lv_str = "WARN";
        break;
    case 2:
        lv_str = "SYSTEM";
        break;
    case 3:
        lv_str = "INFO";
        break;
    case 4:
        lv_str = "DEBUG";
        break;
    default:
        break;
    }

    FILE *fp = NULL;
    if (logcfg_st.logfile_open)
    {
        char logfile[256];
        snprintf(logfile, sizeof(logfile), "%s/%s.log", logcfg_st.log_path, logcfg_st.log_name);
        if (access(logcfg_st.log_path, F_OK) == -1)
        {
            fprintf(stderr, "path [%s] not exist!\n", logcfg_st.log_path);
            return;
        }
        if (access(logfile, F_OK) == 0)
        {
            struct stat statbuf;
            if (stat(logfile, &statbuf) < 0)
            {
                fprintf(stderr, "sys error : %m\n");
                return;
            }
            /* Compare in a wide type so a large logfile_size cannot overflow int. */
            if ((long long)statbuf.st_size >= (long long)logcfg_st.logfile_size * 1024 * 1024)
            {
                log_rotate(logfile);
            }
        }
        fp = fopen(logfile, "a+");
        if (fp == NULL)
        {
            printf("fopen error : %m\n");
            return;
        }
    }

    /* Format the caller's message with a hard bound on the output size. */
    char str[LOGMAXLEN];
    va_list va;
    va_start(va, arg);
    vsnprintf(str, sizeof(str), arg, va);
    va_end(va);

    if (lv == -1)
    {
        /* Raw output: no prefix, no newline. */
        if (!isDaemon)
        {
            printf("%s", str);
        }
        if (fp != NULL)
        {
            fprintf(fp, "%s", str);
        }
    }
    else
    {
        /* Take seconds and milliseconds from the same clock reading so the
         * two parts of the timestamp always agree. */
        struct timeval tv;
        gettimeofday(&tv, NULL);
        time_t secs = tv.tv_sec;
        struct tm *stm = localtime(&secs);
        char stime_date[80] = "";
        if (stm != NULL)
        {
            /* %Y is the calendar year; %G (ISO week-based year) is wrong
             * around New Year. */
            strftime(stime_date, sizeof(stime_date), "%Y%m%d %T:", stm);
        }
        long msec = (long)(tv.tv_usec / 1000);

        /* [20210126 09:14:25:118] [DEBUG] [test.c:50] message */
        if (!isDaemon)
        {
            printf("[%s%03ld] [%s] [%s:%d] %s\n", stime_date, msec, lv_str, filename, line, str);
        }
        if (fp != NULL)
        {
            fprintf(fp, "[%s%03ld] [%s] [%s:%d] %s\n", stime_date, msec, lv_str, filename, line, str);
        }
    }

    if (fp != NULL)
    {
        fclose(fp);
    }
}
/*
 * Read a whole file into a freshly allocated, NUL-terminated buffer.
 *
 * filename  path of the file to read
 * returns   pointer to the text (caller must free() it), or NULL after
 *           logging the reason when the file cannot be opened, measured,
 *           allocated for, or read
 */
char *get_file_text(const char *filename)
{
    FILE *fp = fopen(filename, "r");
    if (!fp)
    {
        printLog(LOGERROR, "fopen error: %m");
        return NULL;
    }

    char *text_ptr = NULL;

    /* Determine the file size by seeking to the end and back. */
    if (fseek(fp, 0, SEEK_END))
    {
        printLog(LOGERROR, "fseek() error: %m");
        goto fail;
    }
    long len = ftell(fp);
    if (len < 0)
    {
        printLog(LOGERROR, "ftell() error: %m");
        goto fail;
    }
    if (fseek(fp, 0, SEEK_SET))
    {
        printLog(LOGERROR, "fseek() error: %m");
        goto fail;
    }

    /* One extra byte for the terminator the JSON parser relies on. */
    text_ptr = malloc((size_t)len + 1);
    if (text_ptr == NULL)
    {
        printLog(LOGERROR, "malloc error: %m");
        goto fail;
    }

    size_t nread = fread(text_ptr, 1, (size_t)len, fp);
    if (nread != (size_t)len && ferror(fp))
    {
        printLog(LOGERROR, "fread() error: %m");
        goto fail;
    }
    text_ptr[nread] = '\0';

    fclose(fp);
    return text_ptr;

fail:
    free(text_ptr);
    fclose(fp);
    return NULL;
}
int get_cfg(const char *filename)
{
char *textptr = get_file_text(filename);
if (textptr == NULL)
{
printLog(LOGERROR, "get file %s failed", filename);
return -1;
}
int ret = 0;
cJSON *item = NULL;
cJSON *proArr = NULL;
cJSON *root = cJSON_Parse(textptr);
if (!root)
{
printLog(LOGERROR, "Error before: [%s]\n", cJSON_GetErrorPtr());
ret = -1;
goto end;
}
item = cJSON_GetObjectItem(root, "log_path");
if (item)
{
strncpy(logcfg_st.log_path, item->valuestring, sizeof(logcfg_st.log_path));
}
item = cJSON_GetObjectItem(root, "log_level");
if (item)
{
logcfg_st.log_level = item->valueint;
}
// item = cJSON_GetObjectItem(root, "max_program"); //最大程序数目
// if (item)
// {
// maxProNum = item->valueint;
// }
proArr = cJSON_GetObjectItem(root, "program");
if (!proArr)
{
printLog(LOGERROR, "x program failed");
ret = -1;
goto end;
}
ProRunParmSize = cJSON_GetArraySize(proArr);
if (ProRunParmSize <= 0)
{
printLog(LOGERROR, "program size error");
ret = -1;
goto end;
}
ProRunParm_ptr = malloc(sizeof(ProRunParm) * ProRunParmSize);
if (ProRunParm_ptr == NULL)
{
printLog(LOGERROR, "malloc error: %m");
ret = -1;
goto end;
}
ProFailTime_ptr = malloc(sizeof(ProFailTime) * ProRunParmSize);
if (ProFailTime_ptr == NULL)
{
printLog(LOGERROR, "malloc error: %m");
ret = -1;
goto end;
}
memset(ProFailTime_ptr, 0, sizeof(ProFailTime) * ProRunParmSize);
memset(ProRunParm_ptr, 0, sizeof(ProRunParm) * ProRunParmSize);
for (int i = 0; i < ProRunParmSize; i++)
{
cJSON *arrItem = cJSON_GetArrayItem(proArr, i);
strncpy(ProRunParm_ptr[i].programName, cJSON_GetObjectItem(arrItem, "programName")->valuestring,
sizeof(ProRunParm_ptr[i].programName));
strncpy(ProRunParm_ptr[i].directory, cJSON_GetObjectItem(arrItem, "directory")->valuestring,
sizeof(ProRunParm_ptr[i].directory));
strncpy(ProRunParm_ptr[i].command, cJSON_GetObjectItem(arrItem, "command")->valuestring,
sizeof(ProRunParm_ptr[i].command));
}
end:
if (textptr)
{
free(textptr);
}
if (root)
{
cJSON_Delete(root);
}
return ret;
}
/*
 * Print the supervisor command-line help to stdout and terminate the
 * process with exit status 0.
 *
 * pname  program name shown in the usage line (normally argv[0])
 */
void usage(const char *pname)
{
    fputs("Usage:\n", stdout);
    fprintf(stdout, " %s [-c <cfg filename>] [-d]\n", pname);
    fputs("Param:\n", stdout);
    fputs(" -c 后面跟上配置文件路径, 如没有-c选项, 默认配置文件为[./supervisord.json]\n", stdout);
    fputs(" -d 后台运行,作为守护进程\n", stdout);
    fputs(" -h, --help 显示此说明\n", stdout);
    exit(0);
}
int checkProParam()
{
for (int i = 0; i < ProRunParmSize; i++)
{
if (strcmp(ProRunParm_ptr[i].directory, "") == 0 ||
strcmp(ProRunParm_ptr[i].command, "") == 0)
{
printLog(LOGERROR, "program[%s] config error", ProRunParm_ptr[i].programName);
return -1;
}
if (ProRunParm_ptr[i].directory[strlen(ProRunParm_ptr[i].directory) - 1] != '/')
sprintf(ProRunParm_ptr[i].absProgram, "%s/%s", ProRunParm_ptr[i].directory, ProRunParm_ptr[i].command);
else
sprintf(ProRunParm_ptr[i].absProgram, "%s%s", ProRunParm_ptr[i].directory, ProRunParm_ptr[i].command);
if (access(ProRunParm_ptr[i].absProgram, F_OK))
{
printLog(LOGERROR, "[%s] file [%s] not exist!", ProRunParm_ptr[i].programName, ProRunParm_ptr[i].absProgram);
return -1;
}
}
return 0;
}
/*
 * Single-instance guard: take an exclusive advisory lock on
 * /tmp/.supervisor.lock.
 *
 * returns  0 when the lock was acquired, -1 when the file cannot be opened
 *          or the lock cannot be taken (a warning is logged when another
 *          process already holds it)
 *
 * On success the descriptor is deliberately left open: closing it would
 * release the lock, so it is held until the process exits.
 */
int program_lock()
{
    int fd = open("/tmp/.supervisor.lock", O_RDWR | O_CREAT, 0644);
    if (fd < 0)
    {
        printLog(LOGERROR, "Open file /tmp/.supervisor.lock error:%m");
        return -1;
    }

    struct flock file_lock;
    memset(&file_lock, 0, sizeof(file_lock));
    file_lock.l_whence = SEEK_SET; /* offsets are relative to the file start */
    file_lock.l_start = 0;
    file_lock.l_len = 0;           /* 0 = lock through end of file */
    file_lock.l_type = F_WRLCK;    /* exclusive (write) lock */

    if (fcntl(fd, F_SETLK, &file_lock) == -1)
    {
        /* POSIX lets F_SETLK report a conflicting lock as either EACCES or
         * EAGAIN, so check both instead of a hard-coded errno number. */
        if (errno == EAGAIN || errno == EACCES)
        {
            printLog(LOGWARN, "Supervisor is already running.");
        }
        else
        {
            printLog(LOGERROR, "fcntl error:%m");
        }
        close(fd);
        return -1;
    }
    return 0;
}
/*
 * Child-side half of a program start: redirect stdout/stderr to /dev/null,
 * change into the program's directory and exec it. Never returns: either
 * the process image is replaced, or the child terminates with status -1
 * after logging the failure.
 *
 * ProRunParm_st  copy of the program's configuration entry
 */
void execPro(ProRunParm ProRunParm_st)
{
    int fd = open("/dev/null", O_WRONLY);
    if (fd >= 0) /* 0 is a valid descriptor when stdin happens to be closed */
    {
        dup2(fd, STDOUT_FILENO);
        dup2(fd, STDERR_FILENO);
        if (fd != STDOUT_FILENO && fd != STDERR_FILENO)
        {
            close(fd);
        }
    }

    if (chdir(ProRunParm_st.directory) != 0)
    {
        printLog(LOGERROR, "chdir [%s] failed: %m", ProRunParm_st.directory);
        _exit(-1);
    }

    /* The variadic sentinel must be a null *pointer*; a bare NULL may be a
     * plain int 0 on some platforms. */
    execl(ProRunParm_st.absProgram, ProRunParm_st.command, (char *)NULL);

    /* Only reached when exec failed. */
    printLog(LOGERROR, "exec [%s] failed: %m", ProRunParm_st.programName);
    /* _exit: do not run the parent's atexit handlers or flush its inherited
     * stdio buffers from the forked child. */
    _exit(-1);
}
// 启动程序 返回1-已运行 0-成功
int startPro(int idx)
{
if (ProRunParm_ptr[idx].status == RUNNING)
{
return 1;
}
printLog(LOGINFO, "Starting Program [%s]...", ProRunParm_ptr[idx].programName);
pid_t pid = fork();
if (pid < 0)
{
printLog(LOGERROR, "fork error: %m");
exit(-1);
}
else if (pid == 0)
{
/* 子进程 */
execPro(ProRunParm_ptr[idx]);
}
else
{
/* 父进程 */
ProRunParm_ptr[idx].pid = pid; // 保存子进程的pid
ProRunParm_ptr[idx].beginTime = time(NULL);
ProRunParm_ptr[idx].status = RUNNING;
if (ProFailTime_ptr[idx].times == 0) // 第一次启动
{
ProFailTime_ptr[idx].ftime = time(NULL);
ProFailTime_ptr[idx].times++;
}
}
return 0;
}
// 停止程序 返回值1-已停止
int stopPro(int idx)
{
if (ProRunParm_ptr[idx].status != RUNNING)
{
return 1;
}
int status = kill(ProRunParm_ptr[idx].pid, SIGTERM); // 发送SIGTERM信号
if (status == 0)
{
return 0;
}
else
{
printLog(LOGERROR, "stop Program[%s] failed", ProRunParm_ptr[idx].programName);
return -1;
}
}
void fatal(const char *func, int rv)
{
printLog(LOGERROR, "%s Error(%d): %s", func, rv, nng_strerror(rv));
exit(1);
}
/*
 * Look up a managed program by its display name.
 *
 * name     programName to search for (exact, case-sensitive match)
 * returns  index of the first matching entry in ProRunParm_ptr, or -1 when
 *          no entry matches
 */
int find_program_byname(const char *name)
{
    int idx = 0;

    while (idx < ProRunParmSize)
    {
        if (strcmp(name, ProRunParm_ptr[idx].programName) == 0)
        {
            return idx;
        }
        idx++;
    }
    return -1;
}
void *supervisorctrl_thread(void *arg)
{
pthread_detach(pthread_self());
nng_socket sock;
int rv;
if ((rv = nng_rep0_open(&sock)) != 0)
{
fatal("nng_rep0_open", rv);
}
if ((rv = nng_listen(sock, NNGURL, NULL, 0)) != 0)
{
fatal("nng_listen", rv);
}
char opt[10] = {0}; // 操作 status start stop restart
char proname[64] = {0}; // 待操作的程序名称
int sendbufSize = sizeof(ProRunParm) * ProRunParmSize + 1;
void *sendbuf = malloc(sendbufSize);
if (sendbuf == NULL)
{
printLog(LOGERROR, "malloc error : %m");
exit(1);
}
int shutdown = 0;
while (1)
{
memset(opt, 0, sizeof(opt));
memset(proname, 0, sizeof(proname));
memset(sendbuf, 0, sendbufSize);
char *buf = NULL;
size_t sz;
if ((rv = nng_recv(sock, &buf, &sz, NNG_FLAG_ALLOC)) != 0)
{
fatal("nng_recv", rv);
}
char *found = strchr(buf, '|');
if (found)
{
/* 找到| */
int position = (int)(found - buf);
strncpy(opt, buf, position);
strcpy(proname, buf + position + 1);
// printf("position:%d opt=%s proname=%s\n", position, opt, proname);
int sendsize = 0;
if (strcmp(opt, "status") == 0) /* status */
{
if (strcmp(proname, "all") == 0) /* 全部状态 */
{
sendsize = sendbufSize;
memcpy(sendbuf, "0", 1);
memcpy(sendbuf + 1, (void *)ProRunParm_ptr, sendsize - 1);
}
else // 单个程序
{
int idx = find_program_byname(proname);
if (idx < 0) /* 没有找到程序 返回1 */
{
memcpy(sendbuf, "1", 1);
sendsize = 1;
}
else /* 找到程序 */
{
sendsize = sizeof(ProRunParm) + 1;
memcpy(sendbuf, "0", 1);
memcpy(sendbuf + 1, (void *)(&(ProRunParm_ptr[idx])), sendsize - 1);
}
}
}
else if (strcmp(opt, "start") == 0) /* start */
{
sendsize = 1; // 结果只有状态码1个字节
if (strcmp(proname, "all") == 0) /* 全部 */
{
for (int i = 0; i < ProRunParmSize; i++)
{
startPro(i);
}
sleep(3);
memcpy(sendbuf, "0", 1);
}
else // 单个程序
{
int idx = find_program_byname(proname);
if (idx < 0) /* 没有找到程序 返回1 */
{
memcpy(sendbuf, "1", 1);
}
else /* 找到程序 */
{
int rc = startPro(idx); // 启动
if (rc == 1)
memcpy(sendbuf, "2", 1); // 已启动返回2
else
memcpy(sendbuf, "0", 1);
}
}
}
else if (strcmp(opt, "stop") == 0) /* stop */
{
sendsize = 1; // 结果只有状态码1个字节
if (strcmp(proname, "all") == 0) /* 全部 */
{
for (int i = 0; i < ProRunParmSize; i++)
{
stopPro(i);
}
sleep(3);
memcpy(sendbuf, "0", 1);
}
else // 单个程序
{
int idx = find_program_byname(proname);
if (idx < 0) /* 没有找到程序 返回1 */
{
memcpy(sendbuf, "1", 1);
}
else /* 找到程序 */
{
int rc = stopPro(idx); // 停止
if (rc == 1)
memcpy(sendbuf, "2", 1); // 已停止返回2
else
memcpy(sendbuf, "0", 1);
}
}
}
else if (strcmp(opt, "restart") == 0) /* restart */
{
sendsize = 1; // 结果只有状态码1个字节
if (strcmp(proname, "all") == 0) /* 全部 */
{
for (int i = 0; i < ProRunParmSize; i++)
{
stopPro(i);
}
sleep(3); // 等待程序退出
for (int i = 0; i < ProRunParmSize; i++)
{
startPro(i);
}
sleep(3);
memcpy(sendbuf, "0", 1);
}
else // 单个程序
{
int idx = find_program_byname(proname);
if (idx < 0) /* 没有找到程序 返回1 */
{
memcpy(sendbuf, "1", 1);
}
else /* 找到程序 */
{
stopPro(idx); // 停止
sleep(1);
startPro(idx); // 启动
memcpy(sendbuf, "0", 1);
}
}
}
else if (strcmp(opt, "shutdown") == 0) /* shutdown */
{
sendsize = 1; // 结果只有状态码1个字节
for (int i = 0; i < ProRunParmSize; i++)
{
stopPro(i);
}
sleep(3);
memcpy(sendbuf, "0", 1);
shutdown = 1;
}
if (sendsize > 0)
{
if ((rv = nng_send(sock, sendbuf, sendsize, 0)) != 0)
{
fatal("nng_send", rv);
}
}
else
{
printLog(LOGWARN, "Recv nosupport opt[%s], ignored, MSG[%s]", opt, buf);
}
}
else
{
printLog(LOGWARN, "Recv error Msg format, ignored, MSG[%s]", buf);
}
nng_free(buf, sz);
if (shutdown)
{
exit(0);
}
sleep(1);
}
}
// Supervisor main routine: takes the single-instance lock, starts the control
// thread, launches every configured program and then polls the children
// forever. Does not return under normal operation (the trailing return is
// only reached if the endless loop were ever left).
//   cfgfile: configuration file path; only printed here.
void processHandle(const char *cfgfile)
{
// Exit (status 0) when the lock cannot be taken.
if (program_lock())
{
exit(0);
}
printf("Running on pid:%d\n", getpid());
printf("daemon:%d cfgfile:%s\n", isDaemon, cfgfile);
pthread_t pid;
if (pthread_create(&pid, NULL, supervisorctrl_thread, NULL))
{
printLog(LOGERROR, "Manage routinue pthread_create error: %m");
exit(1);
}
// Short pause before the first programs are started.
// NOTE(review): presumably meant to let the control thread open its socket first -- confirm.
usleep(1000 * 100);
printLog(LOGINFO, "Manage routinue pthread_create success");
for (int i = 0; i < ProRunParmSize; i++)
{
startPro(i);
}
int status;
// Parent-side monitoring: once per second, poll every tracked child without blocking.
while (1)
{
for (int i = 0; i < ProRunParmSize; i++)
{
// pid 0 means no child is currently tracked for this entry.
if (ProRunParm_ptr[i].pid == 0)
{
continue;
}
pid_t rc = waitpid(ProRunParm_ptr[i].pid, &status, WNOHANG);
if (rc == -1)
{
if (errno != ECHILD) /* ECHILD (no child processes) is not logged */
printLog(LOGERROR, "waitpid error(%d) %m", errno);
// NOTE(review): the entry keeps its pid, so this branch is taken again on every pass -- confirm intended.
sleep(10);
}
else if (rc == 0)
{
// Child has not changed state.
continue;
}
else
{
if (WIFEXITED(status)) // child terminated by itself: marked FATAL and not restarted
{
ProRunParm_ptr[i].status = FATAL;
ProRunParm_ptr[i].pid = 0;
ProRunParm_ptr[i].beginTime = 0;
printLog(LOGWARN, "program[%s] Exited by self, exit(%d)",
ProRunParm_ptr[i].programName, WEXITSTATUS(status));
}
else if (WIFSIGNALED(status)) // child was terminated by a signal
{
int sig = WTERMSIG(status);
if (sig == SIGTERM) // the signal stopPro() sends (supervisorctrl stop)
{
/* stopped: xxx (terminated by SIGTERM); not restarted */
printLog(LOGINFO, "Stopped: %s (terminated by SIGTERM)", ProRunParm_ptr[i].programName);
ProRunParm_ptr[i].status = STOPPED;
ProRunParm_ptr[i].pid = 0;
ProRunParm_ptr[i].beginTime = time(NULL);
}
else // any other signal: treated as a crash, restart is considered
{
printLog(LOGWARN, "program[%s] get signal:%d(%s), Exit!",
ProRunParm_ptr[i].programName, sig, strsignal(sig));
time_t now = time(NULL);
// Mark stopped first so that startPro() below actually starts the program.
ProRunParm_ptr[i].status = STOPPED;
ProRunParm_ptr[i].pid = 0;
ProRunParm_ptr[i].beginTime = now;
if ((now - ProFailTime_ptr[i].ftime) <= PRORUNFAILTIMERANGE)
{
/* Crash inside the window: give up once the start count has reached the limit */
if (ProFailTime_ptr[i].times >= PRORUNFAILTIMES)
{
printLog(LOGWARN, "program[%s] Exited too quickly(start failed %d times in %d seconds).",
ProRunParm_ptr[i].programName, PRORUNFAILTIMES, PRORUNFAILTIMERANGE);
ProFailTime_ptr[i].times = 0;
ProRunParm_ptr[i].status = FATAL;
ProRunParm_ptr[i].pid = 0;
ProRunParm_ptr[i].beginTime = 0;
}
else
{
startPro(i); // restart
ProFailTime_ptr[i].times++;
}
}
else
{
/* Crash outside the window: restart and reset the counter.
 * startPro() only refreshes ftime when times is 0 on entry, so the
 * order of these two statements matters. */
startPro(i);
ProFailTime_ptr[i].times = 0;
}
}
}
else if (WIFSTOPPED(status))
{
// NOTE(review): waitpid() is called without WUNTRACED, so this branch is presumably never taken -- confirm.
ProRunParm_ptr[i].status = PAUSED;
ProRunParm_ptr[i].pid = 0;
ProRunParm_ptr[i].beginTime = time(NULL);
printLog(LOGWARN, "程序[%s]被暂停, 暂停信号(%d), 先不做处理",
ProRunParm_ptr[i].programName, WSTOPSIG(status));
}
else
{
// Any other wait status: recorded as unknown, no further action.
ProRunParm_ptr[i].status = UNKOWN;
ProRunParm_ptr[i].pid = 0;
ProRunParm_ptr[i].beginTime = time(NULL);
printLog(LOGWARN, "程序[%s] 未知退出状态, 先不做处理", ProRunParm_ptr[i].programName);
}
}
}
sleep(1);
}
return;
}
int main(int argc, char **argv)
{
char cfgfile[256] = {0};
if (argc == 1)
strcpy(cfgfile, "./supervisord.json");
else if (argc == 2)
{
if (strcmp(argv[1], "-d") == 0)
{
isDaemon = 1;
strcpy(cfgfile, "./supervisord.json");
}
else
usage(argv[0]);
}
else if (argc == 3)
{
if (strcmp(argv[1], "-c") == 0 && strcmp(argv[2], "-d") != 0)
{
strncpy(cfgfile, argv[2], sizeof(cfgfile));
}
else
usage(argv[0]);
}
else if (argc == 4)
{
if (strcmp(argv[1], "-c") == 0 && strcmp(argv[3], "-d") == 0)
{
strncpy(cfgfile, argv[2], sizeof(cfgfile));
isDaemon = 1;
}
else if (strcmp(argv[1], "-d") == 0 && strcmp(argv[2], "-c") == 0)
{
strncpy(cfgfile, argv[3], sizeof(cfgfile));
isDaemon = 1;
}
else
usage(argv[0]);
}
else
usage(argv[0]);
// signal(SIGTERM, sigint_handler);
if (get_cfg(cfgfile))
{
return -1;
}
if (checkProParam())
{
return -1;
}
if (isDaemon)
{
pid_t pid = fork();
if (pid < 0)
{
printLog(LOGERROR, "fork error: %m");
exit(1);
}
else if (pid == 0)
{
/* 子进程 */
processHandle(cfgfile);
}
else
{
/* 父进程 退出 */
exit(0);
}
}
else
{
processHandle(cfgfile);
// while (1)
// {
// sleep(60);
// }
}
return 0;
}
supervisorctrl进程控制代码
supervisorctrl进程控制代码如下
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include "nng/nng.h"
#include "nng/protocol/reqrep0/rep.h"
#include "nng/protocol/reqrep0/req.h"
// NNG endpoint dialled to reach the supervisor.
// NOTE(review): the ipc path is relative, so it presumably resolves against the
// current working directory -- confirm against NNG docs.
#define NNGURL "ipc://.supervisor.key"
#define OVERTIME 10000 // receive timeout for the reply, in milliseconds (10 s)
// Values of ProRunParm.status as reported in status replies.
#define RUNNING 1
#define STOPPED 0
#define FATAL 2
#define PAUSED 3
#define UNKOWN 4
/* One managed program as it appears in a "status" reply: communication()
 * reinterprets the reply bytes after the status byte as an array of these. */
typedef struct ProgramRunParm
{
char programName[64]; // display name of the program
char directory[256]; // working directory (absolute path)
char command[64]; // executable name
char absProgram[320]; // executable including its absolute path
pid_t pid; // process id
time_t beginTime; // start time
int status; // 0-stopped 1-running 2-FATAL (exited too quickly) 3-paused 4-unknown
} ProRunParm;
/*
 * Print the supervisorctrl command-line help to stdout and terminate the
 * process with exit status 0.
 *
 * pname  program name shown in the usage lines (normally argv[0])
 */
void usage(const char *pname)
{
    fputs("Usage:\n", stdout);
    fprintf(stdout, " %s <status|start|stop|restart> [all|proname]\n", pname);
    fprintf(stdout, " %s <shutdown>\n", pname);
    fputs("Param:\n", stdout);
    fputs(" status 查看程序状态, 后接程序名, 默认all\n", stdout);
    fputs(" <start|stop|restart> <all|proname> 启动|停止|重启 后接all或者程序名\n", stdout);
    fputs(" shutdown 关闭所有程序并停止supervisor服务(请务必使用该命令停止supervisor服务)\n", stdout);
    fputs(" -h, --help 显示此说明\n", stdout);
    exit(0);
}
/*
 * Format the time elapsed since beginTime as an uptime string.
 *
 * beginTime  start timestamp
 * uptimeStr  output buffer; receives "uptime H:MM:SS", or
 *            "uptime D days,H:MM:SS" when at least one full day has passed
 */
void getUptime(time_t beginTime, char *uptimeStr)
{
    time_t elapsed = time(NULL) - beginTime;

    int days = elapsed / 86400;
    int withinDay = elapsed % 86400;
    int hours = withinDay / 3600;
    int withinHour = withinDay % 3600;
    int mins = withinHour / 60;
    int seconds = withinHour % 60;

    if (days == 0)
    {
        sprintf(uptimeStr, "uptime %d:%02d:%02d", hours, mins, seconds);
        return;
    }
    sprintf(uptimeStr, "uptime %d days,%d:%02d:%02d", days, hours, mins, seconds);
}
/*
 * Return the ctime() text for stamp without its trailing newline.
 *
 * stamp    timestamp to format
 * returns  pointer to a static buffer that the next ctime()/std_time() call
 *          overwrites; an empty string when ctime() cannot represent the
 *          timestamp. Never NULL.
 */
char *std_time(time_t stamp)
{
    static char empty[1];

    char *str_time = ctime(&stamp);
    if (str_time == NULL)
    {
        /* ctime() fails for unrepresentable times; never hand NULL to %s. */
        empty[0] = '\0';
        return empty;
    }
    /* Cut at the newline if present; safe even for an empty result. */
    str_time[strcspn(str_time, "\n")] = '\0';
    return str_time;
}
/*
 * Produce the two text columns shown for one program in the status listing.
 *
 * ProRunParm_st  program record received from the supervisor
 * statusStr      output: state name (STOPPED, RUNNING, FATAL, PAUSED, UNKOWN)
 * uptimeStr      output: "pid N,uptime ..." while running, a fixed note for
 *                FATAL, otherwise the recorded timestamp as text
 *
 * Both outputs are left untouched for any other status value.
 */
void getStatusUptime(ProRunParm ProRunParm_st, char *statusStr, char *uptimeStr)
{
    const int state = ProRunParm_st.status;
    const char *label = NULL;

    if (state == RUNNING)
    {
        char elapsed[50] = {0};
        getUptime(ProRunParm_st.beginTime, elapsed);
        sprintf(uptimeStr, "pid %d,%s", ProRunParm_st.pid, elapsed);
        strcpy(statusStr, "RUNNING");
        return;
    }
    if (state == FATAL)
    {
        strcpy(statusStr, "FATAL");
        strcpy(uptimeStr, "Exited too quickly");
        return;
    }

    /* The remaining states all show the recorded timestamp. */
    if (state == STOPPED)
    {
        label = "STOPPED";
    }
    else if (state == PAUSED)
    {
        label = "PAUSED";
    }
    else if (state == UNKOWN)
    {
        label = "UNKOWN";
    }
    else
    {
        return;
    }
    strcpy(statusStr, label);
    sprintf(uptimeStr, "%s", std_time(ProRunParm_st.beginTime));
}
/*
 * Print the status table: one line per program with its name, state and
 * uptime/detail column.
 *
 * ProRunParm_ptr   array of program records
 * ProRunParmSize   number of records in the array
 */
void showStatus(ProRunParm *ProRunParm_ptr, int ProRunParmSize)
{
    ProRunParm *entry = ProRunParm_ptr;

    for (int left = ProRunParmSize; left > 0; left--, entry++)
    {
        char state[10] = {0};
        char detail[100] = {0};

        getStatusUptime(*entry, state, detail);
        printf("%-30s%-10s%s\n", entry->programName, state, detail);
    }
}
/*
 * Report an NNG failure on stderr and terminate with exit status 1.
 * A refused connection additionally prints a hint on stdout that the
 * supervisor server was not found.
 *
 * func  name of the NNG call that failed
 * rv    NNG error code, translated with nng_strerror()
 */
void fatal(const char *func, int rv)
{
    const char *reason = nng_strerror(rv);

    fprintf(stderr, "%s Error(%d): %s\n", func, rv, reason);
    if (rv != NNG_ECONNREFUSED)
    {
        exit(1);
    }
    fprintf(stdout, "supervisor server not found\n");
    exit(1);
}
int communication(const char *opt, const char *proname)
{
nng_socket sock;
int rv;
if ((rv = nng_req0_open(&sock)) != 0)
{
fatal("nng_socket", rv);
}
if ((rv = nng_setopt_ms(sock, NNG_OPT_RECVTIMEO, OVERTIME)) != 0)
{
fatal("nng_setopt_ms", rv);
}
if ((rv = nng_dial(sock, NNGURL, NULL, 0)) != 0)
{
fatal("nng_dial", rv);
}
char sendMsg[100] = {0};
snprintf(sendMsg, sizeof(sendMsg), "%s|%s", opt, proname);
// printf("supervisorctrl: SEND [%s]\n", sendMsg);
if ((rv = nng_send(sock, sendMsg, strlen(sendMsg) + 1, 0)) != 0)
{
fatal("nng_send", rv);
}
// recv
void *recvbuf = NULL;
size_t sz;
if ((rv = nng_recv(sock, &recvbuf, &sz, NNG_FLAG_ALLOC)) != 0)
{
fatal("nng_recv", rv);
}
// printf("supervisorctrl: RECEIVED SIZE[%ld]\n", sz);
if (strcmp(opt, "status") == 0)
{
char res = ((char *)recvbuf)[0];
if (res == '1')
{
/* 未找到 */
fprintf(stderr, "%s: ERROR (no such process)\n", proname);
}
else
{
int proNum = sz / sizeof(ProRunParm);
ProRunParm *ProRunParm_st = (ProRunParm *)(recvbuf + 1);
showStatus(ProRunParm_st, proNum);
}
}
else
{
char res = ((char *)recvbuf)[0];
if (res == '1')
{
/* 未找到 */
fprintf(stderr, "%s: ERROR (no such process)\n", proname);
}
else if (res == '2')
{
/* 已启动或者已停止 */
if (strcmp(opt, "start") == 0)
{
/* 已启动 */
fprintf(stderr, "%s: ERROR (already started)\n", proname);
}
else if (strcmp(opt, "stop") == 0)
{
/* 已停止 */
fprintf(stderr, "%s: ERROR (not running)\n", proname);
}
}
else
{
fprintf(stdout, "Operation Ok\n");
}
}
nng_free(recvbuf, sz);
nng_close(sock);
return (0);
}
/*
 * supervisorctrl entry point.
 *
 *   supervisorctrl status|shutdown            applies to "all"
 *   supervisorctrl <status|start|stop|restart|shutdown> <all|proname>
 * Any other command line (including -h / --help) prints the usage text.
 *
 * returns  0 after the request has been sent and its reply handled,
 *          1 when the program name is too long to be valid
 */
int main(int argc, char **argv)
{
    char opt[10] = {0};     /* operation: status start stop restart shutdown */
    char proname[64] = {0}; /* program the operation applies to */

    if (argc < 2)
    {
        usage(argv[0]);
    }

    if (argc == 2)
    {
        if (strcmp(argv[1], "status") != 0 && strcmp(argv[1], "shutdown") != 0)
        {
            usage(argv[0]);
        }
        snprintf(proname, sizeof(proname), "%s", "all");
    }
    else
    {
        if (strcmp(argv[1], "status") != 0 &&
            strcmp(argv[1], "start") != 0 &&
            strcmp(argv[1], "stop") != 0 &&
            strcmp(argv[1], "restart") != 0 &&
            strcmp(argv[1], "shutdown") != 0)
        {
            usage(argv[0]);
        }
        /* Program names hold at most sizeof(proname) - 1 characters, so a
         * longer argument cannot match anything; report it instead of
         * overflowing the buffer. */
        if (strlen(argv[2]) >= sizeof(proname))
        {
            fprintf(stderr, "%s: ERROR (no such process)\n", argv[2]);
            return 1;
        }
        snprintf(proname, sizeof(proname), "%s", argv[2]);
    }

    /* argv[1] was validated against the fixed keyword list above, so it
     * always fits into opt. */
    snprintf(opt, sizeof(opt), "%s", argv[1]);

    communication(opt, proname);
    return 0;
}