我们的系统在运行过程中,常常会因为内存、CPU使用不当或者网络链路不佳而出现一些意想不到的问题,因此设计一个插件来实现系统的健康诊断。
大致思路如下:
读取/proc下对应的CPU、内存、网络信息情况,计算出内存和cpu的使用率,以及网络收发包的丢包率,根据这三项情况判断系统的健康情况。
判别标准:
设置每项的一个健康阈值,当计算的上述三项超过所设阈值时,判断该项为不健康。
如果三项都健康,则系统健康状况为优秀;
有两项健康,系统健康状况为良好;
有一项健康,系统健康状况为一般;
三项均不健康,系统健康状况为差。
实现代码结构如下:
type_def.h:用于定义结构体和宏
mem_cal.h/cpp:用于计算内存使用率
cpu_cal.h/cpp:用于计算CPU使用率
net_cal.h/cpp:用于计算网络收发包丢包率
type_def.h
#ifndef _TYPE_DEF_H_
#define _TYPE_DEF_H_
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
// Status codes returned by the collector functions. They need real values:
// an empty macro turns `return ERR_ERROR;` into `return ;`, which is
// ill-formed in a function returning int.
#define ERR_SUCCESS 0
#define ERR_ERROR (-1)
#define MEMINFO "/proc/meminfo" // memory information table
#define CPUINFO "/proc/stat" // CPU time counters table
#define NETINFO "/proc/net/dev" // per-interface network counters table
// A real type alias rather than a macro expanding to a cast expression, so
// that `uint x;` is a valid declaration and `(uint)(expr)` a valid cast.
typedef unsigned int uint;
// Overall system health grade. The numeric value equals the number of
// metrics that failed their check, so a count can be converted directly.
enum class SYSTEM_HEALTH_STATUS
{
    EXCELLENT = 0,          // excellent: every metric meets its target
    EXECELLENT = EXCELLENT, // original (misspelled) name, kept for existing callers
    FINE = 1,               // good: two metrics meet their target
    FAIR = 2,               // average: one metric meets its target
    LOYAL = FAIR,           // original name for the "average" grade, kept for existing callers
    BAD = 3,                // poor: no metric meets its target
};
// A single entry of the memory information table.
struct MemItem
{
    char memTypeDesc[64]; // descriptive label of the entry
    char memUnit[8];      // unit text, normally KB
    uint memTypeInt;      // entry id, 1-9
    uint memVal;          // numeric value
};
// Aggregated memory information.
struct MemInfo
{
    double useRate;             // memory usage rate
    std::vector<MemItem> items; // the individual entries
};
// One row of CPU time counters.
struct CpuInfo
{
    char name[20]; // name in the first column
    uint user;     // user
    uint nice;     // nice
    uint system;   // system
    uint idle;     // idle
    uint iowait;   // iowait
    uint irq;      // irq
    uint softirq;  // softirq
};
// Counters of one network interface.
struct NetInfo
{
    char name[20]; // name in the first column

    // Receive side
    uint r_bytes;      // total bytes received by the interface
    uint r_packets;    // total packets received by the interface
    uint r_errs;       // total receive errors
    uint r_drop;       // total received packets dropped
    uint r_fifo;       // number of FIFO receive-buffer errors
    uint r_frame;      // number of receive framing errors
    uint r_compressed; // number of compressed packets received
    uint r_multicast;  // number of multicast frames received

    // Transmit side
    uint t_bytes;      // total bytes sent
    uint t_packets;    // total packets sent
    uint t_errs;       // total transmit errors
    uint t_drop;       // number of packets dropped while sending
    uint t_fifo;       // number of FIFO transmit-buffer errors
    uint t_colls;      // number of collisions detected on the interface
    uint t_carrier;    // number of carrier losses
    uint t_compressed; // number of compressed packets sent
};
#endif //_TYPE_DEF_H_
mem_cal.h
#ifndef _MEM_CAL_H_
#define _MEM_CAL_H_
#include "type_def.h"
// Maps a memory-table field label (for example "MemTotal:") to its numeric
// id in the range 1-9; returns 0 for a label that is not tracked.
int GetValType(char *memField);
// Reads the memory information table and fills memInfo with the tracked
// entries and the usage rate.
int GetMemOccupy(MemInfo *memInfo);
// Calculates the memory utilisation rate.
// NOTE(review): confirm the unit of the result (percent vs. fraction)
// against the implementation.
double CalMemRate();
#endif //_MEM_CAL_H_
mem_cal.cpp
#include "mem_cal.h"
// Translates a memory-table field label into its numeric id.
//
// memField: NUL-terminated label text, e.g. "MemTotal:".
// Returns the id (1-9) of a tracked label, or 0 for any other text.
int GetValType(char *memField)
{
    struct LabelId
    {
        const char *label; // exact label text, colon included
        size_t span;       // characters compared: label length plus its NUL
        int id;            // id reported for this label
    };
    // Comparing `span` characters includes the terminator, so a label only
    // matches when the whole input string is equal to it.
    static const LabelId kTracked[] = {
        {"MemTotal:", sizeof("MemTotal:"), 1},
        {"MemFree:", sizeof("MemFree:"), 2},
        {"Buffers:", sizeof("Buffers:"), 3},
        {"Cached:", sizeof("Cached:"), 4},
        {"MemAvailable:", sizeof("MemAvailable:"), 5},
        {"SwapTotal:", sizeof("SwapTotal:"), 6},
        {"SwapFree:", sizeof("SwapFree:"), 7},
        {"Dirty:", sizeof("Dirty:"), 8},
        {"Mapped:", sizeof("Mapped:"), 9},
    };
    const size_t trackedCount = sizeof(kTracked) / sizeof(kTracked[0]);

    for (size_t idx = 0; idx < trackedCount; ++idx)
    {
        if (strncmp(memField, kTracked[idx].label, kTracked[idx].span) == 0)
        {
            return kTracked[idx].id;
        }
    }
    return 0;
}
int GetMemOccupy(MemInfo *memInfo)
{
FILE *fp = fopen(MEMINFO, "r");
if(!fp)
{
return ERR_ERROR;
}
uint totalMem = 0;
uint freeMem = 0;
MemItem item;
memset(&item, 0, sizeof(item));
while (fscanf(fp, "%s %u %s", item.memTypeDesc, &item.memVal, &item.memUnit) == 3)
{
item.memTypeInt = GetValType(item.memTypeDesc);
if (item.memTypeInt > 0)
{
memInfo->push_back(item);
if (item.memTypeInt == 1)
{
totalMem = item.memVal;
}
if (item.memTypeInt == 2)
{
freeMem = item.memVal;
}
}
}
fclose(fd);
memInfo->useRate = 100.0 - (1.0 * freeMem) / (1.0 * totalMem);
return ERR_SUCCESS;
}
cpu_cal.h
#ifndef _CPU_CAL_H_
#define _CPU_CAL_H_
#include "type_def.h"
// Calculates the CPU usage between two counter samples: proStat is the
// earlier sample, newStat the later one.
double CalCpuOccupy(CpuInfo *proStat, CpuInfo *newStat);
// Reads the current CPU time counters into cpust.
// (The original comment described this as memory usage, which was a slip.)
int GetCpuOccupy(CpuInfo *cpust);
// Returns the share of CPU time in use system-wide.
double GetSysCpuSage();
#endif //_CPU_CAL_H_
cpu_cal.cpp:
#include "cpu_cal.h"
// Calculates CPU usage between two samples of the CPU time counters.
//
// proStat: earlier sample; newStat: later sample.
// Usage is (user + system time spent between the samples) divided by the
// growth of (user + nice + system + idle), as a percentage.
// Returns a value in [0, 100]; 0 when either pointer is null or no time
// elapsed between the samples.
double CalCpuOccupy(CpuInfo *proStat, CpuInfo *newStat)
{
    if (!proStat || !newStat)
    {
        return 0.0;
    }

    // Total (user + nice + system + idle) time at each sample.
    const unsigned int oldTotal = proStat->user + proStat->nice + proStat->system + proStat->idle;
    const unsigned int newTotal = newStat->user + newStat->nice + newStat->system + newStat->idle;
    // Unsigned subtraction stays correct across a counter wrap-around.
    const unsigned int totalDelta = newTotal - oldTotal;
    if (totalDelta == 0)
    {
        return 0.0;
    }

    const unsigned int userDelta = newStat->user - proStat->user;
    const unsigned int systemDelta = newStat->system - proStat->system;
    // Do the scaling in floating point: multiplying in integers first
    // overflows and drops the fractional part.
    double cpuUse = 100.0 * (static_cast<double>(userDelta) + static_cast<double>(systemDelta)) /
                    static_cast<double>(totalDelta);
    if (cpuUse > 100.0)
    {
        cpuUse = 100.0;
    }
    return cpuUse;
}
// Reads the first line of the CPU table (CPUINFO) into cpust.
//
// cpust: output; all counters are zeroed before parsing, so columns missing
//        from the line stay 0.
// Returns ERR_SUCCESS when the name and at least the user, nice, system and
// idle counters were parsed. Returns ERR_ERROR when cpust is null or the
// table cannot be opened, read or parsed.
int GetCpuOccupy(CpuInfo *cpust)
{
    if (!cpust)
    {
        return ERR_ERROR;
    }
    *cpust = CpuInfo();

    FILE *fd = fopen(CPUINFO, "r");
    if (!fd)
    {
        return ERR_ERROR;
    }
    char buff[256];
    const bool haveLine = (fgets(buff, sizeof(buff), fd) != NULL);
    fclose(fd);
    if (!haveLine)
    {
        return ERR_ERROR;
    }

    // One conversion per destination (the name is width-limited to its
    // 20-byte buffer); a shorter format would leave iowait/irq/softirq unset.
    const int parsed = sscanf(buff, "%19s %u %u %u %u %u %u %u",
                              cpust->name, &cpust->user, &cpust->nice, &cpust->system,
                              &cpust->idle, &cpust->iowait, &cpust->irq, &cpust->softirq);
    if (parsed < 5)
    {
        return ERR_ERROR;
    }
    return ERR_SUCCESS;
}
double GetSysCpuSage();
{
CpuInfo cpu_stat1;
CpuInfo cpu_stat2;
MEM_OCCUPY mem_stat;
double cpuRate;
//获取内存
get_memoccupy ((MEM_OCCUPY *)&mem_stat);
//第一次获取cpu使用情况
get_cpuoccupy((CpuInfo *)&cpu_stat1);
sleep(10);
//第二次获取cpu使用情况
get_cpuoccupy((CpuInfo *)&cpu_stat2);
//计算cpu使用率
cpuRate = cal_cpuoccupy ((CpuInfo *)&cpu_stat1, (CpuInfo *)&cpu_stat2);
}
net_cal.h:
#ifndef _NET_CAL_H_
#define _NET_CAL_H_
#include "type_def.h"
// Measures the packet drop rates of the network interfaces.
//
// rDropRate: receives the receive-side drop rate, in percent.
// sDropRate: receives the send-side drop rate, in percent.
// Both are passed by reference because they are outputs; by-value parameters
// would discard the result.
// Returns ERR_SUCCESS on success, ERR_ERROR when the table cannot be opened
// (both rates are then 0).
int GetNetInfo(double &rDropRate, double &sDropRate);
#endif //_NET_CAL_H_
net_cal.cpp:
#include "net_cal.h"

// Column positions of the counters used below, counted after the "name:"
// prefix of a line: eight receive columns followed by eight transmit columns.
enum
{
    NET_COL_RX_PACKETS = 1,
    NET_COL_RX_DROP = 3,
    NET_COL_TX_PACKETS = 9,
    NET_COL_TX_DROP = 11,
    NET_COL_COUNT = 16
};

// Cumulative totals seen by the previous GetNetInfo call. They start at 0, so
// the first call reports the rate over everything counted so far.
// Not synchronized: concurrent calls to GetNetInfo are not safe.
static unsigned long long rPackets = 0;
static unsigned long long rDrop = 0;
static unsigned long long tPackets = 0;
static unsigned long long tDrop = 0;

// Growth of a cumulative counter since the previous reading; 0 when the
// counter went backwards (for example after an interface was reset).
static unsigned long long CounterDelta(unsigned long long current, unsigned long long previous)
{
    return (current >= previous) ? (current - previous) : 0;
}

// Dropped packets per hundred packets; 0 when no packets were counted.
static double DropPercent(unsigned long long dropped, unsigned long long packets)
{
    if (packets == 0)
    {
        return 0.0;
    }
    return static_cast<double>(dropped) * 100.0 / static_cast<double>(packets);
}

// Sums the packet and drop counters of every interface whose name starts
// with "eth" and reports the drop rates accumulated since the previous call.
// See the declaration above for the parameters and return value.
int GetNetInfo(double &rDropRate, double &sDropRate)
{
    rDropRate = 0.0;
    sDropRate = 0.0;

    FILE *fd = fopen(NETINFO, "r");
    if (!fd)
    {
        return ERR_ERROR;
    }

    unsigned long long rPacketsNew = 0;
    unsigned long long rDropNew = 0;
    unsigned long long tPacketsNew = 0;
    unsigned long long tDropNew = 0;
    char buff[256];
    while (fgets(buff, sizeof(buff), fd))
    {
        // Data lines look like "  eth0: <16 numbers>"; header lines have no colon.
        const char *colon = strchr(buff, ':');
        if (!colon)
        {
            continue;
        }
        const char *name = buff;
        while (*name == ' ' || *name == '\t')
        {
            ++name;
        }
        // Compare the 3-character prefix only; including the terminator in
        // the comparison would never match "eth0:".
        if (0 != strncmp(name, "eth", 3))
        {
            continue;
        }

        // Read the numbers after the colon. Splitting at the colon also
        // copes with a name that runs straight into the first number.
        unsigned long long column[NET_COL_COUNT];
        int parsed = 0;
        const char *cursor = colon + 1;
        while (parsed < NET_COL_COUNT)
        {
            char *end = NULL;
            const unsigned long long value = strtoull(cursor, &end, 10);
            if (end == cursor)
            {
                break;
            }
            column[parsed++] = value;
            cursor = end;
        }
        if (parsed < NET_COL_COUNT)
        {
            continue;
        }

        rPacketsNew += column[NET_COL_RX_PACKETS];
        rDropNew += column[NET_COL_RX_DROP];
        tPacketsNew += column[NET_COL_TX_PACKETS];
        tDropNew += column[NET_COL_TX_DROP];
    }
    fclose(fd);

    rDropRate = DropPercent(CounterDelta(rDropNew, rDrop), CounterDelta(rPacketsNew, rPackets));
    sDropRate = DropPercent(CounterDelta(tDropNew, tDrop), CounterDelta(tPacketsNew, tPackets));

    // Remember the totals as the baseline for the next call.
    rPackets = rPacketsNew;
    rDrop = rDropNew;
    tPackets = tPacketsNew;
    tDrop = tDropNew;
    return ERR_SUCCESS;
}
main.cpp:
#include "mem_cal.h"
#include "cpu_cal.h"
#include "net_cal.h"
// True when a reading counts against the system: the measurement failed
// (negative reading) or the reading reached its threshold.
static bool ReachesThreshold(double reading, double threshold)
{
    return reading < 0.0 || threshold <= reading;
}

// Grades system health from memory usage, CPU usage and network drop rates.
//
// memThreshold:   memory usage at or above this value is unhealthy.
// cpuThreshold:   CPU usage at or above this value is unhealthy.
// sDropThreshold: send drop rate at or above this value is unhealthy.
// rDropThreshold: receive drop rate at or above this value is unhealthy.
// The network counts as one metric and is unhealthy when either direction
// reaches its threshold or the counters cannot be read.
// Returns the grade whose numeric value equals the number of unhealthy
// metrics (0 = all healthy ... 3 = none healthy).
SYSTEM_HEALTH_STATUS calHealthStatus(double memThreshold, double cpuThreshold, double sDropThreshold, double rDropThreshold)
{
    int unhealthyCount = 0;
    if (ReachesThreshold(CalMemRate(), memThreshold))
    {
        unhealthyCount++;
    }
    if (ReachesThreshold(GetSysCpuSage(), cpuThreshold))
    {
        unhealthyCount++;
    }
    double rDrop = 0;
    double sDrop = 0;
    const bool netReadFailed = (GetNetInfo(rDrop, sDrop) != ERR_SUCCESS);
    if (netReadFailed || sDropThreshold <= sDrop || rDropThreshold <= rDrop)
    {
        unhealthyCount++;
    }
    return static_cast<SYSTEM_HEALTH_STATUS>(unhealthyCount);
}
int main()
{
double memThreshold = 10.0;
double cpuThreshold = 10.0;
double sDropThreshold = 10.0;
double rDropThreshold = 10.0;
auto stat = calHealthStatus(memThreshold, cpuThreshold, sDropThreshold, rDropThreshold);
std::cout << "the system status is " << stat<<endl;
return 0;
}