1. 脚本源码
执行脚本前,请确保被监控的 Linux 操作系统已安装 sysstat 工具(脚本中使用的 sar 命令由该工具提供)。
#!/usr/bin/env python3
import fabric
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas
import re
import time
import typing
class UnixSystemMonitor(object):
    """
    Monitor the resource usage of a remote Unix host with ``sar`` over SSH.

    To avoid problems with connections that drop unexpectedly, every SSH
    connection is opened right before it is used and closed right after.
    """

    def __init__(self, hostname: str, username: str, password: str):
        """
        :param hostname: host name of the remote Unix server
        :param username: operating-system user name
        :param password: password belonging to that user
        """
        self.hostname = hostname
        # Keyword arguments handed to fabric.Connection on every use.
        self.kwargs = {
            "host": hostname,
            "user": username,
            "connect_kwargs": {"password": password}
        }

    @staticmethod
    def _search_fields(pattern: str, stdout: str) -> typing.List[str]:
        """
        Return the whitespace-split text captured by group 1 of ``pattern``.

        :param pattern: regular expression with exactly one capturing group
        :param stdout: the ``Average:`` lines printed by ``sar``
        :return: the columns of the first matching line
        :raises IndexError: if no line matches (same exception type as the
            former ``re.findall(...)[0]``, but with a readable message)
        """
        match = re.search(pattern, stdout)
        if match is None:
            raise IndexError(f"no sar 'Average:' line matches {pattern!r}")
        return match.group(1).split()

    @classmethod
    def _row_fields(cls, stdout: str, name: str) -> typing.List[str]:
        """
        Return the columns that follow ``Average: <name>`` in ``stdout``.

        ``name`` is matched literally, so device names containing regex
        metacharacters (e.g. the VLAN interface ``eth0.100``) are safe.
        """
        return cls._search_fields(rf"Average: +{re.escape(name)} +(.*)", stdout)

    def _record_averages(self, stdout: str, dirname: str, date: str,
                         nics: typing.Iterable[str], disks: typing.Iterable[str],
                         first: bool) -> None:
        """
        Parse the ``Average:`` lines of ``sar`` and append them to the logs.

        Everything is parsed before anything is written, so a missing device
        cannot leave the per-resource log files out of sync with each other.

        :param stdout: the ``Average:`` lines printed by ``sar``
        :param dirname: directory holding the log files
        :param date: timestamp stored in the ``date`` column of each row
        :param nics: names of the network interfaces to record
        :param disks: names of the disks to record
        :param first: if True, truncate each file and write the header row
        :raises IndexError: if an expected line is missing from ``stdout``
        """
        records = []  # (path, column names, column values)

        # Network interfaces: header row starts with IFACE.
        nic_keys = self._row_fields(stdout, "IFACE")
        for nic in nics:
            records.append((f"{dirname}/{self.hostname}_nic_{nic}.log",
                            nic_keys, self._row_fields(stdout, nic)))

        # Disks: header row starts with DEV.
        disk_keys = self._row_fields(stdout, "DEV")
        for disk in disks:
            records.append((f"{dirname}/{self.hostname}_disk_{disk}.log",
                            disk_keys, self._row_fields(stdout, disk)))

        # CPU: header row starts with CPU, the aggregated row with "all".
        records.append((f"{dirname}/{self.hostname}_cpu.log",
                        self._row_fields(stdout, "CPU"),
                        self._row_fields(stdout, "all")))

        # Memory: the header row has no label column, and the values row is
        # the line directly after it (anchoring on the header avoids picking
        # up some other row that happens to start with a digit).
        records.append((f"{dirname}/{self.hostname}_mem.log",
                        self._search_fields(r"Average: +(kbmemfree.*)", stdout),
                        self._search_fields(r"Average: +kbmemfree.*\nAverage: +(.*)", stdout)))

        for path, keys, vals in records:
            with open(path, mode="w" if first else "a", encoding="utf-8") as file:
                if first:
                    file.write("\t".join(["date"] + keys) + "\n")
                file.write("\t".join([date] + vals) + "\n")

    def get_resource_usage(self, dirname: str, nics: typing.Iterable[str] = (),
                           disks: typing.Iterable[str] = (), first: bool = False) -> None:
        """
        Sample the resource usage of the server once and log it.

        Runs ``sar`` for three one-second intervals on the remote host and
        appends the averaged values to one tab-separated log file per
        resource (cpu, mem, each disk, each nic) below ``dirname``.

        :param dirname: directory holding the log files (must exist)
        :param nics: names of the network interfaces to monitor
        :param disks: names of the disks to monitor
        :param first: if True, truncate the files and write the column names
        :return:
        :raises IndexError: if an expected line is missing from the output
        """
        date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # LC_ALL=C keeps the "Average:" prefix untranslated on hosts whose
        # default locale is not English.
        command = "env LC_ALL=C sar -u -r -d -n DEV -p 1 3 | grep 'Average:'"
        with fabric.Connection(**self.kwargs) as conn:
            # warn=True: a failing command is reported through resp.exited
            # instead of raising, which is what the check below relies on.
            resp = conn.run(command, warn=True, hide=True)
        if resp.exited != 0:
            return
        self._record_averages(resp.stdout, dirname, date, nics, disks, first)

    def draw_resource_usage_charts(self, dirname: str, nics: typing.Iterable[str] = (),
                                   disks: typing.Iterable[str] = ()) -> None:
        """
        Draw usage trend charts from the log files in ``dirname``.

        The charts (cpu, mem, every disk, every nic - in that order) are
        stacked vertically and saved as ``<dirname>/<hostname>.png``.

        :param dirname: directory holding the log files
        :param nics: names of the monitored network interfaces
        :param disks: names of the monitored disks
        :return:
        """
        nics, disks = tuple(nics), tuple(disks)  # iterators can only be consumed once
        host = self.hostname
        nic_columns = ["rxpck/s", "txpck/s", "rxkB/s", "txkB/s"]

        # One entry per chart: (log name, columns to read, title, legend, ylim)
        charts = [
            ("cpu", ["%idle"], f"{host} cpu usage(%)", ["total used(%)"], (0, 100)),
            ("mem", ["%memused"], f"{host} mem usage(%)", ["total used(%)"], (0, 100)),
        ]
        charts += [(f"disk_{disk}", ["%util"], f"{host} disk {disk} usage(%)", ["%util"], (0, 100))
                   for disk in disks]
        # NOTE: the NIC series are packet and kB rates; the "(%)" in the title
        # is kept unchanged for backward compatibility.
        charts += [(f"nic_{nic}", nic_columns, f"{host} nic {nic} usage(%)", nic_columns, None)
                   for nic in nics]

        slot = 1 / len(charts)  # share of the figure height per chart
        # Gap below each axes. Capped relative to the slot so that the axes
        # (0.6 of a slot high) still fit when there are many charts.
        bottom_gap = min(0.06, 0.2 * slot)

        fig = plt.figure(figsize=(40, 6 * len(charts)), dpi=100)
        try:
            for index, (name, columns, title, labels, ylim) in enumerate(charts, start=1):
                df = pandas.read_csv(f"{dirname}/{host}_{name}.log", delimiter="\t", header=0,
                                     index_col="date", usecols=["date"] + columns)
                df.index = pandas.to_datetime(df.index)
                df = df[columns]  # make the line order match the legend labels
                if name == "cpu":
                    df["%idle"] = 100 - df["%idle"]  # idle share -> used share

                ax = fig.add_axes([0.05, 1 - slot * index + bottom_gap, 0.9, 0.6 * slot])
                ax.plot(df)
                ax.set(title=title, ylim=ylim)
                ax.legend(labels=labels, loc="lower right")
                ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d %H:%M:%S"))
            fig.savefig(f"{dirname}/{host}.png")
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)
if __name__ == '__main__':
    import os  # only the script entry point needs it

    # Connection settings of the monitored host - fill in before running.
    hostname = "..."
    username = "..."
    password = "..."
    # Names of the network interfaces and disks to monitor (none by default).
    nics, disks = list(), list()
    duration = 300  # total sampling time in seconds
    log_dir = "./log"

    # The monitor opens its log files directly, so the directory must exist.
    os.makedirs(log_dir, exist_ok=True)

    om = UnixSystemMonitor(hostname, username=username, password=password)
    # The first sample truncates the logs and writes the column headers.
    om.get_resource_usage(log_dir, nics=nics, disks=disks, first=True)
    # A monotonic clock keeps the duration correct even if the wall clock is
    # adjusted while sampling.
    start_date = time.monotonic()
    while time.monotonic() - start_date < duration:
        om.get_resource_usage(log_dir, nics=nics, disks=disks)
    om.draw_resource_usage_charts(log_dir, nics=nics, disks=disks)