目录
此工具可放在服务器中,每隔3秒检测一次iis中所有应用程序池中异常的应用程序。
实现的功能
- 每3秒检测所有应用程序池中占用cpu过高的程序,cpu阈值可自行设置,超过阈值自动kill进程并且进行重启
- 每3秒检测IIS应用程序池中是否有崩溃、异常停止的应用程序池,并自动重启(最多重启3次,超过3次后不再自动重启);手动停止的应用程序池会被过滤掉,不会被自动重启。
此界面左边会显示IIS中所有应用程序池的信息,右边则是textBox,持续滚动显示日志。图片中出现异常,是因为我的本地电脑没有部署IIS。
技术栈与依赖 DLL
| 类型 | 技术 / DLL | 用途 | 路径说明 |
|---|---|---|---|
| 平台 | WinForms (.NET Framework 4.5.2) | 主程序框架 | — |
| 核心 DLL | Microsoft.Web.Administration.dll | 操作 IIS 应用池状态与回收 | `C:\Windows\System32\inetsrv\` |
| 系统库 | System.Diagnostics | 获取进程信息与 CPU 使用率 | .NET 自带 |
| 日志 | System.IO | 记录操作日志到本地文件 | .NET 自带 |
⚠️ 注意:程序必须以管理员权限运行,才能访问 IIS 的相关管理接口。
实现细节
-
变量
private System.Threading.Timer MonitorTimer; // Timer that periodically invokes CheckAppPools
private const int MonitorInterval = 3000; // Check interval in milliseconds: one check every 3 seconds
private string LogDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Log");// Log directory under the application base directory
private string CpuConfigPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "cpu_threshold.txt");// File that stores the CPU threshold
private Dictionary<string, int> RestartDict = new Dictionary<string, int>(); // Automatic restart count per application pool, keyed by "RestartCount_<pool name>"
private string RestartCountPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "restart_counts.json");// File the restart counts are serialized to (kept in the program directory)
private volatile bool mIsChecking = false;// Re-entrancy flag for CheckAppPools: set while a scan is running so an overlapping timer tick returns immediately
private bool mIsClose = false; // NOTE(review): not read or written in the visible code; presumably marks that the form is closing — confirm
-
初始化操作
/// <summary>
/// Configures the pool list: details view, the four data columns,
/// and owner-draw handlers for column headers and sub-items.
/// </summary>
private void InitListView()
{
    listView1.View = View.Details;

    // Column captions and their pixel widths, kept side by side in display order.
    string[] captions = { "应用程序池", "进程ID", "状态", "CPU 使用率(%)" };
    int[] widths = { 100, 100, 120, 120 };
    for (int col = 0; col < captions.Length; col++)
    {
        listView1.Columns.Add(captions[col], widths[col]);
    }

    // Owner-draw is enabled so the sub-item handler can control the text color.
    listView1.OwnerDraw = true;
    listView1.DrawColumnHeader += listView1_DrawColumnHeader;
    listView1.DrawSubItem += listView1_DrawSubItem;
}
/// <summary>
/// Owner-draw hook for column headers: delegates to the default header rendering.
/// </summary>
private void listView1_DrawColumnHeader(object sender, DrawListViewColumnHeaderEventArgs e) =>
    e.DrawDefault = true;
/// <summary>
/// Owner-draw hook for sub-items. The fourth column (CPU usage) is drawn in red
/// when its numeric value is at or above the configured threshold and in black
/// otherwise; every other column uses the default rendering.
/// </summary>
private void listView1_DrawSubItem(object sender, DrawListViewSubItemEventArgs e)
{
    const int cpuColumnIndex = 3;

    if (e.ColumnIndex != cpuColumnIndex)
    {
        e.DrawDefault = true;
        return;
    }

    int limit = GetCpuThreshold();
    float parsedCpu;
    bool overLimit = float.TryParse(e.SubItem.Text, out parsedCpu) && parsedCpu >= limit;

    // Unparseable text falls through to the black brush, same as a below-threshold value.
    Brush textBrush = overLimit ? Brushes.Red : Brushes.Black;
    e.Graphics.DrawString(e.SubItem.Text, listView1.Font, textBrush, e.Bounds);
}
/// <summary>
/// Creates the monitoring timer: the first CheckAppPools run is scheduled
/// immediately and subsequent runs every MonitorInterval milliseconds.
/// </summary>
private void StartMonitoring()
{
    const int initialDelayMs = 0;
    var tick = new System.Threading.TimerCallback(CheckAppPools);
    MonitorTimer = new System.Threading.Timer(tick, null, initialDelayMs, MonitorInterval);
}
-
自定义cpu阈值
// Toggles the threshold editor. In display mode the button caption is
// "设置 CPU 阈值"; clicking it reveals the text box. In edit mode the caption is
// "保存"; clicking it validates and persists the value.
if (button1.Text == "设置 CPU 阈值")
{
    // Enter edit mode: show the current threshold and shift the label to make room.
    txtCpuThreshold.Visible = true;
    txtCpuThreshold.Text = GetCpuThreshold().ToString();
    button1.Text = "保存";
    label1.Location = new System.Drawing.Point(258, 12);
}
else
{
    string input = txtCpuThreshold.Text.Trim();

    // The threshold is a percentage. Zero or a negative value would make
    // "cpuUsage >= threshold" true for every pool (even an idle one), so the
    // monitor would kill and restart all pools on every cycle.
    if (int.TryParse(input, out int threshold) && threshold >= 1 && threshold <= 100)
    {
        bool saved = false;
        try
        {
            // Same file that GetCpuThreshold reads.
            File.WriteAllText(CpuConfigPath, threshold.ToString());
            saved = true;
        }
        catch (Exception ex)
        {
            MessageBox.Show("保存 CPU 阈值失败:" + ex.Message, "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }

        if (saved)
        {
            // Leave edit mode only after the value was actually persisted, so a
            // failed or invalid save keeps the editor (and its layout) intact.
            label1.Location = new System.Drawing.Point(129, 11);
            txtCpuThreshold.Visible = false;
            button1.Text = "设置 CPU 阈值";
            label1.Text = "当前CPU阈值为:" + threshold + "%";
            MessageBox.Show($"CPU阈值已设置为 {threshold}%", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
    }
    else
    {
        MessageBox.Show("请输入 1 到 100 之间的整数值作为阈值。", "错误", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
-
检测IIS应用程序池
/// <summary>
/// Timer callback: scans every IIS application pool once.
/// For each pool it (1) measures CPU usage and kills/restarts the pool when the
/// usage is at or above the configured threshold, (2) for pools in the Stopped
/// state, looks for a recent WAS error in the System event log and restarts the
/// pool at most three times when one is found, and (3) refreshes the pool's row
/// in the list view. Rows of pools that no longer exist are removed at the end.
/// </summary>
/// <param name="state">Timer state object; not used.</param>
private void CheckAppPools(object state)
{
    // Skip this tick if the previous scan is still running.
    if (mIsChecking) return;
    mIsChecking = true;
    try
    {
        using (ServerManager serverManager = new ServerManager())
        {
            var mCurrentNames = new HashSet<string>();
            foreach (var pool in serverManager.ApplicationPools)
            {
                string name = pool.Name;
                var poolState = pool.State;
                string statusEn = poolState.ToString();
                string statusCn = GetStatusDescription(poolState);
                double cpuUsage = GetCpuUsageByAppPool(name);

                // Take the PID straight from the worker-process record. Opening the
                // process via Process.GetProcessById would throw if the worker exited
                // in the meantime and abort the scan of all remaining pools.
                var wp = pool.WorkerProcesses.FirstOrDefault();
                string mProcessID = wp != null ? wp.ProcessId.ToString() : "NULL";

                int threshold = GetCpuThreshold();
                if (cpuUsage >= threshold)
                {
                    KillAndRestart(pool);
                    Log($"[{name}](进程ID:{mProcessID}) CPU 使用率过高 {cpuUsage}%,已杀掉该进程并且重启。状态:{statusCn}",true);
                }

                // A stopped pool is only restarted when the event log shows a crash,
                // so that a pool stopped on purpose is not restarted over and over.
                if (statusEn == "Stopped")
                {
                    HandleStoppedPool(pool, name, mProcessID);
                }

                mCurrentNames.Add(name);
                if (!name.Contains("DefaultAppPool") && !name.Contains(".NET v4.5 Classic") && !name.Contains(".NET v4.5"))
                {
                    string statusText = statusEn + "(" + statusCn + ")";
                    string cpuText = cpuUsage.ToString("F2");
                    Invoke(new Action(() => UpsertPoolRow(name, mProcessID, statusText, cpuText)));
                }
            }

            // Remove rows whose pool no longer exists.
            Invoke(new Action(() =>
            {
                for (int i = listView1.Items.Count - 1; i >= 0; i--)
                {
                    if (!mCurrentNames.Contains(listView1.Items[i].Text))
                    {
                        listView1.Items.RemoveAt(i);
                    }
                }
            }));
        }
    }
    catch (Exception ex)
    {
        Log("检测IIS应用池出现异常: " + ex.Message,true);
    }
    finally
    {
        mIsChecking = false;
    }
}

/// <summary>
/// Handles a pool found in the Stopped state: restarts it (up to three times in
/// total) when a recent crash entry exists, otherwise logs that it is skipped.
/// </summary>
private void HandleStoppedPool(ApplicationPool pool, string name, string mProcessID)
{
    const int restartLimit = 3;

    if (!HasRecentCrashEvent(name, mProcessID))
    {
        Log($"[{name}] (进程ID:{mProcessID})状态为 Stopped,但无崩溃迹象,疑似人工操作,跳过自动重启。", true);
        return;
    }

    string restartKey = $"RestartCount_{name}";
    int restartCount;
    if (!RestartDict.TryGetValue(restartKey, out restartCount))
    {
        restartCount = 0;
    }

    if (restartCount >= restartLimit)
    {
        Log($"[{name}](进程ID:{mProcessID}) 已达重启上限({restartLimit}次),不再重启,防止死循环。",true);
        return;
    }

    KillAndRestart(pool);
    restartCount++;
    RestartDict[restartKey] = restartCount;
    try
    {
        File.WriteAllText(RestartCountPath, JsonConvert.SerializeObject(RestartDict, Formatting.Indented));
    }
    catch (Exception ex)
    {
        // Failing to persist the counter must not abort the scan of the remaining
        // pools; the in-memory count is still up to date.
        Log($"[{name}] 保存重启次数失败:{ex.Message}", true);
    }
    Log($"[{name}] (进程ID:{mProcessID})状态异常,尝试 Kill 并重启。(当前次数:{restartCount})",true);
}

/// <summary>
/// Returns true when the System event log contains an Error entry from source
/// "WAS" generated within the last five minutes whose message mentions the pool name.
/// </summary>
private bool HasRecentCrashEvent(string name, string mProcessID)
{
    const string logSource = "WAS";   // source that records application-pool lifecycle events
    const string logName = "System";
    DateTime checkTime = DateTime.Now.AddMinutes(-5);

    try
    {
        using (EventLog eventLog = new EventLog(logName))
        {
            EventLogEntryCollection entries = eventLog.Entries;

            // Walk from the newest entry backwards and stop at the first entry
            // older than the window, instead of reading the whole log each cycle.
            // NOTE(review): relies on entries being stored in chronological order.
            for (int i = entries.Count - 1; i >= 0; i--)
            {
                EventLogEntry entry = entries[i];
                if (entry.TimeGenerated < checkTime)
                {
                    break;
                }

                if (entry.Source == logSource &&
                    entry.EntryType == EventLogEntryType.Error &&
                    entry.Message.Contains(name))
                {
                    Log($"[{name}](进程ID:{mProcessID})发现相关崩溃日志,准备尝试自动重启。奔溃日志:" +entry.Message,true);
                    return true;
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log($"[{name}] 检查WAS事件日志失败:{ex.Message}",false);
    }
    return false;
}

/// <summary>
/// Updates the list-view row whose first column equals the pool name, or adds a
/// new row when none exists. Must run on the UI thread.
/// </summary>
private void UpsertPoolRow(string name, string mProcessID, string statusText, string cpuText)
{
    var existingItem = listView1.Items.Cast<ListViewItem>()
        .FirstOrDefault(i => i.Text == name);
    if (existingItem != null)
    {
        existingItem.SubItems[1].Text = mProcessID;
        existingItem.SubItems[2].Text = statusText;
        existingItem.SubItems[3].Text = cpuText;
    }
    else
    {
        listView1.Items.Add(new ListViewItem(new[] { name, mProcessID, statusText, cpuText }));
    }
}
-
获取自定义阈值
/// <summary>
/// Reads the CPU threshold (in percent) from the file at CpuConfigPath.
/// </summary>
/// <returns>
/// The stored value when the file exists and contains a positive integer;
/// otherwise the default of 70.
/// </returns>
private int GetCpuThreshold()
{
    const int defaultThreshold = 70;
    try
    {
        if (File.Exists(CpuConfigPath))
        {
            string text = File.ReadAllText(CpuConfigPath).Trim();

            // Reject zero and negative values: with such a threshold the check
            // "cpuUsage >= threshold" is true for every pool, including idle ones.
            if (int.TryParse(text, out int value) && value > 0)
                return value;
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: this is also called from the list-view paint
        // handler, so an unreadable file falls back to the default instead of throwing.
    }
    return defaultThreshold;
}
-
获取某个应用程序池的占用率
/// <summary>
/// Measures the CPU usage of the first worker process of an application pool.
/// </summary>
/// <param name="appPoolName">Name of the IIS application pool.</param>
/// <returns>
/// CPU usage in percent, normalized by the number of logical processors.
/// Returns 0 for the excluded pools, when the pool or its worker process
/// cannot be found, or when the measurement fails.
/// </returns>
/// <remarks>Blocks the calling thread for about two seconds while sampling.</remarks>
private double GetCpuUsageByAppPool(string appPoolName)
{
    // Pools excluded from CPU monitoring.
    if (appPoolName.Contains("DefaultAppPool") ||
        appPoolName.Contains(".NET v4.5 Classic") ||
        appPoolName.Contains(".NET v4.5"))
    {
        return 0;
    }

    using (ServerManager manager = new ServerManager())
    {
        var pool = manager.ApplicationPools[appPoolName];
        if (pool == null)
        {
            Log($"[{appPoolName}]该应用池对象为 null,跳过处理。",false);
            return 0;
        }

        var wp = pool.WorkerProcesses.FirstOrDefault();
        if (wp == null)
        {
            Log($"[{appPoolName}]的WorkerProcesses(进程总和)count为0,可能尚未启动,跳过处理。",false);
            return 0;
        }

        try
        {
            // All worker processes are named w3wp, so the performance-counter
            // instance (e.g. "w3wp#1") has to be resolved through the PID.
            int pid = wp.ProcessId;
            string instanceName = GetProcessInstanceName(appPoolName, pid);
            if (string.IsNullOrEmpty(instanceName))
            {
                Log($"[{appPoolName}] 获取 CPU 使用率失败:未找到 PID 为 {pid} 的性能计数器实例",false);
                return 0;
            }

            // Dispose the counter: this method runs for every pool on every cycle,
            // so an undisposed counter would leak native resources continuously.
            using (var counter = new PerformanceCounter("Process", "% Processor Time", instanceName, true))
            {
                counter.NextValue();   // the first sample only initializes the counter
                Thread.Sleep(2000);    // sampling window; must be at least one second
                float rawValue = counter.NextValue();

                // "% Processor Time" is summed over all cores (one fully loaded
                // process on a 4-core machine can read 400), so divide by the core
                // count to get a 0-100 figure comparable with the threshold.
                int cpuCount = Environment.ProcessorCount;
                float usage = rawValue / cpuCount;

                Log($"[{appPoolName}]CPU使用率:{usage:F2}%(原始值:{rawValue:F2}%,核数:{cpuCount})",false);
                return usage;
            }
        }
        catch (Exception ex)
        {
            Log($"[{appPoolName}] 获取 CPU 使用率失败:{ex.Message}",false);
            return 0;
        }
    }
}
-
获取性能计数器实例名
/// <summary>
/// Finds the "Process" performance-counter instance name (for example "w3wp#1")
/// whose "ID Process" value equals the given PID.
/// </summary>
/// <param name="appPoolName">Pool name, used only in log messages.</param>
/// <param name="pid">Process ID to look up.</param>
/// <returns>The instance name, or an empty string when no instance matches or the lookup fails.</returns>
private string GetProcessInstanceName(string appPoolName,int pid)
{
    try
    {
        PerformanceCounterCategory category = new PerformanceCounterCategory("Process");
        string[] instances = category.GetInstanceNames();
        foreach (string instance in instances)
        {
            try
            {
                using (PerformanceCounter counter = new PerformanceCounter("Process", "ID Process", instance, true))
                {
                    if ((int)counter.RawValue == pid)
                    {
                        return instance;
                    }
                }
            }
            catch (InvalidOperationException)
            {
                // The instance list is a snapshot: a process that exited since the
                // snapshot makes its counter unreadable. Skip it and keep searching
                // rather than giving up on the remaining instances.
            }
        }
    }
    catch (Exception ex)
    {
        Log("["+appPoolName+"]未找到 PID 为 "+pid+" 的性能计数器实例名:" + ex.Message,false);
    }
    return "";
}
-
Kill 并重新启动应用池
/// <summary>
/// Kills every worker process of the pool, makes sure the pool is stopped, and
/// starts it again. Failures are logged and never propagate to the caller.
/// </summary>
/// <param name="pool">The application pool to restart.</param>
private void KillAndRestart(ApplicationPool pool)
{
    const int pollIntervalMs = 200;
    const int maxStopWaitMs = 5000;

    string poolName = pool.Name;
    try
    {
        foreach (var wp in pool.WorkerProcesses)
        {
            try
            {
                using (Process worker = Process.GetProcessById(wp.ProcessId))
                {
                    worker.Kill();
                    // Kill is asynchronous; give the process a moment to terminate.
                    worker.WaitForExit(maxStopWaitMs);
                }
            }
            catch
            {
                Log("kill并重新启动[" + poolName + "] 池时,没有找到"+ wp.ProcessId,true);
            }
        }

        // This method is also used for pools that are already stopped (crash
        // recovery), so only request a stop when the pool is running or starting.
        ObjectState current = pool.State;
        if (current == ObjectState.Started || current == ObjectState.Starting)
        {
            pool.Stop();
        }

        // Wait until the pool reports Stopped (bounded) before starting it again.
        int waitedMs = 0;
        while (pool.State != ObjectState.Stopped && waitedMs < maxStopWaitMs)
        {
            Thread.Sleep(pollIntervalMs);
            waitedMs += pollIntervalMs;
        }

        pool.Start();
        Log("[" + poolName + "]重启成功!",true);
    }
    catch (Exception ex)
    {
        Log("["+poolName+"]重启失败: " + ex.Message,true);
    }
}
-
写入日志到 Log 目录,并显示在文本框中
/// <summary>
/// Appends a timestamped message to the log text box and, optionally, to the
/// daily log file "yyyyMMddLog.txt" in LogDirectory. Never throws: this method is
/// called from catch blocks and from timer threads.
/// </summary>
/// <param name="message">Text to log.</param>
/// <param name="mIsWriteTxt">True to also write the line to the log file.</param>
private void Log(string message,bool mIsWriteTxt)
{
    // One timestamp for both sinks, so the file name, file line and UI line agree.
    DateTime now = DateTime.Now;
    string mLine = $"[{now:yyyy-MM-dd HH:mm:ss}] {message}";

    if (mIsWriteTxt)
    {
        try
        {
            Directory.CreateDirectory(LogDirectory); // no-op when it already exists
            string mFilePath = Path.Combine(LogDirectory, now.ToString("yyyyMMdd") + "Log.txt");
            using (StreamWriter writer = new StreamWriter(mFilePath, true))
            {
                // The extra newline keeps a blank line between entries.
                writer.WriteLine(mLine + Environment.NewLine);
            }
        }
        catch (Exception ex)
        {
            // Best-effort: a file failure must not suppress the on-screen log line.
            Debug.WriteLine("Log file write failed: " + ex.Message);
        }
    }

    try
    {
        // Invoke needs a live window handle; skip the UI update before the handle
        // exists or after the form has been disposed.
        if (IsDisposed || !IsHandleCreated)
        {
            return;
        }

        Invoke(new Action(() =>
        {
            // Keep the text box bounded: clear it once it holds 200 lines or more.
            if (txtLog.Lines.Length >= 200)
            {
                txtLog.Clear();
            }
            txtLog.AppendText(mLine + Environment.NewLine);
        }));
    }
    catch (Exception ex)
    {
        // The form can still be torn down between the check above and Invoke.
        Debug.WriteLine("Log UI update failed: " + ex.Message);
    }
}
实际运行效果
-
工具部署在多台服务器的桌面系统上,稳定运行数月。
-
显著减少了手工处理 IIS 挂死的情况,节省了近 80% 的运维时间。
-
在 CPU 高频波动的高峰期,应用池自动回收机制保证了服务稳定不崩溃。
-
日志功能也为问题回溯与性能分析提供了有力支持。