# Without further ado, here is the code.
import time
from nvitop import Device, GpuProcess, NA, colored
from datetime import datetime
def printAndLog(msg):
    """Print ``msg`` to stdout and append it as one line to the log file.

    The message is converted with ``str()`` before being written to the
    module-level ``logFile`` handle, which must already be open for writing.

    Args:
        msg: Any object; its ``str()`` form is what gets printed and logged.
    """
    import re  # local import keeps this function self-contained

    print(msg)
    # Terminal colour codes (ANSI SGR sequences such as "\x1b[1m") are only
    # meaningful on a terminal; keep them out of the plain-text log.
    plain_text = re.sub(r"\x1b\[[0-9;]*m", "", str(msg))
    logFile.write(plain_text + "\n")
    # Flush after every line so the log is complete even if the monitor is
    # killed, and so it can be followed live while the monitor runs.
    logFile.flush()
# GPUs that are polled on every sampling round.
devices = Device.cuda.all() # or `Device.all()` to use NVML ordinal instead
separator = False  # not read by any active code in view

## log file: named after the start timestamp (YYYY-MM-DD_HH-MM-SS.txt)
logFileName = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
# Explicit UTF-8 so that non-ASCII usernames or command lines cannot raise
# UnicodeEncodeError under a non-UTF-8 locale. The handle stays open for the
# whole run because printAndLog writes to it.
logFile = open(logFileName, "w", encoding="utf-8")
printAndLog("Monitoring task initiation....Powered by nvitop!")
printAndLog("Start time : " + str(datetime.now()))

## PIDs of the target processes to watch
pids = [1220722, 1123911]
tips = 0  # not read by any active code in view
printAndLog(f'----- This task will monitor ({len(pids)}) processes -----')

# Disabled tabular output (header row), kept for reference:
# fmt = ' {pid:<5} {username:<8} {cpu:>5} {host_memory:>8} {time:>8} {gpu_memory:>8} {sm:>3} {command:<}'.format
# print(colored(fmt(pid='PID', username='USERNAME',
#                   cpu='CPU%', host_memory='HOST-MEM', time='TIME',
#                   gpu_memory='GPU-MEM', sm='SM%',
#                   command='COMMAND'),
#               attrs=('bold',)))

# 1-based counter of sampling rounds, shown in each round's header line.
times = 1
# Sampling loop: every round logs a header line, then one record per watched
# process found on each device, until the user interrupts the script.
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from statement order. In particular the 1 s sleep is placed
# once per round rather than once per device -- confirm.
try:
    while True:
        printAndLog(f' No.{times} -- {datetime.now()}')
        times += 1
        for device in devices:
            # Mapping of PID -> GpuProcess for this device.
            processes = device.processes()
            # Disabled per-device header output, kept for reference:
            # if tips < len(devices):
            #     print(colored(str(device), color='green', attrs=('bold',)))
            #     print(colored(' - Temperature: ', color='blue', attrs=('bold',)) + f'{device.temperature()}C')
            #     print(colored(' - Total memory: ', color='blue', attrs=('bold',)) + f'{device.memory_total_human()}')
            #     tips += 1

            ## filter the pids: keep only watched PIDs present on this device
            target_processes = {pid: processes[pid] for pid in pids if pid in processes}

            ## start monitor
            if target_processes:
                snapshots = GpuProcess.take_snapshots(target_processes.values(), failsafe=True)
                snapshots.sort(key=lambda process: (process.username, process.pid))
                for snapshot in snapshots:
                    # Disabled tabular output, kept for reference:
                    # print(fmt(pid=snapshot.pid,
                    #           username=snapshot.username[:7] + ('+' if len(snapshot.username) > 8 else snapshot.username[7:8]),
                    #           cpu=snapshot.cpu_percent, host_memory=snapshot.host_memory_human,
                    #           time=snapshot.running_time_human,
                    #           gpu_memory=(snapshot.gpu_memory_human if snapshot.gpu_memory_human is not NA else 'WDDM:N/A'),
                    #           sm=snapshot.gpu_sm_utilization,
                    #           command=snapshot.command))
                    infos = {
                        'pid': snapshot.pid,
                        # At most 8 characters: names longer than 8 are cut
                        # to 7 characters plus a '+' marker.
                        'username': snapshot.username[:7] + ('+' if len(snapshot.username) > 8 else snapshot.username[7:8]),
                        'cpu': snapshot.cpu_percent,
                        'memory': snapshot.host_memory_human,
                        'running_time': snapshot.running_time_human,
                        'gpu': (snapshot.gpu_memory_human if snapshot.gpu_memory_human is not NA else 'WDDM:N/A'),
                        'sm': snapshot.gpu_sm_utilization,
                        'command': snapshot.command,
                    }
                    printAndLog(infos)
            else:
                printAndLog(colored(' - No Running Processes', attrs=('bold',)))
            # Disabled separator output, kept for reference:
            # if separator:
            #     print('-' * 120)
            # separator = True
        ## 1s -- always
        time.sleep(1)
except KeyboardInterrupt:
    # Ctrl-C is the expected way to stop the monitor; record when it ended
    # instead of exiting with a traceback.
    printAndLog("Stop time : " + str(datetime.now()))
finally:
    # Always release the log file handle, whatever ended the loop.
    logFile.close()