使用 nvitop 监控指定进程

废话不多说,直接上代码:

import time
from nvitop import Device, GpuProcess, NA, colored
from datetime import datetime

def printAndLog(msg):
    """Echo *msg* to stdout and append it as one line to the log file.

    Args:
        msg: Any object; it is printed as-is and written to the log as
            ``str(msg)`` followed by a newline.

    Relies on the module-level ``logFile`` being an open, writable text file.
    """
    print(msg)
    logFile.write(str(msg) + "\n")
    # Flush after every message: the monitor runs until it is killed, so
    # anything still sitting in the write buffer at that point would be lost.
    logFile.flush()

# Enumerate GPUs in CUDA ordinal order (use `Device.all()` for NVML ordinal instead).
devices = Device.cuda.all()
# Not read by any active code in this script; kept so existing references keep working.
separator = False

# The log file is named after the start timestamp and created in the current
# working directory. The encoding is pinned to UTF-8 so that process commands
# or usernames containing non-ASCII characters cannot raise UnicodeEncodeError
# under a non-UTF-8 locale.
logFileName = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
logFile = open(logFileName, "w", encoding="utf-8")

printAndLog("Monitoring task initiation....Powered by nvitop!")
printAndLog("Start time : " + str(datetime.now()))

# PIDs of the processes to watch; edit this list to match your own jobs.
pids = [1220722, 1123911]
# Not read by any active code in this script; kept so existing references keep working.
tips = 0

printAndLog(f'-----     This task will monitor ({len(pids)}) processes     -----')
# 1-based counter of polling rounds; incremented once per round by the loop below.
times = 1
# Main polling loop: once per round, look on every device for the monitored
# PIDs and report a snapshot of each one found. The loop has no exit condition
# of its own; the try/finally guarantees the log file is closed (and therefore
# fully written) when the script is interrupted or fails.
try:
    while True:
        printAndLog(f' No.{times}  --  {datetime.now()}')
        times += 1
        for device in devices:
            # Mapping of PID -> GpuProcess for the processes on this device.
            processes = device.processes()
            # Keep only the monitored PIDs, in the order they appear in `pids`.
            target_processes = {pid: processes[pid] for pid in pids if pid in processes}

            if not target_processes:
                # Colour only the console output; the log file gets the plain
                # text so it is not polluted with ANSI escape sequences.
                message = '  - No Running Processes'
                print(colored(message, attrs=('bold',)))
                logFile.write(message + "\n")
                continue

            snapshots = GpuProcess.take_snapshots(target_processes.values(), failsafe=True)
            snapshots.sort(key=lambda process: (process.username, process.pid))
            for snapshot in snapshots:
                username = snapshot.username
                infos = {
                    'pid': snapshot.pid,
                    # Show at most 8 characters; a longer name is cut to 7 plus '+'.
                    'username': username[:7] + ('+' if len(username) > 8 else username[7:8]),
                    'cpu': snapshot.cpu_percent,
                    'memory': snapshot.host_memory_human,
                    'running_time': snapshot.running_time_human,
                    # NA is nvitop's "not available" sentinel for GPU memory.
                    'gpu': (snapshot.gpu_memory_human if snapshot.gpu_memory_human is not NA else 'WDDM:N/A'),
                    'sm': snapshot.gpu_sm_utilization,
                    'command': snapshot.command,
                }
                printAndLog(infos)
        # Pause one second between rounds.
        time.sleep(1)
finally:
    logFile.close()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值