通过 Python 查看 GPU 信息的工具包: pynvml
前言: 首先介绍一下如何在 Linux 下查看系统的显卡硬件信息:
lspci | grep -i vga
安装
conda install -c conda-forge pynvml
使用示例
# Print the driver version plus, for every GPU, its total / used / free
# memory in MiB and its device name.
import pynvml

# NVML must be initialised before any query; the matching shutdown lives in
# the ``finally`` clause so it still runs when a query raises.
pynvml.nvmlInit()
try:
    print("Driver Version:", pynvml.nvmlSystemGetDriverVersion())
    deviceCount = pynvml.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        # Dividing by 1024**2 turns the raw values into MiB.
        print(meminfo.total/1024**2)  # total device memory
        print(meminfo.used/1024**2)  # memory currently in use
        print(meminfo.free/1024**2)  # memory still free
        print("Device", i, ":", pynvml.nvmlDeviceGetName(handle))
finally:
    pynvml.nvmlShutdown()
Driver Version: b'470.94'
32508.1875
2106.9375
30401.25
Device 0 : b'Quadro GV100'
32505.0625
59.9375
32445.125
Device 1 : b'Quadro GV100'
# Use the nvidia_smi helper from the pynvml.smi submodule.
from pynvml.smi import nvidia_smi
# Obtain the nvidia_smi object that the query below is issued on.
nvsmi = nvidia_smi.getInstance()
# Ask only for the free and total memory fields; the result is a dict
# (shown below) with one 'fb_memory_usage' entry per GPU.
nvsmi.DeviceQuery('memory.free, memory.total')
{'gpu': [{'fb_memory_usage': {'total': 32508.1875,
'free': 30401.25,
'unit': 'MiB'}},
{'fb_memory_usage': {'total': 32505.0625,
'free': 32445.125,
'unit': 'MiB'}}]}
# Use the nvidia_smi helper from the pynvml.smi submodule.
from pynvml.smi import nvidia_smi
# Obtain the nvidia_smi object that the query below is issued on.
nvsmi = nvidia_smi.getInstance()
# DeviceQuery() is called without a field list here; the sample output that
# follows contains the full per-GPU report.
print(nvsmi.DeviceQuery(), end='\n')
{'timestamp': '2022-02-16', 'driver_version': '470.94', 'count': 2, 'gpu': [{'id': '00000000:17:00.0', 'product_name': 'Quadro GV100', 'product_brand': 'Quadro', 'display_mode': 'Disabled', 'display_active': 'Disabled', 'persistence_mode': 'Disabled', 'mig_mode': {'current_mm': 'Disabled', 'pending_mm': 'Disabled'}, 'accounting_mode': 'Disabled', 'accounting_mode_buffer_size': '4000', 'driver_model': {'current_dm': 'N/A', 'pending_dm': 'N/A'}, 'serial': '1322220060800', 'uuid': 'GPU-a232050c-8d98-b00d-1420-2854c917ba88', 'minor_number': '0', 'vbios_version': '88.00.87.00.04', 'multigpu_board': 'No', 'board_id': '0x1700', 'inforom_version': {'img_version': 'G500.0500.00.05', 'oem_object': '1.1', 'ecc_object': '5.0', 'pwr_object': 'N/A'}, 'gpu_operation_mode': {'current_gom': 'N/A', 'pending_gom': 'N/A'}, 'pci': {'pci_bus': '17', 'pci_device': '00', 'pci_domain': '0000', 'pci_device_id': '1DBA10DE', 'pci_bus_id': '00000000:17:00.0', 'pci_sub_system_id': '121A1028', 'pci_gpu_link_info': {'pcie_gen': {'max_link_gen': '3', 'current_link_gen': '3'}, 'link_widths': {'max_link_width': '16x', 'current_link_width': '16x'}}, 'pci_bridge_chip': {'bridge_chip_type': 'N/A', 'bridge_chip_fw': 'N/A'}, 'replay_counter': '0', 'tx_util': 0, 'tx_util_unit': 'KB/s', 'rx_util': 0, 'rx_util_unit': 'KB/s'}, 'fan_speed': 35, 'fan_speed_unit': '%', 'performance_state': 'P2', 'clocks_throttle': {'clocks_throttle_reason_gpu_idle': 'Active', 'clocks_throttle_reason_applications_clocks_setting': 'Not Active', 'clocks_throttle_reason_sw_power_cap': 'Not Active', 'clocks_throttle_reason_hw_slowdown': 'Not Active', 'clocks_throttle_reason_none': 'N/A'}, 'fb_memory_usage': {'total': 32508.1875, 'used': 2106.9375, 'free': 30401.25, 'unit': 'MiB'}, 'bar1_memory_usage': {'total': 256.0, 'used': 9.0, 'free': 247.0, 'unit': 'MiB'}, 'compute_mode': 'Default', 'utilization': {'gpu_util': 0, 'memory_util': 0, 'encoder_util': 0, 'decoder_util': 0, 'unit': '%'}, 'ecc_mode': {'current_ecc': 'Disabled', 
'pending_ecc': 'Disabled'}, 'ecc_errors': {'volatile': {'single_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}, 'double_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}}, 'aggregate': {'single_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}, 'double_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}}}, 'retired_pages': {'multiple_single_bit_retirement': None, 'double_bit_retirement': None, 'pending_retirement': 'No'}, 'temperature': {'gpu_temp': 43, 'gpu_temp_max_threshold': 90, 'gpu_temp_slow_threshold': 88, 'unit': 'C'}, 'power_readings': {'power_management': 'Supported', 'power_draw': 38.385, 'power_limit': 250.0, 'default_power_limit': 250.0, 'enforced_power_limit': 250.0, 'min_power_limit': 100.0, 'max_power_limit': 250.0, 'power_state': 'P2', 'unit': 'W'}, 'clocks': {'graphics_clock': 135, 'sm_clock': 135, 'mem_clock': 850, 'unit': 'MHz'}, 'applications_clocks': {'graphics_clock': 1132, 'mem_clock': 850, 'unit': 'MHz'}, 'default_applications_clocks': {'graphics_clock': 1132, 'mem_clock': 850, 'unit': 'MHz'}, 'max_clocks': {'graphics_clock': 1912, 'sm_clock': 1912, 'mem_clock': 850, 'unit': 'MHz'}, 'clock_policy': {'auto_boost': 'N/A', 'auto_boost_default': 'N/A'}, 'supported_clocks': [{'current': 850, 'unit': 'MHz', 'supported_graphics_clock': [1912, 1905, 1897, 1890, 1882, 1875, 1867, 1860, 1852, 1845, 1837, 1830, 1822, 1815, 1807, 1800, 1792, 1785, 1777, 1770, 1762, 1755, 1747, 1740, 1732, 1725, 1717, 1710, 1702, 1695, 1687, 1680, 1672, 1665, 1657, 
1650, 1642, 1635, 1627, 1620, 1612, 1605, 1597, 1590, 1582, 1575, 1567, 1560, 1552, 1545, 1537, 1530, 1522, 1515, 1507, 1500, 1492, 1485, 1477, 1470, 1462, 1455, 1447, 1440, 1432, 1425, 1417, 1410, 1402, 1395, 1387, 1380, 1372, 1365, 1357, 1350, 1342, 1335, 1327, 1320, 1312, 1305, 1297, 1290, 1282, 1275, 1267, 1260, 1252, 1245, 1237, 1230, 1222, 1215, 1207, 1200, 1192, 1185, 1177, 1170, 1162, 1155, 1147, 1140, 1132, 1125, 1117, 1110, 1102, 1095, 1087, 1080, 1072, 1065, 1057, 1050, 1042, 1035, 1027, 1020, 1012, 1005, 997, 990, 982, 975, 967, 960, 952, 945, 937, 930, 922, 915, 907, 900, 892, 885, 877, 870, 862, 855, 847, 840, 832, 825, 817, 810, 802, 795, 787, 780, 772, 765, 757, 750, 742, 735, 727, 720, 712, 705, 697, 690, 682, 675, 667, 660, 652, 645, 637, 630, 622, 615, 607, 600, 592, 585, 577, 570, 562, 555, 547, 540, 532, 525, 517, 510, 502, 495, 487, 480, 472, 465, 457, 450, 442, 435, 427, 420, 412, 405, 397, 390, 382, 375, 367, 360, 352, 345, 337, 330, 322, 315, 307, 300, 292, 285, 277, 270, 262, 255, 247, 240, 232, 225, 217, 210, 202, 195, 187, 180, 172, 165, 157, 150, 142, 135]}, {'current': 850, 'unit': 'MHz', 'supported_graphics_clock': [1912, 1905, 1897, 1890, 1882, 1875, 1867, 1860, 1852, 1845, 1837, 1830, 1822, 1815, 1807, 1800, 1792, 1785, 1777, 1770, 1762, 1755, 1747, 1740, 1732, 1725, 1717, 1710, 1702, 1695, 1687, 1680, 1672, 1665, 1657, 1650, 1642, 1635, 1627, 1620, 1612, 1605, 1597, 1590, 1582, 1575, 1567, 1560, 1552, 1545, 1537, 1530, 1522, 1515, 1507, 1500, 1492, 1485, 1477, 1470, 1462, 1455, 1447, 1440, 1432, 1425, 1417, 1410, 1402, 1395, 1387, 1380, 1372, 1365, 1357, 1350, 1342, 1335, 1327, 1320, 1312, 1305, 1297, 1290, 1282, 1275, 1267, 1260, 1252, 1245, 1237, 1230, 1222, 1215, 1207, 1200, 1192, 1185, 1177, 1170, 1162, 1155, 1147, 1140, 1132, 1125, 1117, 1110, 1102, 1095, 1087, 1080, 1072, 1065, 1057, 1050, 1042, 1035, 1027, 1020, 1012, 1005, 997, 990, 982, 975, 967, 960, 952, 945, 937, 930, 922, 915, 907, 900, 892, 885, 877, 870, 862, 855, 
847, 840, 832, 825, 817, 810, 802, 795, 787, 780, 772, 765, 757, 750, 742, 735, 727, 720, 712, 705, 697, 690, 682, 675, 667, 660, 652, 645, 637, 630, 622, 615, 607, 600, 592, 585, 577, 570, 562, 555, 547, 540, 532, 525, 517, 510, 502, 495, 487, 480, 472, 465, 457, 450, 442, 435, 427, 420, 412, 405, 397, 390, 382, 375, 367, 360, 352, 345, 337, 330, 322, 315, 307, 300, 292, 285, 277, 270, 262, 255, 247, 240, 232, 225, 217, 210, 202, 195, 187, 180, 172, 165, 157, 150, 142, 135]}], 'processes': [{'pid': 1988011, 'process_name': '/home/musk/anaconda3/envs/HARedit/bin/python', 'used_memory': 1049, 'unit': 'MiB'}, {'pid': 2520091, 'process_name': '/home/musk/anaconda3/envs/HARedit/bin/python', 'used_memory': 1049, 'unit': 'MiB'}], 'accounted_processes': None}, {'id': '00000000:B3:00.0', 'product_name': 'Quadro GV100', 'product_brand': 'Quadro', 'display_mode': 'Enabled', 'display_active': 'Disabled', 'persistence_mode': 'Disabled', 'mig_mode': {'current_mm': 'Disabled', 'pending_mm': 'Disabled'}, 'accounting_mode': 'Disabled', 'accounting_mode_buffer_size': '4000', 'driver_model': {'current_dm': 'N/A', 'pending_dm': 'N/A'}, 'serial': '1322220060880', 'uuid': 'GPU-67afe83f-8644-a878-d6fb-8eda0d093f5a', 'minor_number': '1', 'vbios_version': '88.00.87.00.04', 'multigpu_board': 'No', 'board_id': '0xb300', 'inforom_version': {'img_version': 'G500.0500.00.05', 'oem_object': '1.1', 'ecc_object': '5.0', 'pwr_object': 'N/A'}, 'gpu_operation_mode': {'current_gom': 'N/A', 'pending_gom': 'N/A'}, 'pci': {'pci_bus': 'B3', 'pci_device': '00', 'pci_domain': '0000', 'pci_device_id': '1DBA10DE', 'pci_bus_id': '00000000:B3:00.0', 'pci_sub_system_id': '121A1028', 'pci_gpu_link_info': {'pcie_gen': {'max_link_gen': '3', 'current_link_gen': '3'}, 'link_widths': {'max_link_width': '16x', 'current_link_width': '16x'}}, 'pci_bridge_chip': {'bridge_chip_type': 'N/A', 'bridge_chip_fw': 'N/A'}, 'replay_counter': '0', 'tx_util': 0, 'tx_util_unit': 'KB/s', 'rx_util': 0, 'rx_util_unit': 'KB/s'}, 
'fan_speed': 35, 'fan_speed_unit': '%', 'performance_state': 'P2', 'clocks_throttle': {'clocks_throttle_reason_gpu_idle': 'Active', 'clocks_throttle_reason_applications_clocks_setting': 'Not Active', 'clocks_throttle_reason_sw_power_cap': 'Not Active', 'clocks_throttle_reason_hw_slowdown': 'Not Active', 'clocks_throttle_reason_none': 'N/A'}, 'fb_memory_usage': {'total': 32505.0625, 'used': 59.9375, 'free': 32445.125, 'unit': 'MiB'}, 'bar1_memory_usage': {'total': 256.0, 'used': 4.875, 'free': 251.125, 'unit': 'MiB'}, 'compute_mode': 'Default', 'utilization': {'gpu_util': 0, 'memory_util': 0, 'encoder_util': 0, 'decoder_util': 0, 'unit': '%'}, 'ecc_mode': {'current_ecc': 'Disabled', 'pending_ecc': 'Disabled'}, 'ecc_errors': {'volatile': {'single_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}, 'double_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}}, 'aggregate': {'single_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}, 'double_bit': {'device_memory': 'N/A', 'dram': 'N/A', 'register_file': 'N/A', 'l1_cache': 'N/A', 'l2_cache': 'N/A', 'texture_memory': 'N/A', 'cbu': 'N/A', 'sram': 'N/A', 'total': 'N/A'}}}, 'retired_pages': {'multiple_single_bit_retirement': None, 'double_bit_retirement': None, 'pending_retirement': 'No'}, 'temperature': {'gpu_temp': 48, 'gpu_temp_max_threshold': 90, 'gpu_temp_slow_threshold': 88, 'unit': 'C'}, 'power_readings': {'power_management': 'Supported', 'power_draw': 38.971, 'power_limit': 250.0, 'default_power_limit': 250.0, 'enforced_power_limit': 250.0, 'min_power_limit': 100.0, 'max_power_limit': 250.0, 'power_state': 'P2', 'unit': 'W'}, 'clocks': 
{'graphics_clock': 135, 'sm_clock': 135, 'mem_clock': 850, 'unit': 'MHz'}, 'applications_clocks': {'graphics_clock': 1132, 'mem_clock': 850, 'unit': 'MHz'}, 'default_applications_clocks': {'graphics_clock': 1132, 'mem_clock': 850, 'unit': 'MHz'}, 'max_clocks': {'graphics_clock': 1912, 'sm_clock': 1912, 'mem_clock': 850, 'unit': 'MHz'}, 'clock_policy': {'auto_boost': 'N/A', 'auto_boost_default': 'N/A'}, 'supported_clocks': [{'current': 850, 'unit': 'MHz', 'supported_graphics_clock': [1912, 1905, 1897, 1890, 1882, 1875, 1867, 1860, 1852, 1845, 1837, 1830, 1822, 1815, 1807, 1800, 1792, 1785, 1777, 1770, 1762, 1755, 1747, 1740, 1732, 1725, 1717, 1710, 1702, 1695, 1687, 1680, 1672, 1665, 1657, 1650, 1642, 1635, 1627, 1620, 1612, 1605, 1597, 1590, 1582, 1575, 1567, 1560, 1552, 1545, 1537, 1530, 1522, 1515, 1507, 1500, 1492, 1485, 1477, 1470, 1462, 1455, 1447, 1440, 1432, 1425, 1417, 1410, 1402, 1395, 1387, 1380, 1372, 1365, 1357, 1350, 1342, 1335, 1327, 1320, 1312, 1305, 1297, 1290, 1282, 1275, 1267, 1260, 1252, 1245, 1237, 1230, 1222, 1215, 1207, 1200, 1192, 1185, 1177, 1170, 1162, 1155, 1147, 1140, 1132, 1125, 1117, 1110, 1102, 1095, 1087, 1080, 1072, 1065, 1057, 1050, 1042, 1035, 1027, 1020, 1012, 1005, 997, 990, 982, 975, 967, 960, 952, 945, 937, 930, 922, 915, 907, 900, 892, 885, 877, 870, 862, 855, 847, 840, 832, 825, 817, 810, 802, 795, 787, 780, 772, 765, 757, 750, 742, 735, 727, 720, 712, 705, 697, 690, 682, 675, 667, 660, 652, 645, 637, 630, 622, 615, 607, 600, 592, 585, 577, 570, 562, 555, 547, 540, 532, 525, 517, 510, 502, 495, 487, 480, 472, 465, 457, 450, 442, 435, 427, 420, 412, 405, 397, 390, 382, 375, 367, 360, 352, 345, 337, 330, 322, 315, 307, 300, 292, 285, 277, 270, 262, 255, 247, 240, 232, 225, 217, 210, 202, 195, 187, 180, 172, 165, 157, 150, 142, 135]}, {'current': 850, 'unit': 'MHz', 'supported_graphics_clock': [1912, 1905, 1897, 1890, 1882, 1875, 1867, 1860, 1852, 1845, 1837, 1830, 1822, 1815, 1807, 1800, 1792, 1785, 1777, 1770, 1762, 1755, 
1747, 1740, 1732, 1725, 1717, 1710, 1702, 1695, 1687, 1680, 1672, 1665, 1657, 1650, 1642, 1635, 1627, 1620, 1612, 1605, 1597, 1590, 1582, 1575, 1567, 1560, 1552, 1545, 1537, 1530, 1522, 1515, 1507, 1500, 1492, 1485, 1477, 1470, 1462, 1455, 1447, 1440, 1432, 1425, 1417, 1410, 1402, 1395, 1387, 1380, 1372, 1365, 1357, 1350, 1342, 1335, 1327, 1320, 1312, 1305, 1297, 1290, 1282, 1275, 1267, 1260, 1252, 1245, 1237, 1230, 1222, 1215, 1207, 1200, 1192, 1185, 1177, 1170, 1162, 1155, 1147, 1140, 1132, 1125, 1117, 1110, 1102, 1095, 1087, 1080, 1072, 1065, 1057, 1050, 1042, 1035, 1027, 1020, 1012, 1005, 997, 990, 982, 975, 967, 960, 952, 945, 937, 930, 922, 915, 907, 900, 892, 885, 877, 870, 862, 855, 847, 840, 832, 825, 817, 810, 802, 795, 787, 780, 772, 765, 757, 750, 742, 735, 727, 720, 712, 705, 697, 690, 682, 675, 667, 660, 652, 645, 637, 630, 622, 615, 607, 600, 592, 585, 577, 570, 562, 555, 547, 540, 532, 525, 517, 510, 502, 495, 487, 480, 472, 465, 457, 450, 442, 435, 427, 420, 412, 405, 397, 390, 382, 375, 367, 360, 352, 345, 337, 330, 322, 315, 307, 300, 292, 285, 277, 270, 262, 255, 247, 240, 232, 225, 217, 210, 202, 195, 187, 180, 172, 165, 157, 150, 142, 135]}], 'processes': None, 'accounted_processes': None}]}
封装成一个函数: 返回当前可用且利用率最低的 GPU 的 ID
import numpy as np
def getAvailableId(type="min"):
    """Return the index of the least-utilised usable GPU, as a string.

    A GPU counts as usable when NVML reports a memory utilisation rate
    below 80 and a GPU utilisation rate below 90. Among the usable GPUs the
    one with the lowest GPU utilisation rate is chosen; on a tie the lowest
    device index wins.

    Args:
        type: Selection strategy name ("sequence" or "min" according to the
            original notes). It is currently ignored: the lowest-utilisation
            GPU is always selected. The name is kept for backward
            compatibility even though it shadows the builtin.

    Returns:
        str: The NVML device index of the selected GPU, or "-1" when no GPU
        passes the thresholds.
    """
    import pynvml

    pynvml.nvmlInit()
    try:
        # (device_index, gpu_utilisation) for every GPU under the thresholds.
        # The real device index is stored alongside the utilisation because
        # a position in this filtered list is not a device index once any
        # GPU has been skipped.
        candidates = []
        for index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            use = pynvml.nvmlDeviceGetUtilizationRates(handle)
            # NOTE(review): per the NVML docs `use.memory` is the share of
            # time device memory was being read or written, not the share of
            # memory occupied -- confirm this is the intended check.
            if use.memory < 80 and use.gpu < 90:
                candidates.append((index, use.gpu))
    finally:
        # Always release NVML, even when one of the queries above raises.
        pynvml.nvmlShutdown()

    print("GPU used: {}".format([gpu_use for _, gpu_use in candidates]))
    if not candidates:
        return str(-1)
    # min() keeps the first minimum, so ties resolve to the lowest index.
    best_index, _ = min(candidates, key=lambda item: item[1])
    return str(best_index)
reference
@online{BibEntry2022Feb,
title = {{Pynvml :: Anaconda.org}},
year = {2022},
month = {2},
date = {2022-02-16},
urldate = {2022-02-16},
language = {english},
hyphenation = {english},
note = {[Online; accessed 16. Feb. 2022]},
url = {https://anaconda.org/conda-forge/pynvml}
}