# coding=utf-8
# @Author: Qifeng Wen
# @Contact: 15399905897@163.com
# @File: get_hardware_info.py
# @Time: 2019/08/05 17:10:00
# @Software: PyCharm
from pynvml import *
import psutil
import socket
import re
import subprocess
def get_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function is called with the caller's arguments, the elapsed
    wall-clock time in seconds is printed to stdout, and the function's
    return value is passed through unchanged.  If ``func`` raises, nothing is
    printed and the exception propagates.

    :param func: the callable to time
    :return: a wrapper with the same call signature as ``func``
    """
    import functools
    import time

    # functools.wraps keeps __name__/__doc__ of the decorated function so the
    # timed helpers remain identifiable in tracebacks and help() output.
    @functools.wraps(func)
    def inner(*args, **kwargs):
        started = time.time()
        ret = func(*args, **kwargs)
        print(time.time() - started)
        return ret

    return inner
class Utils(object):
    """Singleton namespace of helpers that report the host's hardware status.

    Every public helper is a static method wrapped in ``get_time``, so it can
    be called as ``Utils.get_xxx()`` without creating an instance, and each
    call prints its own elapsed time.
    """

    __instance = None  # the shared instance; stays None until first construction

    def __new__(cls):
        """Return the shared instance, creating it on the first call (singleton)."""
        if cls.__instance is None:
            # First construction: build the instance through object.__new__.
            cls.__instance = object.__new__(cls)
        return cls.__instance

    # Unit suffix for each power of 1024, keyed by the exponent as a string.
    __unit_dict = {
        '0': 'B',
        '1': 'K',
        '2': 'M',
        '3': 'G',
        '4': 'T',
    }

    # NOTE: an earlier get_gpu_info ran a separate `nvidia-smi -q | grep ...`
    # pipeline for every metric; it was dropped because it took too long.  The
    # version below runs `nvidia-smi -q` once and extracts every metric from
    # that single output with regular expressions.

    @staticmethod
    def __value_at(values, index):
        """Return ``values[index]``, or None when the list is too short.

        The regex result lists can hold fewer entries than there are GPUs
        whenever a field in the ``nvidia-smi`` output does not match its
        pattern, so indexing them must not be allowed to raise.
        """
        return values[index] if index < len(values) else None

    @staticmethod
    @get_time
    def get_gpu_info():
        """Return GPU model, count, per-card usage and aggregate statistics.

        Memory figures come from NVML; temperatures, fan speed, power and
        utilisation are parsed from one ``nvidia-smi -q`` invocation.

        :return: dict with keys ``num``, ``model``, ``use_detail`` (one dict
            per GPU), ``slowdown_temp``, ``shutdown_temp``,
            ``average_use_rate``, ``average_utilization_rate``, ``temp_max``
            and ``fan_speed_max``.  A value is None when the corresponding
            field could not be found in the ``nvidia-smi`` output.
        """
        gpu_info_dic = dict()
        nvmlInit()

        # Number of GPUs
        gpu_count = nvmlDeviceGetCount()
        gpu_info_dic['num'] = gpu_count

        # GPU model, taken from the first card.  Depending on the pynvml
        # version the name is returned as bytes or as str.
        model = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(0))
        if isinstance(model, bytes):
            model = model.decode()
        gpu_info_dic['model'] = model

        gpu_info_str = subprocess.getstatusoutput("nvidia-smi -q")[-1]

        # Temperatures: current, slowdown threshold, shutdown threshold
        current_temp_list = re.findall(r'GPU Current Temp.*?: (\d+\.?\d*) C', gpu_info_str)
        slowdown_temp_list = re.findall(r'GPU Slowdown Temp.*?: (\d+\.?\d*) C', gpu_info_str)
        shutdown_temp_list = re.findall(r'GPU Shutdown Temp.*?: (\d+\.?\d*) C', gpu_info_str)

        # Fan speed
        fan_speed_list = re.findall(r'.*Fan Speed.*?: (\d+\.?\d*) %', gpu_info_str)

        # Power: current draw and configured limit
        power_draw_list = re.findall(r'Power Draw.*?: (\d+\.?\d*) W', gpu_info_str)
        power_limit_list = re.findall(r'Power Limit.*?: (\d+\.?\d*) W.*?Default Power', gpu_info_str, re.S)

        # GPU utilisation
        gpu_utilization_rate_list = re.findall(r'.*Gpu.*?: (\d+\.?\d*) %', gpu_info_str)

        value_at = Utils.__value_at

        # Per-card details, plus running totals for the average memory usage
        use_list = list()
        used_sum, total_sum = 0, 0
        for i in range(gpu_count):
            handle = nvmlDeviceGetHandleByIndex(i)
            memory_info = nvmlDeviceGetMemoryInfo(handle)
            # NVML reports bytes; convert to MiB
            used, total = memory_info.used / 1024 ** 2, memory_info.total / 1024 ** 2
            used_sum += used
            total_sum += total

            use_list.append({
                'used': used,  # memory in use
                'total': total,  # memory size
                'temp': value_at(current_temp_list, i),  # current temperature
                'slowdown_temp': value_at(slowdown_temp_list, i),  # slowdown threshold
                'shutdown_temp': value_at(shutdown_temp_list, i),  # shutdown threshold
                'fan_speed': value_at(fan_speed_list, i),  # fan speed
                'power_draw': value_at(power_draw_list, i),  # power draw
                'power_limit': value_at(power_limit_list, i),  # power limit
                'utilization_rate': value_at(gpu_utilization_rate_list, i),  # utilisation
            })
        gpu_info_dic['use_detail'] = use_list

        gpu_info_dic['slowdown_temp'] = value_at(slowdown_temp_list, 0)
        gpu_info_dic['shutdown_temp'] = value_at(shutdown_temp_list, 0)

        if total_sum:
            gpu_info_dic['average_use_rate'] = '%.2f%s' % ((used_sum / total_sum) * 100, '%')
        else:
            gpu_info_dic['average_use_rate'] = None

        if gpu_utilization_rate_list:
            gpu_info_dic['average_utilization_rate'] = '%.f%s' % (
                (sum(map(float, gpu_utilization_rate_list)) // len(gpu_utilization_rate_list)), '%')
        else:
            gpu_info_dic['average_utilization_rate'] = None

        if current_temp_list:
            gpu_info_dic['temp_max'] = '%.2f' % max(map(float, current_temp_list))
        else:
            gpu_info_dic['temp_max'] = None

        if fan_speed_list:
            gpu_info_dic['fan_speed_max'] = '%.2f' % max(map(float, fan_speed_list))
        else:
            gpu_info_dic['fan_speed_max'] = None

        return gpu_info_dic

    @staticmethod
    @get_time
    def get_cpu_info():
        """Return the CPU model and, when available, its temperature readings.

        The model is the first ``model name`` entry in ``/proc/cpuinfo`` (an
        empty string when there is none).  For Intel and AMD CPUs the first
        reading of the ``coretemp`` / ``k10temp`` sensor is added.

        :return: dict with ``model`` and, if the sensor exists,
            ``coretemp_current`` and ``coretemp_high``
        """
        cpu_data = dict()

        cpu_model = ''
        with open("/proc/cpuinfo") as f:
            for line in f:
                if 'model name' in line:
                    cpu_model = line.split(': ')[-1].strip('\n')
                    break
        cpu_data['model'] = cpu_model

        # The first word of the model string identifies the vendor, which in
        # turn selects the psutil temperature sensor to read.
        sensor_by_vendor = {'Intel(R)': 'coretemp', 'AMD': 'k10temp'}
        sensor_name = sensor_by_vendor.get(cpu_model.split(' ')[0])
        if sensor_name is not None:
            readings = psutil.sensors_temperatures().get(sensor_name)
            if readings:
                cpu_data['coretemp_current'] = readings[0].current
                cpu_data['coretemp_high'] = readings[0].high
        return cpu_data

    @staticmethod
    @get_time
    def get_host_ip():
        """Return the local IP address used for outbound traffic.

        A UDP socket is connected to 8.8.8.8:80 and the socket's own address
        is read back.

        :return: the IP address as a string, or None if it cannot be determined
        """
        try:
            # The with-block closes the socket even when connect() fails.
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
                s.connect(('8.8.8.8', 80))
                return s.getsockname()[0]
        except OSError:
            return None

    @staticmethod
    @get_time
    def get_memory_info():
        """Return a summary of system memory.

        :return: dict with human-readable ``size`` and ``available`` and the
            ``usage`` percentage
        """
        memory_info = psutil.virtual_memory()
        memory_data = dict()
        memory_data['size'] = Utils.__get_unit(memory_info.total)
        memory_data['available'] = Utils.__get_unit(memory_info.available)
        memory_data['usage'] = '%.2f%s' % (memory_info.percent, '%')
        return memory_data

    @staticmethod
    def __get_unit(num, index=0):
        """Format ``num`` with a unit suffix, scaling by 1024 per step.

        :param num: the quantity, expressed in the unit selected by ``index``
        :param index: position of ``num``'s current unit in the unit table
            (0 = B, 1 = K, ...)
        :return: a string such as ``'1.50 G'``; values beyond the largest
            known unit are expressed in that largest unit
        """
        largest = len(Utils.__unit_dict) - 1
        while num >= 1024 and index < largest:
            num /= 1024
            index += 1
        return '%.2f %s' % (num, Utils.__unit_dict[str(index)])

    @staticmethod
    @get_time
    def get_disk_info(path='/home/'):
        """Return disk usage for the filesystem containing ``path``.

        :param path: any path on the filesystem of interest
        :return: dict with human-readable ``size`` and ``available`` and the
            ``usage_rate`` percentage
        """
        disk_info = psutil.disk_usage(path)
        disk_data = dict()
        disk_data['size'] = Utils.__get_unit(disk_info.total)
        disk_data['available'] = Utils.__get_unit(disk_info.free)
        disk_data['usage_rate'] = '%.2f%s' % (disk_info.percent, '%')
        return disk_data

    @staticmethod
    @get_time
    def get_user_use_of_gpu_details():
        """Return per-GPU memory usage grouped by user and command.

        Parses the process column (fourth ``|``-separated field onwards) of
        ``gpustat -cu``.  Entries that do not look like ``name(sizeX)``, or
        whose size is not numeric, are ignored, as are entries smaller
        than 800.

        :return: dict mapping the output line index as a string (presumably
            the GPU index -- confirm against gpustat output) to a dict of
            ``{user_and_command: summed_size}``
        """
        output = subprocess.getstatusoutput("gpustat -cu |cut -d'|' -f 4- ")[-1]

        data = dict()
        # The first output line is skipped (presumably gpustat's header line).
        for index, line in enumerate(output.split('\n')[1:]):
            user_use_details_dict = dict()
            # The first space-separated token is dropped, as the field
            # starts with a space after the `|` separator.
            for val in line.split(' ')[1:]:
                ret = re.search(r'(.*)\((.*)\)', val)
                if ret is None:
                    continue
                user_and_command, size = ret.groups()
                try:
                    size_num = int(size[:-1])  # strip the trailing unit letter
                except ValueError:
                    continue
                if size_num < 800:
                    continue
                user_use_details_dict[user_and_command] = (
                    user_use_details_dict.get(user_and_command, 0) + size_num)
            data[str(index)] = user_use_details_dict
        return data
def return_storage_server_data():
    """Build the status report for a storage server.

    :return: dict with the disk usage under ``disk`` and the host IP under
        ``host['ip']``
    """
    # Dict displays evaluate in order, so the disk is queried before the IP.
    return {
        'disk': Utils.get_disk_info(),
        'host': {'ip': Utils.get_host_ip()},
    }
def return_calculate_server_data():
    """Build the status report for a compute server and print the total time.

    Collects GPU, CPU and memory information, the host IP and the per-user
    GPU usage, in that order, then prints the elapsed seconds for the whole
    collection.

    :return: dict with keys ``gpu``, ``cpu``, ``memory`` and ``host``
    """
    # Imported here so the function also works when this module is imported
    # rather than run as a script.
    import time

    data = dict()
    t1 = time.time()
    data['gpu'] = Utils.get_gpu_info()
    data['cpu'] = Utils.get_cpu_info()
    data['memory'] = Utils.get_memory_info()
    data['host'] = {
        'ip': Utils.get_host_ip(),
        'user_use_details': Utils.get_user_use_of_gpu_details(),
    }
    t2 = time.time()
    print(t2 - t1)
    return data
def test():
    """Sleep for two seconds, then return ``'Hello'``.

    NOTE(review): looks like a scratch target for trying out timing code --
    confirm whether it is still needed.
    """
    # Imported here so the function also works when this module is imported
    # rather than run as a script.
    import time

    time.sleep(2)
    return 'Hello'
if __name__ == '__main__':
    # Bind `time` at module scope before the report is collected, so any code
    # that looks `time` up as a module global can resolve it.
    import time

    # Script entry point: gather the compute-server report and print it.
    server_report = return_calculate_server_data()
    print(server_report)
# Retrieving hardware data with Python 3
# (web-page footer: latest recommended article published 2024-03-06 15:55:53)