"""Periodically sample GPU and GPU-process status via NVML and append it to a JSON-lines log."""
import json
import os
import time

import psutil
import pynvml

_BYTES_PER_GB = 1024 * 1024 * 1024


def _collect_gpu_status(index, handle):
    """Return memory and power readings for one GPU as a JSON-serializable dict."""
    memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    return dict(
        device=index,
        total_mem_GB=memory_info.total / _BYTES_PER_GB,
        used_mem_GB=memory_info.used / _BYTES_PER_GB,
        # The raw reading is divided by 1000 to express it in watts.
        powerusage_W=pynvml.nvmlDeviceGetPowerUsage(handle) / 1000,
    )


def _collect_process_status(index, handle):
    """Return one dict per compute process currently running on the given GPU."""
    statuses = []
    for proc_info in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
        user = create_time = name = None
        try:
            proc = psutil.Process(proc_info.pid)
            user = proc.username()
            create_time = proc.create_time()
            name = proc.name()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process may have exited since NVML listed it, or may not be
            # inspectable by this user; keep the GPU-side numbers and leave
            # the host-side fields as None rather than aborting the monitor.
            pass

        # NVML may be unable to report per-process memory; keep None then.
        used_bytes = proc_info.usedGpuMemory
        statuses.append(dict(
            device=index,
            pid=proc_info.pid,
            create_time=create_time,
            name=name,
            user=user,
            used_mem_GB=None if used_bytes is None else used_bytes / _BYTES_PER_GB,
        ))
    return statuses


def main(duration_s=3600 * 24, interval_s=60,
         log_path="./results/gpu_status/gpu_status_0825.json"):
    """Log GPU and GPU-process status at a fixed interval.

    Each sample is appended to ``log_path`` as one JSON object per line and
    also printed to stdout.

    Args:
        duration_s: How long to keep sampling, in seconds (default: 24 hours).
        interval_s: Pause between consecutive samples, in seconds.
        log_path: File the JSON lines are appended to. Its parent directory
            is created if it does not exist.
    """
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    pynvml.nvmlInit()
    try:
        gpu_count = pynvml.nvmlDeviceGetCount()
        # Monotonic clock: the run length must not change if the wall clock
        # is adjusted while the monitor is running.
        started = time.monotonic()
        while time.monotonic() - started < duration_s:
            all_gpu_status = []
            all_processes_status = []
            # Query processes per device so every GPU's processes are logged,
            # not only those on the last enumerated device.
            for i in range(gpu_count):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                all_gpu_status.append(_collect_gpu_status(i, handle))
                all_processes_status.extend(_collect_process_status(i, handle))

            logs = dict(
                cur_time=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                all_gpu_status=all_gpu_status,
                all_processes_status=all_processes_status,
            )
            with open(log_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(logs) + "\n")
            print(logs)
            time.sleep(interval_s)
    finally:
        # Always release NVML, including on Ctrl-C or an unexpected error.
        pynvml.nvmlShutdown()


if __name__ == "__main__":
    main()