From 91db09b6c753d0f0cea34171b66361c03f7046af Mon Sep 17 00:00:00 2001
From: wql <wengqinlan18@163.com>
Date: Wed, 4 Sep 2024 16:15:17 +0800
Subject: [PATCH] feat: add gpu_status.py

---
 gpu_status.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 gpu_status.py

diff --git a/gpu_status.py b/gpu_status.py
new file mode 100644
index 00000000..28df947c
--- /dev/null
+++ b/gpu_status.py
@@ -0,0 +1,56 @@
+import json
+import pynvml 
+import time
+import psutil
+
+
+def main():
+    """Log each NVIDIA GPU's memory/power and its compute processes every minute for 24 h.
+
+    Every sample is printed and written to ./results/gpu_status_<timestamp>.json (dir must exist).
+    """
+    UNIT = 1024 * 1024 * 1024  # bytes per GiB
+    pynvml.nvmlInit()
+    try:  # guarantee nvmlShutdown() even if a query or the file write raises
+        gpuDeviceCount = pynvml.nvmlDeviceGetCount()
+        start_time = time.time()
+        while time.time() - start_time < 3600 * 24:
+            all_gpu_status = []
+            all_processes_status = []
+            for i in range(gpuDeviceCount):
+                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                memoryInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
+                all_gpu_status.append(dict(
+                    device=i,
+                    total_mem_GB=memoryInfo.total / UNIT,
+                    used_mem_GB=memoryInfo.used / UNIT,
+                    powerusage_W=pynvml.nvmlDeviceGetPowerUsage(handle) / 1000,
+                ))
+                # Per-device query: asking once after this loop saw only the last GPU (NameError with none).
+                # usedGpuMemory may be None (NVML "not available"), so it is passed through as null.
+                for pidInfo in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
+                    try:
+                        proc = psutil.Process(pidInfo.pid)
+                        pidUser, pidName, pidCreateTime = proc.username(), proc.name(), proc.create_time()
+                    except psutil.Error:  # process exited or is inaccessible since the NVML query
+                        pidUser = pidName = pidCreateTime = None
+                    all_processes_status.append(dict(
+                        device=i, pid=pidInfo.pid, create_time=pidCreateTime, name=pidName, user=pidUser,
+                        used_mem_GB=None if pidInfo.usedGpuMemory is None else pidInfo.usedGpuMemory / UNIT,
+                    ))
+            logs = dict(
+                cur_time=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
+                all_gpu_status=all_gpu_status,
+                all_processes_status=all_processes_status,
+            )
+            formatted_time = time.strftime('%Y%m%d%H%M%S', time.localtime())
+            with open(f"./results/gpu_status_{formatted_time}.json", "a", encoding="utf-8") as f:
+                f.write(json.dumps(logs) + "\n")
+            print(logs)
+            time.sleep(60)
+    finally:
+        pynvml.nvmlShutdown()
+
+
+if __name__ == "__main__":  # entry point: run the monitor only when executed as a script
+    main()