From fa5340a68fb44b8da93e3823aa53e21368e29e9f Mon Sep 17 00:00:00 2001
From: iProbe <iprobe@noreply.localhost>
Date: Fri, 26 Jan 2024 16:52:36 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20Others/=E6=A0=B9=E6=8D=AEg?=
 =?UTF-8?q?pu=E4=BD=BF=E7=94=A8=E7=8E=87=E8=B0=83=E5=BA=A6/=E8=8E=B7?=
 =?UTF-8?q?=E5=8F=96gpu=E4=BD=BF=E7=94=A8=E7=8E=87=E8=84=9A=E6=9C=AC.md?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Others/根据gpu使用率调度/获取gpu使用率脚本.md | 55 +------------------
 1 file changed, 2 insertions(+), 53 deletions(-)

diff --git a/Others/根据gpu使用率调度/获取gpu使用率脚本.md b/Others/根据gpu使用率调度/获取gpu使用率脚本.md
index ef7dffb..6efa1b1 100644
--- a/Others/根据gpu使用率调度/获取gpu使用率脚本.md
+++ b/Others/根据gpu使用率调度/获取gpu使用率脚本.md
@@ -8,57 +8,6 @@ import redis
 # 大模型进程（端口）与gpu绑定关系：port: [gpu_device_id..]
 gpu={'1001':['g0','g1'],'1002':['g3'],'1003':['g4']}
 
-def getGpuInfo():
-    gpuUsageInfo={}
-    pynvml.nvmlInit()
-    deviceCount = pynvml.nvmlDeviceGetCount()
-    for i in range(deviceCount):
-        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
-        memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
-        gpuUtil = pynvml.nvmlDeviceGetUtilizationRates(handle)
-        gpuUsageInfo[f"g{i}"] = {
-            'free': memInfo.free/1024**2,
-            'util': 100-gpuUtil.gpu
-        }
-
-    pynvml.nvmlShutdown()
-    return gpuUsageInfo
-
-def getBanlanceRate(memory,gpu):
-    return int(memory/100+gpu)
-
-def zsetData(**kwargs):
-    data={}
-    for key in kwargs:
-        memory=0
-        gpu=0
-        for d in kwargs[key]:
-            memory+=gpuUsageInfo[d]['free']
-            gpu+=gpuUsageInfo[d]['util']
-        banlance=getBanlanceRate(memory,gpu)
-        data[key]=banlance
-    return data
-
-
-if __name__ == '__main__':
-    local="172.20.1.3"
-    pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15)
-    r=redis.Redis(connection_pool=pool)
-    pipe=r.pipeline(transaction=True)
-    gpuUsageInfo=getGpuInfo()
-    data=zsetData(**gpu)
-    for key in data:
-        #print(data[key],f"{local}:{key}")
-        r.zadd("danceai",{f"{local}:{key}":data[key]})
-(smi) spwang@k8s-worker-003:~$ cat smi-v1.py
-#!/usr/bin/env python
-
-import pynvml
-import redis
-
-# 大模型进程（端口）与gpu绑定关系：port: [gpu_device_id..]
-gpu={'1001':['g0','g1'],'1002':['g3'],'1003':['g4']}
-
 def getGpuInfo():
     gpuUsageInfo={}
     pynvml.nvmlInit()
@@ -76,7 +25,7 @@ def getGpuInfo():
     return gpuUsageInfo
 
 def getBanlanceRate(memory,gpu):
-    return int(memory+gpu)
+    return memory+gpu
 
 def zsetData(**kwargs):
     data={}
@@ -86,7 +35,7 @@ def zsetData(**kwargs):
         for d in kwargs[key]:
             memory+=gpuUsageInfo[d]['used']
             gpu+=gpuUsageInfo[d]['util']
-        banlance=getBanlanceRate(memory,gpu)
+        banlance=int(getBanlanceRate(memory,gpu)/len(kwargs[key]))
         data[key]=banlance
     return data