更新 Others/根据gpu使用率调度/获取gpu使用率脚本.md
This commit is contained in:
parent
cd785fef93
commit
3745092472
1 changed files with 51 additions and 0 deletions
|
@ -39,6 +39,57 @@ def zsetData(**kwargs):
|
|||
return data
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Prefix for every sorted-set member written below
    # (presumably this node's address -- confirm before reusing elsewhere).
    local="172.20.1.3"
    # NOTE(review): the Redis host and password are hard-coded in a published
    # document; rotate this credential and load it from configuration instead.
    pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15)
    r=redis.Redis(connection_pool=pool)
    pipe=r.pipeline(transaction=True)
    # Must remain a module-level name: zsetData() reads `gpuUsageInfo` as a global.
    gpuUsageInfo=getGpuInfo()
    data=zsetData(**gpu)
    # Queue every score on the transactional pipeline so that all members of
    # "danceai" are updated together in one round trip, instead of issuing one
    # independent ZADD per key while the pipeline sits unused.
    for key, score in data.items():
        pipe.zadd("danceai",{f"{local}:{key}":score})
    pipe.execute()
|
||||
(smi) spwang@k8s-worker-003:~$ cat smi-v1.py
|
||||
#!/usr/bin/env python
|
||||
|
||||
import pynvml
|
||||
import redis
|
||||
|
||||
# Binding between each large-model process (identified by its port) and its GPUs: port: [gpu_device_id..]
|
||||
gpu={'1001':['g0','g1'],'1002':['g3'],'1003':['g4']}
|
||||
|
||||
def getGpuInfo():
    """Collect memory and compute usage for every GPU visible to NVML.

    Returns:
        dict: maps ``"g<index>"`` (NVML device index) to a dict with
            ``'used'`` -- used device memory as a percentage of total memory,
            ``'util'`` -- the ``gpu`` field returned by
            ``nvmlDeviceGetUtilizationRates`` for that device.

    Raises:
        Whatever ``pynvml`` raises when initialisation or a device query
        fails; NVML is still shut down in that case.
    """
    gpuUsageInfo = {}
    pynvml.nvmlInit()
    try:
        deviceCount = pynvml.nvmlDeviceGetCount()
        for i in range(deviceCount):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            gpuUtil = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpuUsageInfo[f"g{i}"] = {
                'used': (memInfo.used / memInfo.total) * 100,
                'util': gpuUtil.gpu,
            }
    finally:
        # Release NVML even when a query above raises, so a failed probe
        # does not leave the library initialised.
        pynvml.nvmlShutdown()
    return gpuUsageInfo
|
||||
|
||||
def getBanlanceRate(memory, gpu):
    """Combine a memory figure and a GPU figure into one integer score.

    Args:
        memory: memory-usage value to add.
        gpu: GPU-utilisation value to add.

    Returns:
        int: ``memory + gpu`` converted with ``int()`` (fraction discarded).
    """
    combined = memory + gpu
    return int(combined)
|
||||
|
||||
def zsetData(**kwargs):
    """Compute one load score per keyword from the GPUs listed under it.

    Each keyword maps to an iterable of device ids. For every device id the
    ``'used'`` and ``'util'`` values are read from the module-level
    ``gpuUsageInfo`` mapping, summed separately, and the two sums are passed
    to ``getBanlanceRate``.

    Args:
        **kwargs: keyword -> iterable of device ids present in ``gpuUsageInfo``.

    Returns:
        dict: keyword -> score returned by ``getBanlanceRate``.
    """
    scores = {}
    for name, devices in kwargs.items():
        mem_sum = 0
        util_sum = 0
        for dev in devices:
            stats = gpuUsageInfo[dev]
            mem_sum += stats['used']
            util_sum += stats['util']
        scores[name] = getBanlanceRate(mem_sum, util_sum)
    return scores
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
local="172.20.1.3"
|
||||
pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue