更新 Others/根据gpu使用率调度/获取gpu使用率脚本.md
This commit is contained in:
parent
cd785fef93
commit
3745092472
1 changed files with 51 additions and 0 deletions
|
@ -39,6 +39,57 @@ def zsetData(**kwargs):
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Publish one load score per serving port of this host into the Redis
    # sorted set "danceai"; members are "<host ip>:<port>".
    # NOTE(review): the Redis endpoint and password are hard-coded in a
    # committed file -- rotate the secret and load it from the environment
    # or a secrets store.
    local="172.20.1.3"
    pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15)
    r=redis.Redis(connection_pool=pool)
    pipe=r.pipeline(transaction=True)
    # Kept as a module-level name: presumably zsetData() reads gpuUsageInfo
    # as a global rather than taking it as an argument -- confirm.
    gpuUsageInfo=getGpuInfo()
    data=zsetData(**gpu)
    for key in data:
        # Queue on the pipeline (it was previously created but never used)
        # so all scores are sent together instead of one round-trip each.
        pipe.zadd("danceai",{f"{local}:{key}":data[key]})
    # Send every queued ZADD in a single MULTI/EXEC transaction.
    pipe.execute()
|
||||||
|
(smi) spwang@k8s-worker-003:~$ cat smi-v1.py
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import pynvml
|
||||||
|
import redis
|
||||||
|
|
||||||
|
# Binding between LLM serving processes (identified by port) and GPUs: port: [gpu_device_id, ...]
|
||||||
|
gpu={'1001':['g0','g1'],'1002':['g3'],'1003':['g4']}
|
||||||
|
|
||||||
|
def getGpuInfo():
    """Collect memory and compute utilisation for every GPU NVML reports.

    Returns:
        dict: maps ``"g<index>"`` (the NVML device index) to a dict with
        ``'used'`` -- device memory in use as a percentage of total -- and
        ``'util'`` -- the ``gpu`` field of NVML's utilization rates.
    """
    gpuUsageInfo = {}
    pynvml.nvmlInit()
    try:
        deviceCount = pynvml.nvmlDeviceGetCount()
        for i in range(deviceCount):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            gpuUtil = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpuUsageInfo[f"g{i}"] = {
                'used': (memInfo.used / memInfo.total) * 100,
                'util': gpuUtil.gpu,
            }
    finally:
        # Release NVML even when a query fails part-way through; previously
        # an exception skipped the shutdown call.
        pynvml.nvmlShutdown()
    return gpuUsageInfo
|
||||||
|
|
||||||
|
def getBanlanceRate(memory, gpu):
    """Return the combined load score: memory plus gpu, truncated to an int."""
    combined = memory + gpu
    return int(combined)
|
||||||
|
|
||||||
|
def zsetData(**kwargs):
    """Build the per-key load scores from the module-level ``gpuUsageInfo``.

    Each keyword argument maps a key to an iterable of device ids. For every
    key, the ``'used'`` and ``'util'`` readings of its devices are summed
    separately and passed to ``getBanlanceRate`` to produce the score.

    Returns:
        dict: key -> integer score.
    """
    scores = {}
    for name, devices in kwargs.items():
        mem_total = 0
        util_total = 0
        for dev in devices:
            reading = gpuUsageInfo[dev]
            mem_total += reading['used']
            util_total += reading['util']
        scores[name] = getBanlanceRate(mem_total, util_total)
    return scores
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
local="172.20.1.3"
|
local="172.20.1.3"
|
||||||
pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15)
|
pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue