添加 Others/根据gpu使用率调度/获取gpu使用率脚本.md
This commit is contained in:
parent
c7ed98c63e
commit
a0d8b99096
1 changed files with 52 additions and 0 deletions
52
Others/根据gpu使用率调度/获取gpu使用率脚本.md
Normal file
52
Others/根据gpu使用率调度/获取gpu使用率脚本.md
Normal file
|
@ -0,0 +1,52 @@
|
|||
```python
|
||||
#!/usr/bin/env python
|
||||
|
||||
import pynvml
|
||||
import redis
|
||||
|
||||
# Binding between LLM serving processes (identified by port) and GPUs: port -> [gpu_device_id, ...]
|
||||
gpu = {
    '1001': ['g0', 'g1'],
    '1002': ['g3'],
    '1003': ['g4'],
}
|
||||
|
||||
def getGpuInfo():
    """Collect free memory and idle percentage for every GPU NVML reports.

    Returns:
        dict: keyed by ``"g<index>"`` (the NVML device index). Each value is
        ``{'free': ..., 'util': ...}`` where ``'free'`` is the free device
        memory converted from bytes to MiB and ``'util'`` is
        ``100 - <GPU utilization %>``. Despite its name, ``'util'`` is the
        *idle* share, so a larger value means a less busy device.

    Raises:
        Any NVML error raised by ``pynvml`` propagates to the caller; the
        NVML session is still shut down in that case.
    """
    gpuUsageInfo = {}
    pynvml.nvmlInit()
    try:
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            gpuUtil = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpuUsageInfo[f"g{i}"] = {
                # Bytes -> MiB.
                'free': memInfo.free / 1024 ** 2,
                # Stored as idle percentage so that "higher is better",
                # matching the direction of the free-memory figure.
                'util': 100 - gpuUtil.gpu,
            }
    finally:
        # Always release NVML, even when one of the queries above fails.
        pynvml.nvmlShutdown()
    return gpuUsageInfo
|
||||
|
||||
def getBanlanceRate(memory, gpu, memoryScale=100):
    """Fold free memory and idle percentage into one integer balance score.

    Args:
        memory: Amount of free GPU memory (the caller in this file passes a
            sum in MiB).
        gpu: Idle percentage figure (the caller in this file passes a sum of
            per-device ``100 - utilization`` values).
        memoryScale: Divisor applied to ``memory`` before it is added to
            ``gpu``. The default of 100 reproduces the original fixed
            formula.

    Returns:
        int: ``memory / memoryScale + gpu`` truncated toward zero.
    """
    return int(memory / memoryScale + gpu)
|
||||
|
||||
def zsetData(**kwargs):
    """Compute one balance score per port from the collected GPU readings.

    Each keyword argument maps a port to the list of GPU ids (``"g<index>"``)
    that port uses. Readings are looked up in the module-level
    ``gpuUsageInfo`` dict, which must be assigned before this is called.

    Returns:
        dict: port -> score, where the score is ``getBanlanceRate`` applied
        to the summed ``'free'`` and summed ``'util'`` values of the port's
        GPUs.

    Raises:
        KeyError: if a listed GPU id is missing from ``gpuUsageInfo``.
    """
    data = {}
    for port, deviceIds in kwargs.items():
        freeTotal = 0
        idleTotal = 0
        for deviceId in deviceIds:
            reading = gpuUsageInfo[deviceId]
            freeTotal += reading['free']
            idleTotal += reading['util']
        data[port] = getBanlanceRate(freeTotal, idleTotal)
    return data
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Address of this host; combined with each port to form the sorted-set
    # member name "<local>:<port>".
    local = "172.20.1.3"

    # SECURITY NOTE(review): the Redis endpoint and password are hard-coded
    # in source. Move them to configuration or environment variables and
    # rotate the password.
    pool = redis.ConnectionPool(host="60.204.148.84", port=6379, password='xm!redis123', db=15)
    r = redis.Redis(connection_pool=pool)

    # Must remain a module-level name: zsetData() reads the global
    # gpuUsageInfo instead of taking it as an argument.
    gpuUsageInfo = getGpuInfo()
    data = zsetData(**gpu)

    # Queue every score update on the transactional pipeline and send them
    # together, so the "danceai" sorted set is updated in one MULTI/EXEC
    # round trip instead of one command per port.
    pipe = r.pipeline(transaction=True)
    for key, score in data.items():
        pipe.zadd("danceai", {f"{local}:{key}": score})
    pipe.execute()
|
||||
```
|
Loading…
Add table
Add a link
Reference in a new issue