50 lines
No EOL
1.5 KiB
Python
50 lines
No EOL
1.5 KiB
Python
#!/usr/bin/env python
|
||
|
||
import pynvml
|
||
import redis
|
||
|
||
# Binding between large-model serving processes (identified by port) and the
# GPUs they run on: port -> [gpu_device_id, ...]
gpu = {
    '1001': ['g0', 'g1'],
    '1002': ['g3'],
    '1003': ['g4'],
}
|
||
|
||
def getGpuInfo():
    """Take a snapshot of memory usage and utilization for every NVML device.

    Returns:
        dict: Maps ``"g<index>"`` (``"g0"``, ``"g1"``, ...) to a dict with:
            ``'used'``: device memory in use as a percentage of total memory.
            ``'util'``: the ``gpu`` field of NVML's utilization rates
            (reported by NVML as a percentage).

    Raises:
        pynvml.NVMLError: Propagated unchanged if initialization or any device
            query fails.
    """
    gpuUsageInfo = {}
    pynvml.nvmlInit()
    try:
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            gpuUtil = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpuUsageInfo[f"g{i}"] = {
                'used': (memInfo.used / memInfo.total) * 100,
                'util': gpuUtil.gpu,
            }
    finally:
        # Always release the NVML session, even when a device query raises;
        # previously a failure mid-loop skipped the shutdown call.
        pynvml.nvmlShutdown()
    return gpuUsageInfo
|
||
|
||
def getBanlanceRate(memory, gpu):
    """Combine a memory figure and a GPU figure into a single balance score.

    Args:
        memory: Memory-usage figure.
        gpu: GPU-utilization figure.

    Returns:
        ``memory + gpu``; both inputs carry equal weight.
    """
    combined = memory + gpu
    return combined
|
||
|
||
def zsetData(**kwargs):
    """Compute an integer load score for each key from the GPU usage snapshot.

    Each keyword argument maps a key (a port string in this script) to a list
    of GPU ids such as ``'g0'``. The ``'used'`` and ``'util'`` values of the
    listed devices are summed, combined with ``getBanlanceRate`` and divided by
    the number of devices; the result is truncated with ``int()``.

    This function reads the module-level ``gpuUsageInfo`` dict, which must be
    assigned (for example from ``getGpuInfo()``) before the call.

    Args:
        **kwargs: key -> list of GPU ids present in ``gpuUsageInfo``.

    Returns:
        dict: key -> integer score. Keys bound to an empty device list are
        omitted, since no per-device average exists for them (this previously
        raised ``ZeroDivisionError``).

    Raises:
        KeyError: If a GPU id is not present in ``gpuUsageInfo``.
    """
    scores = {}
    for key, devices in kwargs.items():
        if not devices:
            # Nothing to average over; publish no score rather than divide by zero.
            continue
        # Named so they do not shadow the module-level ``gpu`` mapping.
        memoryTotal = 0
        utilTotal = 0
        for device in devices:
            usage = gpuUsageInfo[device]
            memoryTotal += usage['used']
            utilTotal += usage['util']
        scores[key] = int(getBanlanceRate(memoryTotal, utilTotal) / len(devices))
    return scores
|
||
|
||
|
||
if __name__ == '__main__':
    import os

    # Address advertised for this host; joined with each port to form the
    # sorted-set member "<local>:<port>".
    local = "172.20.1.3"

    # NOTE(review): the Redis endpoint and password are hard-coded in source.
    # The password can be overridden with DANCEAI_REDIS_PASSWORD; the literal
    # default is kept only for backward compatibility and should be moved out
    # of version control (and rotated).
    pool = redis.ConnectionPool(
        host="60.204.148.84",
        port=6379,
        password=os.environ.get("DANCEAI_REDIS_PASSWORD", 'xm!redis123'),
        db=15,
    )
    r = redis.Redis(connection_pool=pool)

    # zsetData reads this module-level name, so it must be set before the call.
    gpuUsageInfo = getGpuInfo()
    data = zsetData(**gpu)

    # Queue every score update on one transactional pipeline so all members of
    # the "danceai" sorted set are refreshed atomically in a single round trip
    # (the pipeline was previously created but never used).
    pipe = r.pipeline(transaction=True)
    for key in data:
        pipe.zadd("danceai", {f"{local}:{key}": data[key]})
    pipe.execute()