#!/usr/bin/env python
"""Publish per-port GPU load scores to a Redis sorted set.

For every model-serving port listed in ``gpu`` the script averages the
(memory % + utilisation %) of the GPUs bound to that port and stores the
result as the score of member ``"<LOCAL_ADDR>:<port>"`` in the sorted set
``ZSET_KEY``.
"""
import os

import pynvml
import redis

# Binding between model-serving processes (identified by their port) and the
# GPUs they run on: port -> [gpu_device_id, ...].  Device ids have the form
# "g<index>", matching the keys produced by getGpuInfo().
gpu = {'1001': ['g0', 'g1'], '1002': ['g3'], '1003': ['g4']}

# Usage snapshot consulted by zsetData().  Callers may assign the result of
# getGpuInfo() here; when left as None, zsetData() samples the GPUs itself.
gpuUsageInfo = None

# Address prefix used to build the sorted-set member names.
LOCAL_ADDR = "172.20.1.3"
# Name of the Redis sorted set that receives the scores.
ZSET_KEY = "danceai"

# Redis connection settings.  Each value can be overridden through the
# environment; the defaults are the values this script has always used.
# NOTE(review): a credential is committed in source here -- it should be
# rotated and supplied only via REDIS_PASSWORD (or a secret store).
REDIS_HOST = os.environ.get("REDIS_HOST", "60.204.148.84")
REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379"))
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "xm!redis123")
REDIS_DB = int(os.environ.get("REDIS_DB", "15"))


def getGpuInfo():
    """Sample memory and utilisation figures for every GPU NVML reports.

    Returns:
        dict: ``"g<index>"`` -> ``{'used': <float>, 'util': <value>}`` where
        ``used`` is memory in use as a percentage of total memory and
        ``util`` is the ``gpu`` field of NVML's utilisation rates.
    """
    pynvml.nvmlInit()
    try:
        usage = {}
        for index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            rates = pynvml.nvmlDeviceGetUtilizationRates(handle)
            usage[f"g{index}"] = {
                'used': (mem.used / mem.total) * 100,
                'util': rates.gpu,
            }
        return usage
    finally:
        # Always release NVML, even when one of the queries above fails.
        pynvml.nvmlShutdown()


def getBanlanceRate(memory, gpu):
    """Combine a memory figure and a utilisation figure into one load value.

    The combination is a plain sum of the two arguments.
    """
    return memory + gpu


def _balance_scores(bindings, usage):
    """Compute one integer load score per port.

    Args:
        bindings: mapping of port -> list of GPU device ids.
        usage: mapping of device id -> ``{'used': ..., 'util': ...}`` as
            returned by getGpuInfo().

    Returns:
        dict: port -> ``int`` score, the per-device average of
        ``getBanlanceRate(memory, util)`` truncated to an integer.

    Raises:
        ValueError: a port has no GPUs bound to it.
        KeyError: a port is bound to a device id missing from ``usage``.
    """
    scores = {}
    for port, devices in bindings.items():
        if not devices:
            raise ValueError(f"no GPUs are bound to port {port!r}")
        missing = [dev for dev in devices if dev not in usage]
        if missing:
            raise KeyError(
                f"port {port!r} is bound to unknown GPU(s) {missing}; "
                f"available devices: {sorted(usage)}"
            )
        memory = sum(usage[dev]['used'] for dev in devices)
        util = sum(usage[dev]['util'] for dev in devices)
        scores[port] = int(getBanlanceRate(memory, util) / len(devices))
    return scores


def zsetData(**kwargs):
    """Return the load score for each port given as a keyword argument.

    Each keyword is a port and its value is the list of GPU device ids bound
    to that port.  Usage figures come from the module-level ``gpuUsageInfo``
    when it has been set; otherwise a fresh snapshot is taken with
    getGpuInfo().

    Returns:
        dict: port -> ``int`` score (see _balance_scores()).
    """
    usage = gpuUsageInfo if gpuUsageInfo is not None else getGpuInfo()
    return _balance_scores(kwargs, usage)


def main():
    """Sample the GPUs and write every port's score to Redis."""
    scores = _balance_scores(gpu, getGpuInfo())
    if not scores:
        # ZADD needs at least one member/score pair.
        return
    client = redis.Redis(
        host=REDIS_HOST,
        port=REDIS_PORT,
        password=REDIS_PASSWORD,
        db=REDIS_DB,
    )
    # A single ZADD carrying all members is one atomic command and one
    # round trip, replacing the per-member calls.
    client.zadd(
        ZSET_KEY,
        {f"{LOCAL_ADDR}:{port}": score for port, score in scores.items()},
    )


if __name__ == '__main__':
    main()