From 3745092472a5dbd3da2d3c146903949f460a45a0 Mon Sep 17 00:00:00 2001 From: iProbe Date: Fri, 26 Jan 2024 16:33:29 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20Others/=E6=A0=B9=E6=8D=AEg?= =?UTF-8?q?pu=E4=BD=BF=E7=94=A8=E7=8E=87=E8=B0=83=E5=BA=A6/=E8=8E=B7?= =?UTF-8?q?=E5=8F=96gpu=E4=BD=BF=E7=94=A8=E7=8E=87=E8=84=9A=E6=9C=AC.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Others/根据gpu使用率调度/获取gpu使用率脚本.md | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/Others/根据gpu使用率调度/获取gpu使用率脚本.md b/Others/根据gpu使用率调度/获取gpu使用率脚本.md index 44f61b3..9ff16a7 100644 --- a/Others/根据gpu使用率调度/获取gpu使用率脚本.md +++ b/Others/根据gpu使用率调度/获取gpu使用率脚本.md @@ -39,6 +39,57 @@ def zsetData(**kwargs): return data +if __name__ == '__main__': + local="172.20.1.3" + pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15) + r=redis.Redis(connection_pool=pool) + pipe=r.pipeline(transaction=True) + gpuUsageInfo=getGpuInfo() + data=zsetData(**gpu) + for key in data: + #print(data[key],f"{local}:{key}") + r.zadd("danceai",{f"{local}:{key}":data[key]}) +(smi) spwang@k8s-worker-003:~$ cat smi-v1.py +#!/usr/bin/env python + +import pynvml +import redis + +# 大模型进程(端口)与gpu绑定关系:port: [gpu_device_id..] 
# Binding between model-serving processes (keyed by port) and the GPUs each
# one runs on: port -> [gpu_device_id, ...]. Device ids use the "g<index>"
# form produced by getGpuInfo().
gpu={'1001':['g0','g1'],'1002':['g3'],'1003':['g4']}


def getGpuInfo():
    """Sample memory and compute usage for every GPU NVML reports.

    Returns:
        dict: ``"g<index>"`` -> ``{'used': ..., 'util': ...}`` where ``used``
        is ``memInfo.used / memInfo.total`` expressed as a percentage and
        ``util`` is the ``gpu`` field of ``nvmlDeviceGetUtilizationRates``.
    """
    gpuUsageInfo = {}
    pynvml.nvmlInit()
    try:
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            gpuUtil = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpuUsageInfo[f"g{i}"] = {
                'used': (memInfo.used / memInfo.total) * 100,
                'util': gpuUtil.gpu,
            }
    finally:
        # Release NVML even when one of the queries above raises.
        pynvml.nvmlShutdown()
    return gpuUsageInfo


def getBanlanceRate(memory, gpu):
    """Fold memory usage and GPU utilization into one integer load score.

    Args:
        memory: Memory-usage percentage (summed over a port's devices by
            zsetData).
        gpu: GPU-utilization percentage (summed the same way).

    Returns:
        int: ``memory + gpu`` truncated toward zero by ``int()``.
    """
    return int(memory + gpu)


def zsetData(**kwargs):
    """Compute a load score for each port in a port -> device-ids mapping.

    Args:
        **kwargs: Port -> iterable of device ids such as ``"g0"``; the
            module-level ``gpu`` mapping has this shape.

    Returns:
        dict: Port -> integer score from getBanlanceRate(), built from the
        summed ``used`` and ``util`` values of the port's devices.

    Raises:
        KeyError: If a listed device id is missing from the usage snapshot.
    """
    # The snapshot is the module-level ``gpuUsageInfo`` when a caller has
    # bound one. Otherwise take a fresh reading, so the function no longer
    # fails with NameError when that global is unset.
    usage = globals().get('gpuUsageInfo')
    if usage is None:
        usage = getGpuInfo()

    data = {}
    for port, devices in kwargs.items():
        memory = 0
        util = 0
        for device in devices:
            memory += usage[device]['used']
            util += usage[device]['util']
        data[port] = getBanlanceRate(memory, util)
    return data


# NOTE(review): the Redis host and password below are hard-coded in source;
# consider loading them from the environment or a secrets store instead.
if __name__ == '__main__':
    local="172.20.1.3"
    pool=redis.ConnectionPool(host="60.204.148.84",port=6379,password='xm!redis123',db=15)