commit 10ab049cf0b41f441bfa50d70a8b08fe9bb5ebb7
Author: iProbe
Date:   Fri May 9 17:53:03 2025 +0800

    上传文件至 /

diff --git a/getGPUUsedInfo.py b/getGPUUsedInfo.py
new file mode 100644
index 0000000..036a331
--- /dev/null
+++ b/getGPUUsedInfo.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+"""Publish a GPU load score per model-server port to a Redis sorted set.
+
+For every port in `gpu`, the score is the truncated mean over the bound GPUs
+of (memory-used % + GPU-utilisation %).  Scores are written to the sorted set
+"danceai" under the member "<local ip>:<port>".
+"""
+
+import os
+import sys
+
+import pynvml
+import redis
+
+# Binding between model-server processes (ports) and GPUs:
+# port -> [gpu_device_id, ...]
+gpu={'1001':['g0','g1'],'1002':['g3'],'1003':['g4']}
+
+def getGpuInfo():
+    """Return {"g<index>": {'used': memory-used %, 'util': GPU-utilisation %}}
+    for every device NVML reports."""
+    gpuUsageInfo={}
+    pynvml.nvmlInit()
+    try:
+        deviceCount = pynvml.nvmlDeviceGetCount()
+        for i in range(deviceCount):
+            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+            memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
+            gpuUtil = pynvml.nvmlDeviceGetUtilizationRates(handle)
+            gpuUsageInfo[f"g{i}"] = {
+                'used': (memInfo.used/memInfo.total)*100,
+                'util': gpuUtil.gpu
+            }
+    finally:
+        # Release NVML even when one of the queries above raises.
+        pynvml.nvmlShutdown()
+    return gpuUsageInfo
+
+def getBanlanceRate(memory,gpu):
+    """Return the combined load figure: memory-used plus GPU-utilisation."""
+    return memory+gpu
+
+def zsetData(**kwargs):
+    """Return {port: score} for the given port -> [gpu id, ...] bindings.
+
+    Reads the module-level `gpuUsageInfo`, which must be set (see getGpuInfo)
+    before this is called.  A port with no GPUs, or bound to a GPU id that is
+    missing from `gpuUsageInfo`, is reported on stderr and left out of the
+    result instead of aborting the whole run.
+    """
+    data={}
+    for key in kwargs:
+        devices=kwargs[key]
+        missing=[d for d in devices if d not in gpuUsageInfo]
+        if not devices or missing:
+            print(f"skip port {key}: no GPU bound or unknown GPU ids {missing}",file=sys.stderr)
+            continue
+        memory=0
+        util=0
+        for d in devices:
+            memory+=gpuUsageInfo[d]['used']
+            util+=gpuUsageInfo[d]['util']
+        data[key]=int(getBanlanceRate(memory,util)/len(devices))
+    return data
+
+
+if __name__ == '__main__':
+    local="172.20.1.3"
+    # NOTE(review): the fallback host/password are committed in this file;
+    # set REDIS_HOST / REDIS_PASSWORD in the environment and rotate the secret.
+    pool=redis.ConnectionPool(
+        host=os.environ.get("REDIS_HOST","60.204.148.84"),
+        port=6379,
+        password=os.environ.get("REDIS_PASSWORD",'xm!redis123'),
+        db=15)
+    r=redis.Redis(connection_pool=pool)
+    gpuUsageInfo=getGpuInfo()
+    data=zsetData(**gpu)
+    scores={f"{local}:{key}":data[key] for key in data}
+    # One ZADD carries every member, so readers never see a half-updated set;
+    # redis-py rejects an empty mapping, hence the guard.
+    if scores:
+        r.zadd("danceai",scores)
\ No newline at end of file
diff --git a/openrestyProxyBackend.lua b/openrestyProxyBackend.lua
new file mode 100644
index 0000000..5490df8
--- /dev/null
+++ b/openrestyProxyBackend.lua
@@ -0,0 +1,95 @@
+--[[
+    Created by: wangsuipeng
+    Edit: 20240125
+    Last Modify: 20240126
+    version: 1.1
+    Description:
+        Selects the upstream for the current request from a Redis sorted set.
+--]]
+
+-- Configuration
+-- Default upstream, used whenever Redis cannot supply one
+local default_backend = "danceaiStream"
+
+-- redis
+-- NOTE(review): the fallback host/password are committed in this file; set
+-- REDIS_HOST / REDIS_PASSWORD (nginx only passes them to workers when they are
+-- listed with the `env` directive) and rotate the secret.
+local redis_host = os.getenv("REDIS_HOST") or "60.204.148.84"
+local redis_port = 6379
+local redis_pwd = os.getenv("REDIS_PASSWORD") or "xm!redis123"
+local redis_db = 15
+
+-- Name of the zset to read
+local redis_zset = "danceai"
+
+-- Maximum idle time of a pooled connection, in ms
+local redis_pool_idle_time = 10000
+-- Connection pool size
+local redis_pool_size = 100
+-- redis timeout, in ms
+local redis_timeout =1000
+
+local redis = require("resty.redis")
+
+-- Hand the connection back to the keepalive pool.
+-- A failure is only logged: the caller has already finished its query, so
+-- this must not write a response body for the request.
+local function close_redis(red)
+    if not red then
+        return
+    end
+    local ok, err = red:set_keepalive(redis_pool_idle_time, redis_pool_size)
+    if not ok then
+        ngx.log(ngx.ERR, "set redis keepalive error : ", err)
+        return
+    end
+    ngx.log(ngx.INFO,"close redis success")
+end
+
+-- Log a failed redis step and drop the connection instead of pooling it.
+local function fail(red, level, msg, err)
+    ngx.log(level, msg, err)
+    red:close()
+    return nil
+end
+
+-- Fetch the first member (rank 0) of the zset.
+-- Returns the member, or nil when connecting, AUTH, SELECT or ZRANGE fails or
+-- the zset is empty.
+local function fetch_backend()
+    local red = redis:new()
+    red:set_timeout(redis_timeout)
+
+    local ok, err = red:connect(redis_host, redis_port)
+    if not ok then
+        ngx.log(ngx.ERR, "connect redis error : ", err)
+        return nil
+    end
+
+    ok, err = red:auth(redis_pwd)
+    if not ok then
+        return fail(red, ngx.ERR, "auth redis error : ", err)
+    end
+
+    -- Without this check a failed SELECT would silently query a different db.
+    ok, err = red:select(redis_db)
+    if not ok then
+        return fail(red, ngx.ERR, "select redis db error : ", err)
+    end
+
+    local data
+    data, err = red:zrange(redis_zset, 0, 0)
+    if not data then
+        return fail(red, ngx.WARN, "get backend error : ", err)
+    end
+
+    close_redis(red)
+    if #data == 0 then
+        ngx.log(ngx.WARN, "get backend error : ", "zset is empty")
+        return nil
+    end
+    return data[1]
+end
+
+ngx.var.backend = fetch_backend() or default_backend
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..689672d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+redis
+pynvml
\ No newline at end of file