Linux 服务器显卡监控脚本
【摘要】 本文介绍一个基于 pynvml 的 Linux 服务器显卡监控脚本:先通过 pip install pynvml 安装依赖,然后用 watch_nvidia(nvidia_ids, min_memory) 定时检查指定显卡的剩余显存(单位 GB),当有显卡空闲时用 send_msg 发送邮件通知,最后给出完整脚本 nvidia.py 及后台运行方法。
@TOC
- 前期准备:
pip install pynvml
1.watch_nvidia
#参数:nvidia_ids:显卡id min_memory:最小可用显存 GB
def watch_nvidia(nvidia_ids,min_memory):
    """Count the monitored cards whose free memory exceeds ``min_memory``.

    Args:
        nvidia_ids: iterable of device indices passed to
            ``pynvml.nvmlDeviceGetHandleByIndex``.
        min_memory: threshold compared against ``meminfo.free / 1024**3``
            (the value printed with a "GB" label); a card counts as free
            only when it is strictly above this value.

    Returns:
        The number of cards above the threshold, or -1 when there is none
        (including when ``nvidia_ids`` is empty).
    """
    free_num = 0
    for i in nvidia_ids:
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        free_gb = meminfo.free * 1.0 / (1024**3)
        # Report the free memory of every card that is visited.
        print("card {} free memory is {}GB".format(i, free_gb))
        # Count directly instead of writing into a list slot derived from the
        # device id: ids do not have to start at 0 or be contiguous, so using
        # them as list positions could raise IndexError or hit the wrong slot.
        if free_gb > min_memory:
            free_num += 1
    if free_num == 0:
        print("no free card!")
        return -1
    return free_num
2.send_msg
#发送邮箱信息
#target_email:接受信息的邮箱,msg:发送的消息
def send_msg(target_email,msg):
    """Send a plain-text notification e-mail through smtp.163.com.

    Args:
        target_email: address that receives the message.
        msg: text used as the message body.

    Raises:
        smtplib.SMTPException: propagated from smtplib when the login or the
            delivery fails.
    """
    sender = 'xxxxx@163.com'  # mailbox the message is sent from
    receivers = [target_email]  # recipient list, e.g. a QQ mailbox or any other
    # MIMEText arguments: body text, 'plain' subtype, UTF-8 charset.
    message = MIMEText(msg, 'plain', 'utf-8')
    subject = 'nvidia显卡监控'
    message['Subject'] = Header(subject, 'utf-8')
    # RFC 5322 requires a From header; To is added so the recipient is shown.
    message['From'] = sender
    message['To'] = target_email
    # SMTP_SSL connects as soon as it is given a host, so the port is passed
    # here rather than through a second connect() call, which would open
    # another connection and abandon the first one.  The with-block sends
    # QUIT and closes the socket even when login or sendmail raises.
    with smtplib.SMTP_SSL('smtp.163.com', 465) as server:
        # "xxxxxx" is the login authorization code; per the original note the
        # sender account must enable its mail-client service (IMAP is named
        # there) in the mailbox settings to obtain one.
        server.login(sender, "xxxxxx")
        server.sendmail(sender, receivers, message.as_string())
3.完整脚本nvidia.py
#-*-coding:GBK -*-
import pynvml
pynvml.nvmlInit()
import time
import os
#from send_email import send_msg
import smtplib
from email.mime.text import MIMEText
from email.header import Header
def send_msg(target_email,msg):
    """Send a plain-text notification e-mail through smtp.163.com.

    Args:
        target_email: address that receives the message.
        msg: text used as the message body.

    Raises:
        smtplib.SMTPException: propagated from smtplib when the login or the
            delivery fails.
    """
    sender = 'xxxxx@163.com'  # mailbox the message is sent from
    receivers = [target_email]  # recipient list, e.g. a QQ mailbox or any other
    # MIMEText arguments: body text, 'plain' subtype, UTF-8 charset.
    message = MIMEText(msg, 'plain', 'utf-8')
    subject = 'nvidia显卡监控'
    message['Subject'] = Header(subject, 'utf-8')
    # RFC 5322 requires a From header; To is added so the recipient is shown.
    message['From'] = sender
    message['To'] = target_email
    # SMTP_SSL connects as soon as it is given a host, so the port is passed
    # here rather than through a second connect() call, which would open
    # another connection and abandon the first one.  The with-block sends
    # QUIT and closes the socket even when login or sendmail raises.
    with smtplib.SMTP_SSL('smtp.163.com', 465) as server:
        # The second argument is the mailbox's login authorization code.
        server.login(sender, "xxxxxxx")
        server.sendmail(sender, receivers, message.as_string())
#smtpObj = smtplib.SMTP("localhost",1025)
#try:
# smtpObj.sendmail(sender, receivers, message.as_string())
# print("邮件发送成功")
#except smtplib.SMTPException:
# print("Error: 无法发送邮件")
def watch_nvidia(nvidia_ids,min_memory):
    """Count the monitored cards whose free memory exceeds ``min_memory``.

    Args:
        nvidia_ids: iterable of device indices passed to
            ``pynvml.nvmlDeviceGetHandleByIndex``.
        min_memory: threshold compared against ``meminfo.free / 1024**3``
            (the value printed with a "GB" label); a card counts as free
            only when it is strictly above this value.

    Returns:
        The number of cards above the threshold, or -1 when there is none
        (including when ``nvidia_ids`` is empty).
    """
    free_num = 0
    for i in nvidia_ids:
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        free_gb = meminfo.free * 1.0 / (1024**3)
        print("card {} free memory is {}GB".format(i, free_gb))
        # Count directly instead of writing into a list slot derived from the
        # device id: ids do not have to start at 0 or be contiguous, so using
        # them as list positions could raise IndexError or hit the wrong slot.
        if free_gb > min_memory:
            free_num += 1
    if free_num == 0:
        print("no free card!")
        return -1
    return free_num
# Polling configuration.
nvidia_ids = [0, 1]  # device indices to monitor
min_memory = 8  # free-memory threshold (GB) a card must exceed

# Poll every 10 seconds until at least one card is free, send one e-mail,
# then stop.  watch_nvidia returns -1 when no card qualifies, so the
# ">= 1" check keeps the loop waiting in that case.
try:
    while True:
        free_count = watch_nvidia(nvidia_ids, min_memory)
        if free_count >= 1:
            send_msg("xxxxxxx@bjtu.edu.cn", "{}张显卡空闲".format(free_count))
            # Optionally start the waiting job here, e.g. os.system("sh veri.sh")
            break
        time.sleep(10)
finally:
    # Release NVML (initialised at import time) on every exit path,
    # including Ctrl-C and errors raised while polling or sending.
    pynvml.nvmlShutdown()
4.后台运行
# -u disables Python's output buffering so nvidia.out is updated while the
# script is still polling; 2>&1 sends tracebacks to the same log file.
nohup python -u nvidia.py > nvidia.out 2>&1 &
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)