YOLOv5-Based License Plate Detection: Source Code Walkthrough
detect.py
import argparse # Python's built-in command-line parsing module; no installation needed
import torch.backends.cudnn as cudnn
from models.experimental import *
from utils.datasets import *
from utils.utils import *
from models.LPRNet import *
def detect(save_img=False):
# Unpack the output dir, input source, weights, display flag, txt-save flag and image size from opt
out, source, weights, view_img, save_txt, imgsz = \
opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
device = torch_utils.select_device(opt.device) # select the compute device
if os.path.exists(out):
shutil.rmtree(out) # delete the old output folder
os.makedirs(out) # create a fresh output folder
half = device.type != 'cpu' # half precision (FP16), only supported on CUDA
# Load the FP32 model; check_img_size ensures the requested input resolution is divisible by 32 (if not, it is rounded to a valid size and returned)
model = attempt_load(weights, map_location=device)
imgsz = check_img_size(imgsz, s=model.stride.max()) # the default 640 is already a valid multiple of the max stride
if half:
model.half() # convert the weights to FP16 for faster inference
# Second-stage classifier: off by default in stock YOLOv5, but enabled here to run LPRNet on the detected plates
classify = True
if classify:
# modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize
# modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
modelc = LPRNet(lpr_max_len=8, phase=False, class_num=len(CHARS), dropout_rate=0).to(device)
modelc.load_state_dict(torch.load('./weights/Final_LPRNet_model.pth', map_location=torch.device('cpu')))
print("load pretrained model successful!")
modelc.to(device).eval()
#通过不同的输入源来设置不同的数据加载方式
vid_path, vid_writer = None, None
if webcam:
view_img = True
cudnn.benchmark = True # speeds up inference when the input size is constant
dataset = LoadStreams(source, img_size=imgsz)
else:
save_img = True
dataset = LoadImages(source, img_size=imgsz) # load images or a video file
# Get the list of class-name strings
names = model.module.names if hasattr(model, 'module') else model.names
# Pick a random drawing color per class (a list of RGB triplets)
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
# Start the overall timer
t0 = time.time()
# Run one dummy forward pass to check that inference works
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # dummy input
_ = model(img.half() if half else img) if device.type != 'cpu' else None # warm-up run (GPU only)
# path: image/video path
# img: the resized+padded image, e.g. shape (3, 640, 512) in (c, h, w) layout
# im0s: the original-size image, e.g. shape (1080, 810, 3)
# vid_cap: None when reading images; the video capture handle when reading video
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # cast the image to FP16 as well
img /= 255.0 # normalize: 0-255 -> 0.0-1.0
# If there is no batch dimension, add one at the front
if img.ndimension() == 3:
img = img.unsqueeze(0) # e.g. (3, 384, 640) -> (1, 3, 384, 640)
# Inference: record the start time
t1 = torch_utils.time_synchronized()
# Forward pass; pred has shape (1, num_boxes, 5 + num_class)
# h, w are the network input height and width; the dataset uses rectangular inference, so h need not equal w
# num_boxes = (h/32 * w/32 + h/16 * w/16 + h/8 * w/8) * 3
# e.g. a 384x640 input gives 15120 boxes (worked breakdown after the forward pass below)
# pred[..., 0:4] are the box coordinates in xywh (center + width/height) format
# pred[..., 4] is the objectness confidence
# pred[..., 5:] are the per-class probabilities
pred = model(img, augment=opt.augment)[0]
print(pred.shape)
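# A worked breakdown of the box count (a sketch, assuming a 384x640 letterboxed input):
#   stride 8:  48 * 80 = 3840 cells
#   stride 16: 24 * 40 =  960 cells
#   stride 32: 12 * 20 =  240 cells
#   (3840 + 960 + 240) * 3 anchors = 15120, so pred.shape == (1, 15120, 5 + num_class)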
# Run NMS
# pred: the raw forward-pass output
# conf_thres: confidence threshold
# iou_thres: IoU threshold
# classes: optionally keep only certain classes
# agnostic: whether NMS also suppresses overlapping boxes of different classes
# after NMS the box format changes from xywh to xyxy (top-left + bottom-right corners)
# pred becomes a list[torch.Tensor] with one tensor per image in the batch
# each tensor has shape (num_boxes, 6): box (4 values) + conf + cls
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t2 = torch_utils.time_synchronized()
# Second-stage classification: run LPRNet on the detected plates to read the characters
if classify:
pred,plat_num = apply_classifier(pred, modelc, img, im0s)
for i, det in enumerate(pred): # process each image in the batch
if webcam: # with a webcam source the batch size can exceed 1, so pick the i-th frame out of the dataset
p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
else:
p, s, im0 = path, '', im0s
save_path = str(Path(out) / Path(p).name) # output path for the image/video; p is the original path (including the file name)
txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '') # output path for the box-coordinate txt file
s += '%gx%g ' % img.shape[2:] # add the inference size to the log string, e.g. '384x640 '
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain [w, h, w, h]
if det is not None and len(det): # skip images with no detections
# Rescale box coordinates from the resized+padded image back to the original image size
# (coordinates are in xyxy format here; a standalone sketch of this rescaling follows the detect.py listing)
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Log the per-class detection counts
for c in det[:, 5].unique():
n = (det[:, 5] == c).sum() # detections per class
s += '%g %ss, ' % (n, names[int(c)]) # add to string
# Save the predictions
for de,lic_plat in zip(det,plat_num):
# xyxy,conf,cls,lic_plat=de[:4],de[4],de[5],de[6:]
*xyxy, conf, cls=de
if save_txt: # Write to file
# Convert xyxy (top-left + bottom-right) to xywh (center + width/height), normalize by image w/h, then save as a list
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # class x_center y_center width height
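# Worked example of the gn normalization above (a sketch, assuming im0.shape == (1080, 810, 3)):
# gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] -> tensor([810, 1080, 810, 1080]), i.e. [w, h, w, h]
# a box xyxy = (81, 108, 405, 540) becomes xywh = (243, 324, 324, 432),
# and dividing by gn gives (0.3, 0.3, 0.4, 0.4): normalized center-x, center-y, width, height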
# Draw the box on the original image
if save_img or view_img: # Add bbox to image
# label = '%s %.2f' % (names[int(cls)], conf)
lb = ""
for a,i in enumerate(lic_plat):
# if a ==0:
# continue
lb += CHARS[int(i)]
label = '%s %.2f' % (lb, conf)
im0=plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
# Log the inference + NMS time
print('%sDone. (%.3fs)' % (s, t2 - t1))
# Display the image/video if requested
if view_img:
cv2.namedWindow("Demo3", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Demo3", 2500, 1000)
cv2.imshow("Demo3", im0)
#if cv2.waitKey(1) == ord('q'): # press q to quit (video mode)
# raise StopIteration # (video mode)
cv2.waitKey() # block until a key press (image mode)
# Save the image/video
if save_img:
if dataset.mode == 'images':
cv2.imwrite(save_path, im0)
else:
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
fourcc = 'mp4v' # output video codec
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img:
print('Results saved to %s' % os.getcwd() + os.sep + out)
if platform == 'darwin': # MacOS
os.system('open ' + save_path)
# Log the total elapsed time
print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
# Build the argument parser
parser = argparse.ArgumentParser()
# parser.add_argument registers an option on the parser instance, e.g. '--weights' adds a weights attribute
# i.e. adding an attribute aa to instance xx via xx.add_argument("aa")
# nargs: number of command-line values to consume; '*' means 0 or more, '+' means 1 or more
#weights: trained weights to load
parser.add_argument('--weights', nargs='+', type=str, default='./weights/last.pt', help='model.pt path(s)')
#source: input data; an image/video path, '0' for the built-in webcam, or an rtsp/http video stream
parser.add_argument('--source', type=str, default=r'F:\8.jpg', help='source')
# default=r'F:\8.jpg' for an image
# default=r'F:\YOLOV\plate-main\inference\images\478289752-1-208.mp4' for a video
# default='0' for the built-in camera
# default='1' for a USB camera
parser.add_argument('--output', type=str, default='inference/output/', help='output folder') # output folder
#img-size: network input image size
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
#conf-thres: confidence threshold
parser.add_argument('--conf-thres', type=float, default=0.8, help='object confidence threshold')
#iou-thres: IoU threshold for NMS
parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
#device: compute device
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
#view-img: whether to display results; store_true would normally default to False, but default=True forces it on here
parser.add_argument('--view-img', action='store_true', help='display results',default=True)
#save-txt: whether to save the predicted box coordinates as txt files, default False
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
#classes: keep only the given classes, e.g. 0 or 0 2 3
parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
#agnostic-nms: whether NMS also suppresses boxes across classes, default False
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
#augment: test-time augmentation (multi-scale, flips, etc.)
parser.add_argument('--augment', action='store_true', help='augmented inference')
#update: if True, run strip_optimizer on the models to drop optimizer state from the .pt files, default False
parser.add_argument('--update', action='store_true', help='update all models')
# Parse the command-line arguments
opt = parser.parse_args()
print(opt)
# Context manager: gradients are not tracked inside this block
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
# strip optimizer state from the .pt files
for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
detect()
create_pretrained(opt.weights, opt.weights)
else:
detect()
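Before moving on to test.py: the scale_coords call above maps boxes from the letterboxed inference image back to the original frame. Below is a minimal, self-contained sketch of that inverse mapping (my own illustration of the idea, not the project's utils implementation; the shapes and sample box are made up):
import numpy as np

def rescale_boxes(infer_shape, boxes, orig_shape):
    """Map xyxy boxes from the padded/resized inference image back to the original image.
    infer_shape and orig_shape are (h, w); boxes is an (N, 4) array of xyxy coords."""
    gain = min(infer_shape[0] / orig_shape[0], infer_shape[1] / orig_shape[1])  # resize ratio
    pad_w = (infer_shape[1] - orig_shape[1] * gain) / 2  # horizontal letterbox padding
    pad_h = (infer_shape[0] - orig_shape[0] * gain) / 2  # vertical letterbox padding
    boxes = boxes.astype(np.float64)
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / gain  # undo pad, then undo resize (x)
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / gain  # undo pad, then undo resize (y)
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, orig_shape[1])  # clip to image width
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, orig_shape[0])  # clip to image height
    return boxes.round()

# e.g. a 1080x810 photo letterboxed into 640x512: gain = min(640/1080, 512/810) ~ 0.5926, pad = (16, 0)
print(rescale_boxes((640, 512), np.array([[16.0, 32.0, 368.0, 608.0]]), (1080, 810)))  # [[0. 54. 594. 1026.]]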
test.py
import argparse
import json
from models.experimental import *
from utils.datasets import *
def test(data,
weights=None,
batch_size=16,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6, # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
model=None,
dataloader=None,
save_dir='',
merge=False):
# Check whether test() was called from train.py; if so, reuse the training device
training = model is not None
if training: # called by train.py
device = next(model.parameters()).device # reuse the model's device
else: # called directly from the command line
device = torch_utils.select_device(opt.device, batch_size=batch_size)
merge = opt.merge # use merge-NMS (box fusion during NMS)
# Remove previous test_batch images
for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
os.remove(f)
# Load the model
model = attempt_load(weights, map_location=device)
# Check the input resolution is divisible by the max stride, 32 (640 here)
imgsz = check_img_size(imgsz, s=model.stride.max())
# Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
# if device.type != 'cpu' and torch.cuda.device_count() > 1:
# model = nn.DataParallel(model)
# Half
# If running on a single GPU (not CPU), cast the model from FP32 to FP16 to speed up the forward pass
half = device.type != 'cpu' and torch.cuda.device_count() == 1 # half precision only supported on single-GPU
if half:
model.half() # FP16 inference on the GPU
print("GPU")
# Configure
# eval() freezes Dropout (using the trained behavior) and switches BatchNorm to inference mode
model.eval()
# Load the dataset config
with open(data) as f:
data = yaml.load(f, Loader=yaml.FullLoader) # config dict
nc = 1 if single_cls else int(data['nc']) # number of classes
# Build the IoU thresholds from 0.5 to 0.95 in steps of 0.05
# iouv is the list [0.5, 0.55, 0.6, ..., 0.95]
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
iouv = iouv[0].view(1) # keep only 0.5 (mAP@0.5); comment this line out to evaluate mAP@0.5:0.95
niou = iouv.numel() # number of IoU thresholds (1 here, 10 with the line above commented out)
# Dataloader
if not training:
# Run one all-zeros image through the model to check the forward pass
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
# Pick the image paths
path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
# Create the dataloader
# note rect=True: YOLOv5 evaluates with rectangular inference
dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]
seen = 0 # number of images evaluated so far
# Get the class names
names = model.names if hasattr(model, 'names') else model.module.names
# Get the COCO category-id mapping
# COCO has 80 classes (indices 0-79), but its official category ids lie in 1-90
# coco80_to_coco91_class() bridges the two, returning an index array in the 1-90 range
coco91class = coco80_to_coco91_class()
# Header string for the tqdm progress bar
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
# Initialize metrics and timers
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
# Initialize the test-set loss
loss = torch.zeros(3, device=device)
# Initialize the json dict, the statistics and the AP lists
jdict, stats, ap, ap_class = [], [], [], []
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device)
# Cast images from FP32 to FP16 as well
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = img.shape # batch size, channels, height, width
whwh = torch.Tensor([width, height, width, height]).to(device)
# Disable gradients
with torch.no_grad():
# Run model
# time_synchronized() calls torch.cuda.synchronize() before returning time.time()
# synchronize() waits for all pending GPU work, so the timings are accurate
t = torch_utils.time_synchronized()
# Forward pass: inf_out is the inference output, train_out the raw training output
inf_out, train_out = model(img, augment=augment) # inference and training outputs
# t0 accumulates the forward-pass time
t0 += torch_utils.time_synchronized() - t
# Compute loss
# When testing during training, compute the test-set box/obj/cls losses from the training output
if training: # if model has loss hyperparameters
loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls
# Run NMS
# t1 accumulates the NMS post-processing time
t = torch_utils.time_synchronized()
# non_max_suppression performs non-maximum suppression:
# conf_thres is the confidence threshold, iou_thres the IoU threshold, merge toggles merge-NMS
output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
t1 += torch_utils.time_synchronized() - t
# Statistics per image
# Per-image statistics: optionally write predictions to txt, build the json dict, count TPs, etc.
for si, pred in enumerate(output):
# Labels of the si-th image: class, x, y, w, h
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # ground-truth classes
seen += 1 # count evaluated images
# If there are no predictions, append empty stats
if pred is None:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Save test results to a txt file
# with open('test.txt', 'a') as file:
# [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
# Clip predicted coordinates to the image bounds
clip_coords(pred, (height, width))
# Build the COCO-format json dict
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
# a COCO json entry looks roughly like the line above
# image id
image_id = int(Path(paths[si]).stem.split('_')[-1])
# box coordinates
box = pred[:, :4].clone() # xyxy
# rescale the boxes to the original image size
scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
# convert to xywh
box = xyxy2xywh(box) # xywh
# Note: xyxy means top-left + bottom-right corners, while xywh here means center + width/height
# The COCO json box format is also xywh, but its xy is the TOP-LEFT corner,
# i.e. the COCO layout is top-left corner + width/height,
# so the next line converts the center point to the top-left corner
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
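# Worked example of the conversion above (a sketch): a box with center (320, 240) and size 100x50
# arrives as xywh = [320, 240, 100, 50]; subtracting half the size from xy gives [270, 215, 100, 50],
# the top-left + width/height layout that the COCO json expects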
#image_id: which image the detection belongs to
#category_id: class, mapped by coco91class from index 0-79 to category id 1-90
#bbox: box coordinates
#score: confidence score
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
# Initialize every prediction as incorrect; niou is the number of IoU thresholds
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
detected = [] # label indices that have already been matched
tcls_tensor = labels[:, 0]
# target boxes
# Convert the label boxes to xyxy and scale them by the image width/height
tbox = xywh2xyxy(labels[:, 1:5]) * whwh
# Per target class
# Handle each class in the image separately
for cls in torch.unique(tcls_tensor):
ti = (cls == tcls_tensor).nonzero().view(-1) # indices of label boxes of this class
pi = (cls == pred[:, 5]).nonzero().view(-1) # indices of predicted boxes of this class
# Search for detections
if pi.shape[0]:
# Prediction to target ious
# box_iou computes the IoU between every prediction and every label; max(1) keeps the best IoU per prediction, with i the matching label index
# (a standalone sketch of this pairwise IoU appears after the test.py listing)
# pred[pi, :4] has shape [N, 4]
# tbox[ti] has shape [M, 4]
# box_iou output has shape [N, M]
# ious has shape [N]
# i has shape [N]; its values are label indices in 0..M-1
ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
for j in (ious > iouv[0]).nonzero():
d = ti[i[j]] # index of the matched target
if d not in detected:
detected.append(d) # mark this target as matched
# iouv runs from 0.5 to 0.95 in steps of 0.05
# record a true positive at every IoU threshold the match clears
correct[pi[j]] = ious[j]>iouv # iou_thres is 1xn
if len(detected) == nl: # all targets already located in image
break
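# Worked example of the multi-threshold TP assignment (a sketch): with iouv = [0.50, 0.55, ..., 0.95],
# a matched prediction with IoU 0.72 yields
# correct[pi[j]] = [True, True, True, True, True, False, False, False, False, False],
# i.e. a TP at thresholds 0.50-0.70 and a FP at 0.75 and above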
# Append statistics (correct, conf, pcls, tcls)
# Append this image's results to stats
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Plot images
# Plot and save the ground truth and predictions of the first batch
if batch_i < 1:
f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename
plot_images(img, targets, paths, str(f), names) # ground truth
f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions
# Compute statistics
# Concatenate the per-image stats
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats):
# Compute the metrics from the TP statistics above:
# precision = TP/(TP+FP), recall = TP/(TP+FN), mAP, F1 score, per-class AP
p, r, ap, f1, ap_class = ap_per_class(*stats)
p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
# nt: number of target boxes per class in the test set
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
# Print results
# Print the overall metrics
pf = '%20s' + '%12.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
# Print results per class
# Print per-class metrics
if verbose and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
# Print speeds
# Print forward-pass time, NMS time and total time
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
if not training:
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
# Save JSON
# Evaluate the saved json predictions with pycocotools
# note: the test-set labels must also be converted to COCO json format
if save_json and map50 and len(jdict):
imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
f = 'detections_val2017_%s_results.json' % \
(weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '') # filename
print('\nCOCO mAP with pycocotools... saving %s...' % f)
with open(f, 'w') as file:
json.dump(jdict, file)
try:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
# https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO ground truth api
cocoDt = cocoGt.loadRes(f) # initialize COCO pred api
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.params.imgIds = imgIds # image IDs to evaluate
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
except:
print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
'See https://github.com/cocodataset/cocoapi/issues/356')
# Return results
# Return the test metrics
model.float() # for training
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
# Register options on the parser, e.g. parser.add_argument('--foo')
#weights: model weights to evaluate
parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
#data: dataset config file with the dataset paths
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
#batch-size: forward-pass batch size, default 32
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
#img-size: input image resolution, default 640
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
#conf-thres: confidence threshold for filtering boxes, default 0.001
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
#iou-thres: IoU threshold for NMS, default 0.65
parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
#save-json: save predictions in COCO json format and evaluate with pycocotools (requires COCO-json labels too), default False
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
#task: evaluation mode, default 'val'; see the notes below
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
#device: evaluation device; 'cpu', '0' (one GPU, cuda:0) or '0,1,2,3' (multiple GPUs)
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
#single-cls: treat the dataset as single-class, default False
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
#augment: use TTA (Test-Time Augmentation), default False
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--merge', action='store_true', help='use Merge NMS')
#verbose: print per-class mAP, default False
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
# Parse the command-line arguments
opt = parser.parse_args()
# Force save_json on when evaluating coco.yaml
opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
# check_file verifies that the file exists
opt.data = check_file(opt.data) # check file
print(opt)
# task in ['val', 'test']: plain evaluation on the validation/test set
if opt.task in ['val', 'test']: # (default) run normally
test(opt.data,
opt.weights,
opt.batch_size,
opt.img_size,
opt.conf_thres,
opt.iou_thres,
opt.save_json,
opt.single_cls,
opt.augment,
opt.verbose)
# task == 'study': evaluate yolov5 and yolov3-spp models across a range of image sizes and plot the results
elif opt.task == 'study': # run over a range of settings and save/plot
for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to
x = list(range(352, 832, 64)) # x axis
y = [] # y axis
for i in x: # img-size
print('\nRunning %s point %s...' % (f, i))
r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
# plot_study_txt(f, x) # plot
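A note before train.py: the matching step in test.py hinges on box_iou, the pairwise IoU between predictions and same-class labels. Here is a minimal stand-in sketch of that computation (my own hand-rolled version for illustration; the project actually uses its utils implementation, and the sample boxes are made up):
import torch

def pairwise_iou(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """IoU between every box in a (N,4) and every box in b (M,4), both xyxy. Returns (N,M)."""
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])  # (N,M,2) intersection top-left
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])  # (N,M,2) intersection bottom-right
    wh = (rb - lt).clamp(min=0)                     # zero out non-overlapping pairs
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)

# e.g. two predictions matched against one label box, as in the test.py loop
preds = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])
label = torch.tensor([[0., 0., 10., 10.]])
ious, idx = pairwise_iou(preds, label).max(1)  # best label per prediction
print(ious)  # tensor([1.0000, 0.1429])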
train.py
import argparse
import glob
import logging
import math
import os
import random
import shutil
import time
from pathlib import Path
import numpy as np
import torch.distributed as dist
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import test # import test.py to get mAP after each epoch
from models.yolo import Model
from utils.datasets_robust import create_dataloader
from utils.general import (
torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights,
compute_loss, plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file,
check_git_status, check_img_size, increment_dir, print_mutation, plot_evolution, set_logging)
from utils.google_utils import attempt_download
from utils.torch_utils import init_seeds, ModelEMA, select_device, intersect_dicts
# from models.spinenet_yolo import YoloV5
# from models.yolov6 import YoloV6
from models.spinenet_yolo import YoloV6
logger = logging.getLogger(__name__)
def train(hyp, opt, device, tb_writer=None):
logger.info(f'Hyperparameters {hyp}')
# Directory for the training logs
# The logs include: weights, the tensorboard file, the hyperparameters hyp, the training options opt (epochs, batch_size, ...) and results.txt
# results.txt records: GPU memory usage; training box loss, objectness loss, classification loss and total loss;
# the number of targets, the input resolution, precision TP/(TP+FP) and recall TP/(TP+FN);
# and the validation (test) mAP@0.5, mAP@0.5:0.95, box loss, objectness loss and classification loss.
# The ground truth of the first three batches (batch < 3) is also saved
# With hyperparameter evolution there is no tb_writer.log_dir (tb_writer is None), so an 'evolve' folder is used as the log directory
log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / 'evolve' # logging directory
# Directory for the saved weights
wdir = log_dir / 'weights' # weights directory
os.makedirs(wdir, exist_ok=True)
last = wdir / 'last.pt'
best = wdir / 'best.pt'
best_f32 = wdir / 'best_float32.pt'
# Path of results.txt
results_file = str(log_dir / 'results.txt')
# Get epochs, batch size, total batch size (for distributed training), weights and the process rank (mainly used for DDP)
epochs, batch_size, total_batch_size, weights, rank = \
opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
# Save run settings
# Save hyp and opt
with open(log_dir / 'hyp.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# torch_distributed_zero_first synchronizes all processes
# check_dataset verifies the dataset and downloads it if missing (works for the yaml datasets bundled with the project)
with open(log_dir / 'opt.yaml', 'w') as f:
yaml.dump(vars(opt), f, sort_keys=False)
# Configure
cuda = device.type != 'cpu'
init_seeds(2 + rank) # set the random seeds
# Load the dataset config
with open(opt.data) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict
with torch_distributed_zero_first(rank):
check_dataset(data_dict) # check
# Get the number of classes and the class names
# with opt.single_cls everything is treated as a single class
train_path = data_dict['train']
test_path = data_dict['val']
nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names
assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check
# Model
# pretrained = weights.endswith('.pt')
pretrained=False
if pretrained:
# Load the model, auto-downloading the weights from Google Drive
# the download often fails, so it is better to fetch the weights beforehand and put them in the weights directory
with torch_distributed_zero_first(rank):
attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location=device) # load checkpoint
if hyp.get('anchors'):
# load the checkpoint
ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor
# model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create
# The model can be created from opt.cfg or from ckpt['model'].yaml
# The difference matters for resume: on resume opt.cfg is set to '',
# so the model is built from ckpt['model'].yaml;
# this also decides whether the anchor keys are excluded below (on resume the anchors are NOT loaded)
# Saved checkpoints store their anchors, so if a user defines custom anchors and then resumes,
# the COCO-tuned anchors from the checkpoint would otherwise overwrite the custom ones,
# see https://github.com/ultralytics/yolov5/issues/459
# intersect_dicts below handles this by ignoring the excluded keys
model = YoloV6(opt.data or ckpt['model'].yaml, ch=3).to(device) # create
exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [] # exclude keys
state_dict = ckpt['model'].float().state_dict() # to FP32
state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(state_dict, strict=False) # load
# Report how many checkpoint entries were transferred into the freshly created model
# on resume, two entries fewer are loaded (anchors and anchor_grid)
logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report
else:
# Create the model from scratch; ch is the number of input image channels
model = YoloV6(opt.data or 'yolov5s.yaml', ch=3).to(device) # create
# Freeze
# Freeze layers by listing their (full or partial) parameter names
# see https://github.com/ultralytics/yolov5/issues/679
# This is only an example of how freezing works:
# You can add any parameters you want to this list, with full or partial names,
# to freeze them before training starts.
# This code freezes all weights, leaving only biases with active gradients:
freeze = ['', ] # parameter names to freeze (full or partial)
if any(freeze):
for k, v in model.named_parameters():
if any(x in k for x in freeze):
print('freezing %s' % k)
v.requires_grad = False # no gradients for frozen parameters
# Optimizer
# nbs is the nominal batch size;
# e.g. with the default opt.batch_size of 16 and nbs of 64,
# gradients are accumulated over 64/16 = 4 batches (accumulate)
# before each optimizer step, effectively enlarging the batch size
nbs = 64 # nominal batch size
accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing
# Scale the weight-decay coefficient to the effective batch size
hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay
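# Worked example of the scaling above (a sketch, assuming batch_size 16 and nbs 64):
# accumulate = max(round(64 / 16), 1) = 4, so gradients from 4 batches are summed before each step,
# an effective batch of 64; weight_decay is then scaled by 16 * 4 / 64 = 1.0 (unchanged in this case)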
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
# Split the parameters into three groups (conv/linear weights, biases, everything else) for the optimizer
for k, v in model.named_parameters():
v.requires_grad = True
if '.bias' in k:
pg2.append(v) # biases
elif '.weight' in k and '.bn' not in k:
pg1.append(v) # apply weight decay
else:
pg0.append(v) # all else
# Choose the optimizer and configure group pg0
if opt.adam:
optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
# pg1 (weights) gets weight decay
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
# pg2 (biases) gets no weight decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
# Log the optimizer groups
logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2
# Learning-rate schedule: cosine annealing,
# decaying via the lambda lf below as a function of the epoch and the hyperparameter hyp['lrf']
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# plot_lr_scheduler(optimizer, scheduler, epochs)
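# Worked values of the cosine schedule above (a sketch, assuming epochs=300 and hyp['lrf']=0.2):
# lf(0) = 1.0 (training starts at lr0), lf(150) = 0.6 (halfway), lf(300) = 0.2 (ends at lr0 * lrf)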
# Resume from a checkpoint
# initialize the starting epoch and the best result so far
# best_fitness is the sum of [precision, recall, mAP@0.5, mAP@0.5:0.95] weighted by [0.0, 0.0, 0.1, 0.9]
# best.pt is saved according to best_fitness
start_epoch, best_fitness = 0, 0.0
if pretrained:
# Optimizer
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
# Results
# restore the training results (results.txt)
if ckpt.get('training_results') is not None:
with open(results_file, 'w') as file:
file.write(ckpt['training_results']) # write results.txt
# Epochs
start_epoch = ckpt['epoch'] + 1
# Back up the weights before resuming
# resume now works almost 100% reliably, see https://github.com/ultralytics/yolov5/pull/756
# but to keep other resume issues from overwriting the previous weights, a backup is made here, see https://github.com/ultralytics/yolov5/pull/765
if opt.resume:
assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}') # save previous weights
# If the requested epochs is smaller than the checkpoint's epoch,
# treat it as the number of additional epochs to fine-tune for, not the total
if epochs < start_epoch:
logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
(weights, ckpt['epoch'], epochs))
epochs += ckpt['epoch'] # finetune additional epochs
del ckpt, state_dict
# Image sizes
# Get the model's maximum stride and the input resolutions
gs = int(max(model.stride)) # grid size (max stride)
# Check the input resolutions are multiples of the stride gs
imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples
print('imgsz: {}, imgsz_test: {}'.format(imgsz, imgsz_test))
# DP mode
# Distributed training, see https://github.com/ultralytics/yolov5/issues/475
# DataParallel mode only supports multiple GPUs on a single machine
# rank is the process rank; with rank == -1 and more than one GPU, DataParallel is used
# with rank == -1 and a single GPU, no parallelism is applied
if cuda and rank == -1 and torch.cuda.device_count() > 1:
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and rank != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
logger.info('Using SyncBatchNorm()')
# Exponential moving average
ema = ModelEMA(model) if rank in [-1, 0] else None
# DDP mode
if cuda and rank != -1:
model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)
# Trainloader
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
world_size=opt.world_size, workers=opt.workers)
mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class
nb = len(dataloader) # number of batches
assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
# Process 0
if rank in [-1, 0]:
ema.updates = start_epoch * nb // accumulate # set EMA updates
testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,
hyp=hyp, augment=False, cache=opt.cache_images, rect=True, rank=-1,
world_size=opt.world_size, workers=opt.workers)[0] # testloader
if not opt.resume:
labels = np.concatenate(dataset.labels, 0)
c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
# model._initialize_biases(cf.to(device))
plot_labels(labels, save_dir=log_dir)
if tb_writer:
# tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
tb_writer.add_histogram('classes', c, 0)
# Anchors
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
# Model parameters
hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
model.names = names
# Start training
t0 = time.time()
nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = amp.GradScaler(enabled=cuda)
logger.info('Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n'
'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()
# Update image weights (optional)
if opt.image_weights:
# Generate indices
if rank in [-1, 0]:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
# Broadcast if DDP
if rank != -1:
indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
dist.broadcast(indices, 0)
if rank != 0:
dataset.indices = indices.cpu().numpy()
# Update mosaic border
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(4, device=device) # mean losses
if rank != -1:
dataloader.sampler.set_epoch(epoch)
pbar = enumerate(dataloader)
logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
if rank in [-1, 0]:
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])
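# Worked example of the warmup interpolation above (a sketch, assuming nw=1000, lr0=0.01 and lf(epoch)=1.0):
# at ni=500 the bias group (j == 2) gets lr = np.interp(500, [0, 1000], [0.1, 0.01]) = 0.055,
# the other groups get lr = np.interp(500, [0, 1000], [0.0, 0.01]) = 0.005,
# and momentum ramps from 0.9 toward hyp['momentum']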
# Multi-scale
if opt.multi_scale:
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Forward
with amp.autocast(enabled=cuda):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size
if rank != -1:
loss *= opt.world_size # gradient averaged between devices in DDP mode
# Backward
scaler.scale(loss).backward()
# Optimize
if ni % accumulate == 0:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
if rank in [-1, 0]:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB)
s = ('%10s' * 2 + '%10.4g' * 6) % (
'%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
pbar.set_description(s)
# Plot
if ni < 3:
f = str(log_dir / ('train_batch%g.jpg' % ni)) # filename
result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
if tb_writer and result is not None:
tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
# tb_writer.add_graph(model, imgs) # add model to tensorboard
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard
scheduler.step()
# DDP process 0 or single-GPU
if rank in [-1, 0]:
# mAP
if ema:
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride'])
final_epoch = epoch + 1 == epochs
if not opt.notest or final_epoch: # Calculate mAP
if final_epoch: # replot predictions
[os.remove(x) for x in glob.glob(str(log_dir / 'test_batch*_pred.jpg')) if os.path.exists(x)]
results, maps, times = test.test(opt.data,
batch_size=total_batch_size,
imgsz=imgsz_test,
model=ema.ema,
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=log_dir)
# Write
with open(results_file, 'a') as f:
f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
if len(opt.name) and opt.bucket:
os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
# Tensorboard
if tb_writer:
tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', # train loss
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
'val/giou_loss', 'val/obj_loss', 'val/cls_loss', # val loss
'x/lr0', 'x/lr1', 'x/lr2'] # params
for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
tb_writer.add_scalar(tag, x, epoch)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1]
if fi > best_fitness:
best_fitness = fi
# Save model
save = (not opt.nosave) or (final_epoch and not opt.evolve)
if save:
with open(results_file, 'r') as f: # create checkpoint
ckpt = {'epoch': epoch,
'best_fitness': best_fitness,
'training_results': f.read(),
'model': ema.ema,
'optimizer': None if final_epoch else optimizer.state_dict()}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
torch.save({'model': model.float()}, best_f32)
del ckpt
# end epoch ----------------------------------------------------------------------------------------------------
# end training
if rank in [-1, 0]:
# Strip optimizers
n = opt.name if opt.name.isnumeric() else ''
fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
if os.path.exists(f1):
os.rename(f1, f2) # rename
if str(f2).endswith('.pt'): # is *.pt
strip_optimizer(f2) # strip optimizer
os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload
# Finish
if not opt.evolve:
plot_results(save_dir=log_dir) # save as results.png
logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
dist.destroy_process_group() if rank not in [-1, 0] else None
torch.cuda.empty_cache()
return results
if __name__ == '__main__':
parser = argparse.ArgumentParser() # build the argument parser
#weights: initial weights to load
parser.add_argument('--weights', type=str, help='initial weights path')
#cfg: model config file (network architecture)
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
#data: dataset config file (paths, class names, etc.)
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
#hyp: hyperparameter file
parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
#epochs: total number of training epochs
parser.add_argument('--epochs', type=int, default=300)
#batch-size: batch size
parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
#img-size: input image resolutions [train, test]
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
#rect: rectangular training, default False
parser.add_argument('--rect', action='store_true', help='rectangular training')
#resume: resume an interrupted training run from its latest checkpoint
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
#nosave: only save the final checkpoint, default False
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
#notest: only run test on the final epoch, default False
parser.add_argument('--notest', action='store_true', help='only test final epoch')
#noautoanchor: disable the autoanchor check, default False
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
#evolve: run hyperparameter evolution, default False
parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
#bucket: gsutil bucket on Google Cloud, rarely used
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
#cache-images: cache images in memory up front to speed up training, default False
parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
#image-weights: sample training images with class-weighted probabilities
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
#name: run name; if set, results.txt is renamed to results_name.txt, default empty
parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
#device: training device; 'cpu', '0' (one GPU, cuda:0) or '0,1,2,3' (multiple GPUs)
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
#multi-scale: multi-scale training, default False
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
#single-cls: treat the dataset as single-class, default False
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
#adam: use the Adam optimizer
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
#sync-bn: use cross-GPU synchronized BatchNorm (DDP mode only)
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
#local_rank: GPU index for DDP
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
#logdir: logging directory
parser.add_argument('--logdir', type=str, default='runs/', help='logging directory')
#workers: maximum number of dataloader workers
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
opt = parser.parse_args()
# Set up the DDP-mode parameters
# world_size: total number of processes
# global_rank: rank of this process
# derived opt parameters:
opt.total_batch_size = opt.batch_size
opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
set_logging(opt.global_rank)
if opt.global_rank in [-1, 0]:
check_git_status() # check whether the code is up to date (not available on Windows)
# Resume
if opt.resume: # resume an interrupted run
# If opt.resume is a string, it is the checkpoint path to resume from
# get_latest_run() finds the most recent last.pt under the runs folder
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
log_dir = Path(ckpt).parent.parent # runs/exp0
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
# Replace opt entirely with the saved options
with open(log_dir / 'opt.yaml') as f:
opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace
# opt.cfg is set to '' to match the logic in train() (controls whether the checkpoint's anchors are loaded)
opt.cfg, opt.weights, opt.resume = '', ckpt, True
logger.info('Resuming training from %s' % ckpt)
else:
# opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
# Check the config files
opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files
opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
# Create the run directory under opt.logdir
log_dir = increment_dir(Path(opt.logdir) / 'exp', opt.name) # runs/exp1
device = select_device(opt.device, batch_size=opt.batch_size)
# DDP mode
# re-select the device per process in DDP mode
if opt.local_rank != -1:
assert torch.cuda.device_count() > opt.local_rank
torch.cuda.set_device(opt.local_rank)
device = torch.device('cuda', opt.local_rank) # pick the device by GPU index
# initialize the process group
dist.init_process_group(backend='nccl', init_method='env://') # distributed backend
assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
# Split the total batch size evenly across the processes
opt.batch_size = opt.total_batch_size // opt.world_size
# Log the opt parameters
logger.info(opt)
# Hyperparameters: load the hyperparameter list
with open(opt.hyp) as f:
hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps
# Train
# Without hyperparameter evolution, just call train() and start training
if not opt.evolve:
tb_writer = None
if opt.global_rank in [-1, 0]:
# create the tensorboard writer
logger.info('Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/' % opt.logdir)
tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0
train(hyp, opt, device, tb_writer)
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.1, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'giou': (1, 0.02, 0.2), # GIoU loss gain
'cls': (1, 0.2, 4.0), # cls loss gain
'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
'anchors': (1, 2.0, 10.0), # anchors per output grid (0 to ignore)
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
'mixup': (1, 0.0, 1.0)} # image mixup (probability)
assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
opt.notest, opt.nosave = True, True # only test/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
yaml_file = Path('runs/evolve/hyp_evolved.yaml') # save best result here
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
# Evolution strategy: derive a base hyp from previous runs, then mutate it.
# Concretely:
# the results recorded for each previous generation give every hyp a weight
# with the hyps and their weights there are two selection modes:
# 1. single: pick one previous hyp at random, weighted by fitness: random.choices(range(n), weights=w)
# 2. weighted: fuse all previous hyps into one base hyp: (x * w.reshape(n, 1)).sum(0) / w.sum()
# evolve.txt records results + hyp after every generation
# at each generation the rows are sorted by results,
# the fitness function turns the previous results into weights,
# and the chosen selection mode produces the base hyp to mutate
for _ in range(1): # generations to evolve
if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
# Select parent(s)
# parent selection mode
parent = 'single' # parent selection method: 'single' or 'weighted'
# load evolve.txt
x = np.loadtxt('evolve.txt', ndmin=2)
# consider up to the five best previous generations
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
# weight each previous hyp by its fitness
w = fitness(x) - fitness(x).min() # weights
# build the base hyp according to the selection mode
if parent == 'single' or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
# mutate the hyperparameters
mp, s = 0.9, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
# per-hyperparameter mutation gains
g = np.array([x[0] for x in meta.values()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
# apply the mutation
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
# Apply the mutation on top of the base hyp
# [i + 7] because the first seven numbers of each row are the results (P, R, mAP, F1, test_losses=(GIoU, obj, cls)); the hyperparameters follow
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 7] * v[i]) # mutate
# Constrain to limits
# clip hyp back into the allowed range
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
# train with the mutated hyperparameters
results = train(hyp.copy(), opt, device)
# Write mutation results
# append the results and the corresponding hyp to evolve.txt
# each line of evolve.txt is one generation:
# the first seven numbers are (P, R, mAP, F1, test_losses=(GIoU, obj, cls)), followed by the hyp
# the best hyp is also saved to the yaml file
print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
# Plot results
plot_evolution(yaml_file)
print('Hyperparameter evolution complete. Best results saved as: %s\nCommand to train a new model with these '
'hyperparameters: $ python train.py --hyp %s' % (yaml_file, yaml_file))