YOLOv5-Based License Plate Detection: Source Code Walkthrough
detect.py
import argparse # Python's built-in command-line parsing module; no installation needed
import torch.backends.cudnn as cudnn
from models.experimental import *
from utils.datasets import *
from utils.utils import *
from models.LPRNet import *
def detect(save_img=False):
# Unpack the output dir, input source, weights, display flag, txt-save flag and image size from opt
out, source, weights, view_img, save_txt, imgsz = \
opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
device = torch_utils.select_device(opt.device) # select the compute device
if os.path.exists(out):
shutil.rmtree(out) # delete the old output folder
os.makedirs(out) # create a fresh output folder
half = device.type != 'cpu' # half precision (FP16), only supported on CUDA
# Load the FP32 model; check_img_size ensures the requested input resolution is divisible by 32 (if not, it is rounded to a valid size and returned)
model = attempt_load(weights, map_location=device)
imgsz = check_img_size(imgsz, s=model.stride.max()) # the default 640 is already a valid multiple of the max stride
if half:
model.half() # convert the weights to FP16 for faster inference
# Second-stage classifier: off by default in stock YOLOv5, but enabled here to run LPRNet on the detected plates
classify = True
if classify:
# modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize
# modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
modelc = LPRNet(lpr_max_len=8, phase=False, class_num=len(CHARS), dropout_rate=0).to(device)
modelc.load_state_dict(torch.load('./weights/Final_LPRNet_model.pth', map_location=torch.device('cpu')))
print("load pretrained model successful!")
modelc.to(device).eval()
#通过不同的输入源来设置不同的数据加载方式
vid_path, vid_writer = None, None
if webcam:
view_img = True
cudnn.benchmark = True # speeds up inference when the input size is constant
dataset = LoadStreams(source, img_size=imgsz)
else:
save_img = True
dataset = LoadImages(source, img_size=imgsz) # load images or a video file
# Get the list of class-name strings
names = model.module.names if hasattr(model, 'module') else model.names
# Pick a random drawing color per class (a list of RGB triplets)
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
# Start the overall timer
t0 = time.time()
# Run one dummy forward pass to check that inference works
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # dummy input
_ = model(img.half() if half else img) if device.type != 'cpu' else None # warm-up run (GPU only)
# path: image/video path
# img: the resized+padded image, e.g. shape (3, 640, 512) in (c, h, w) layout
# im0s: the original-size image, e.g. shape (1080, 810, 3)
# vid_cap: None when reading images; the video capture handle when reading video
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # cast the image to FP16 as well
img /= 255.0 # normalize: 0-255 -> 0.0-1.0
# If there is no batch dimension, add one at the front
if img.ndimension() == 3:
img = img.unsqueeze(0) # e.g. (3, 384, 640) -> (1, 3, 384, 640)
# Inference: record the start time
t1 = torch_utils.time_synchronized()
# Forward pass; pred has shape (1, num_boxes, 5 + num_class)
# h, w are the network input height and width; the dataset uses rectangular inference, so h need not equal w
# num_boxes = (h/32 * w/32 + h/16 * w/16 + h/8 * w/8) * 3
# e.g. a 384x640 input gives 15120 boxes (worked breakdown after the forward pass below)
# pred[..., 0:4] are the box coordinates in xywh (center + width/height) format
# pred[..., 4] is the objectness confidence
# pred[..., 5:] are the per-class probabilities
pred = model(img, augment=opt.augment)[0]
print(pred.shape)
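# A worked breakdown of the box count (a sketch, assuming a 384x640 letterboxed input):
#   stride 8:  48 * 80 = 3840 cells
#   stride 16: 24 * 40 =  960 cells
#   stride 32: 12 * 20 =  240 cells
#   (3840 + 960 + 240) * 3 anchors = 15120, so pred.shape == (1, 15120, 5 + num_class)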
# Run NMS
# pred: the raw forward-pass output
# conf_thres: confidence threshold
# iou_thres: IoU threshold
# classes: optionally keep only certain classes
# agnostic: whether NMS also suppresses overlapping boxes of different classes
# after NMS the box format changes from xywh to xyxy (top-left + bottom-right corners)
# pred becomes a list[torch.Tensor] with one tensor per image in the batch
# each tensor has shape (num_boxes, 6): box (4 values) + conf + cls
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t2 = torch_utils.time_synchronized()
# Second-stage classification: run LPRNet on the detected plates to read the characters
if classify:
pred,plat_num = apply_classifier(pred, modelc, img, im0s)
for i, det in enumerate(pred): # process each image in the batch
if webcam: # with a webcam source the batch size can exceed 1, so pick the i-th frame out of the dataset
p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
else:
p, s, im0 = path, '', im0s
save_path = str(Path(out) / Path(p).name) # output path for the image/video; p is the original path (including the file name)
txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '') # output path for the box-coordinate txt file
s += '%gx%g ' % img.shape[2:] # add the inference size to the log string, e.g. '384x640 '
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain [w, h, w, h]
if det is not None and len(det): # skip images with no detections
# Rescale box coordinates from the resized+padded image back to the original image size
# (coordinates are in xyxy format here; a standalone sketch of this rescaling follows the detect.py listing)
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Log the per-class detection counts
for c in det[:, 5].unique():
n = (det[:, 5] == c).sum() # detections per class
s += '%g %ss, ' % (n, names[int(c)]) # add to string
# Save the predictions
for de,lic_plat in zip(det,plat_num):
# xyxy,conf,cls,lic_plat=de[:4],de[4],de[5],de[6:]
*xyxy, conf, cls=de
if save_txt: # Write to file
# Convert xyxy (top-left + bottom-right) to xywh (center + width/height), normalize by image w/h, then save as a list
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # class x_center y_center width height
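# Worked example of the gn normalization above (a sketch, assuming im0.shape == (1080, 810, 3)):
# gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] -> tensor([810, 1080, 810, 1080]), i.e. [w, h, w, h]
# a box xyxy = (81, 108, 405, 540) becomes xywh = (243, 324, 324, 432),
# and dividing by gn gives (0.3, 0.3, 0.4, 0.4): normalized center-x, center-y, width, height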
# Draw the box on the original image
if save_img or view_img: # Add bbox to image
# label = '%s %.2f' % (names[int(cls)], conf)
lb = ""
for a,i in enumerate(lic_plat):
# if a ==0:
# continue
lb += CHARS[int(i)]
label = '%s %.2f' % (lb, conf)
im0=plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
# Log the inference + NMS time
print('%sDone. (%.3fs)' % (s, t2 - t1))
# Display the image/video if requested
if view_img:
cv2.namedWindow("Demo3", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Demo3", 2500, 1000)
cv2.imshow("Demo3", im0)
#if cv2.waitKey(1) == ord('q'): # press q to quit (video mode)
# raise StopIteration # (video mode)
cv2.waitKey() # block until a key press (image mode)
# Save the image/video
if save_img:
if dataset.mode == 'images':
cv2.imwrite(save_path, im0)
else:
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
fourcc = 'mp4v' # output video codec
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img:
print('Results saved to %s' % os.getcwd() + os.sep + out)
if platform == 'darwin': # MacOS
os.system('open ' + save_path)
# Log the total elapsed time
print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
# Build the argument parser
parser = argparse.ArgumentParser()
# parser.add_argument registers an option on the parser instance, e.g. '--weights' adds a weights attribute
# i.e. adding an attribute aa to instance xx via xx.add_argument("aa")
# nargs: number of command-line values to consume; '*' means 0 or more, '+' means 1 or more
#weights: trained weights to load
parser.add_argument('--weights', nargs='+', type=str, default='./weights/last.pt', help='model.pt path(s)')
#source: input data; an image/video path, '0' for the built-in webcam, or an rtsp/http video stream
parser.add_argument('--source', type=str, default=r'F:\8.jpg', help='source')
# default=r'F:\8.jpg' for an image
# default=r'F:\YOLOV\plate-main\inference\images\478289752-1-208.mp4' for a video
# default='0' for the built-in camera
# default='1' for a USB camera
parser.add_argument('--output', type=str, default='inference/output/', help='output folder') # output folder
#img-size: network input image size
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
#conf-thres: confidence threshold
parser.add_argument('--conf-thres', type=float, default=0.8, help='object confidence threshold')
#iou-thres: IoU threshold for NMS
parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
#device: compute device
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
#view-img: whether to display results; store_true would normally default to False, but default=True forces it on here
parser.add_argument('--view-img', action='store_true', help='display results',default=True)
#save-txt: whether to save the predicted box coordinates as txt files, default False
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
#classes: keep only the given classes, e.g. 0 or 0 2 3
parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
#agnostic-nms: whether NMS also suppresses boxes across classes, default False
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
#augment: test-time augmentation (multi-scale, flips, etc.)
parser.add_argument('--augment', action='store_true', help='augmented inference')
#update: if True, run strip_optimizer on the models to drop optimizer state from the .pt files, default False
parser.add_argument('--update', action='store_true', help='update all models')
# Parse the command-line arguments
opt = parser.parse_args()
print(opt)
# Context manager: gradients are not tracked inside this block
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
# strip optimizer state from the .pt files
for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
detect()
create_pretrained(opt.weights, opt.weights)
else:
detect()
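Before moving on to test.py: the scale_coords call above maps boxes from the letterboxed inference image back to the original frame. Below is a minimal, self-contained sketch of that inverse mapping (my own illustration of the idea, not the project's utils implementation; the shapes and sample box are made up):
import numpy as np

def rescale_boxes(infer_shape, boxes, orig_shape):
    """Map xyxy boxes from the padded/resized inference image back to the original image.
    infer_shape and orig_shape are (h, w); boxes is an (N, 4) array of xyxy coords."""
    gain = min(infer_shape[0] / orig_shape[0], infer_shape[1] / orig_shape[1])  # resize ratio
    pad_w = (infer_shape[1] - orig_shape[1] * gain) / 2  # horizontal letterbox padding
    pad_h = (infer_shape[0] - orig_shape[0] * gain) / 2  # vertical letterbox padding
    boxes = boxes.astype(np.float64)
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / gain  # undo pad, then undo resize (x)
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / gain  # undo pad, then undo resize (y)
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, orig_shape[1])  # clip to image width
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, orig_shape[0])  # clip to image height
    return boxes.round()

# e.g. a 1080x810 photo letterboxed into 640x512: gain = min(640/1080, 512/810) ~ 0.5926, pad = (16, 0)
print(rescale_boxes((640, 512), np.array([[16.0, 32.0, 368.0, 608.0]]), (1080, 810)))  # [[0. 54. 594. 1026.]]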
test.py
import argparse
import json
from models.experimental import *
from utils.datasets import *
def test(data,
weights=None,
batch_size=16,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6, # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
model=None,
dataloader=None,
save_dir='',
merge=False):
# Check whether test() was called from train.py; if so, reuse the training device
training = model is not None
if training: # called by train.py
device = next(model.parameters()).device # reuse the model's device
else: # called directly from the command line
device = torch_utils.select_device(opt.device, batch_size=batch_size)
merge = opt.merge # use merge-NMS (box fusion during NMS)
# Remove previous test_batch images
for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
os.remove(f)
# Load the model
model = attempt_load(weights, map_location=device)
# Check the input resolution is divisible by the max stride, 32 (640 here)
imgsz = check_img_size(imgsz, s=model.stride.max())
# Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
# if device.type != 'cpu' and torch.cuda.device_count() > 1:
# model = nn.DataParallel(model)
# Half
# If running on a single GPU (not CPU), cast the model from FP32 to FP16 to speed up the forward pass
half = device.type != 'cpu' and torch.cuda.device_count() == 1 # half precision only supported on single-GPU
if half:
model.half() # FP16 inference on the GPU
print("GPU")
# Configure
# eval() freezes Dropout (using the trained behavior) and switches BatchNorm to inference mode
model.eval()
# Load the dataset config
with open(data) as f:
data = yaml.load(f, Loader=yaml.FullLoader) # config dict
nc = 1 if single_cls else int(data['nc']) # number of classes
# Build the IoU thresholds from 0.5 to 0.95 in steps of 0.05
# iouv is the list [0.5, 0.55, 0.6, ..., 0.95]
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
iouv = iouv[0].view(1) # keep only 0.5 (mAP@0.5); comment this line out to evaluate mAP@0.5:0.95
niou = iouv.numel() # number of IoU thresholds (1 here, 10 with the line above commented out)
# Dataloader
if not training:
# Run one all-zeros image through the model to check the forward pass
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
# Pick the image paths
path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
# Create the dataloader
# note rect=True: YOLOv5 evaluates with rectangular inference
dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]
seen = 0 # number of images evaluated so far
# Get the class names
names = model.names if hasattr(model, 'names') else model.module.names
# Get the COCO category-id mapping
# COCO has 80 classes (indices 0-79), but its official category ids lie in 1-90
# coco80_to_coco91_class() bridges the two, returning an index array in the 1-90 range
coco91class = coco80_to_coco91_class()
# Header string for the tqdm progress bar
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
# Initialize metrics and timers
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
# Initialize the test-set loss
loss = torch.zeros(3, device=device)
# Initialize the json dict, the statistics and the AP lists
jdict, stats, ap, ap_class = [], [], [], []
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device)
# Cast images from FP32 to FP16 as well
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = img.shape # batch size, channels, height, width
whwh = torch.Tensor([width, height, width, height]).to(device)
# Disable gradients
with torch.no_grad():
# Run model
# time_synchronized() calls torch.cuda.synchronize() before returning time.time()
# synchronize() waits for all pending GPU work, so the timings are accurate
t = torch_utils.time_synchronized()
# Forward pass: inf_out is the inference output, train_out the raw training output
inf_out, train_out = model(img, augment=augment) # inference and training outputs
# t0 accumulates the forward-pass time
t0 += torch_utils.time_synchronized() - t
# Compute loss
# When testing during training, compute the test-set box/obj/cls losses from the training output
if training: # if model has loss hyperparameters
loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls
# Run NMS
# t1 accumulates the NMS post-processing time
t = torch_utils.time_synchronized()
# non_max_suppression performs non-maximum suppression:
# conf_thres is the confidence threshold, iou_thres the IoU threshold, merge toggles merge-NMS
output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
t1 += torch_utils.time_synchronized() - t
# Statistics per image
# Per-image statistics: optionally write predictions to txt, build the json dict, count TPs, etc.
for si, pred in enumerate(output):
# Labels of the si-th image: class, x, y, w, h
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # ground-truth classes
seen += 1 # count evaluated images
# If there are no predictions, append empty stats
if pred is None:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Save test results to a txt file
# with open('test.txt', 'a') as file:
# [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
# Clip predicted coordinates to the image bounds
clip_coords(pred, (height, width))
# Build the COCO-format json dict
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
# a COCO json entry looks roughly like the line above
# image id
image_id = int(Path(paths[si]).stem.split('_')[-1])
# box coordinates
box = pred[:, :4].clone() # xyxy
# rescale the boxes to the original image size
scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
# convert to xywh
box = xyxy2xywh(box) # xywh
# Note: xyxy means top-left + bottom-right corners, while xywh here means center + width/height
# The COCO json box format is also xywh, but its xy is the TOP-LEFT corner,
# i.e. the COCO layout is top-left corner + width/height,
# so the next line converts the center point to the top-left corner
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
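# Worked example of the conversion above (a sketch): a box with center (320, 240) and size 100x50
# arrives as xywh = [320, 240, 100, 50]; subtracting half the size from xy gives [270, 215, 100, 50],
# the top-left + width/height layout that the COCO json expects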
#image_id: which image the detection belongs to
#category_id: class, mapped by coco91class from index 0-79 to category id 1-90
#bbox: box coordinates
#score: confidence score
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
# Initialize every prediction as incorrect; niou is the number of IoU thresholds
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
detected = [] # label indices that have already been matched
tcls_tensor = labels[:, 0]
# target boxes
# Convert the label boxes to xyxy and scale them by the image width/height
tbox = xywh2xyxy(labels[:, 1:5]) * whwh
# Per target class
# Handle each class in the image separately
for cls in torch.unique(tcls_tensor):
ti = (cls == tcls_tensor).nonzero().view(-1) # indices of label boxes of this class
pi = (cls == pred[:, 5]).nonzero().view(-1) # indices of predicted boxes of this class
# Search for detections
if pi.shape[0]:
# Prediction to target ious
# box_iou computes the IoU between every prediction and every label; max(1) keeps the best IoU per prediction, with i the matching label index
# (a standalone sketch of this pairwise IoU appears after the test.py listing)
# pred[pi, :4] has shape [N, 4]
# tbox[ti] has shape [M, 4]
# box_iou output has shape [N, M]
# ious has shape [N]
# i has shape [N]; its values are label indices in 0..M-1
ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
for j in (ious > iouv[0]).nonzero():
d = ti[i[j]] # index of the matched target
if d not in detected:
detected.append(d) # mark this target as matched
# iouv runs from 0.5 to 0.95 in steps of 0.05
# record a true positive at every IoU threshold the match clears
correct[pi[j]] = ious[j]>iouv # iou_thres is 1xn
if len(detected) == nl: # all targets already located in image
break
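# Worked example of the multi-threshold TP assignment (a sketch): with iouv = [0.50, 0.55, ..., 0.95],
# a matched prediction with IoU 0.72 yields
# correct[pi[j]] = [True, True, True, True, True, False, False, False, False, False],
# i.e. a TP at thresholds 0.50-0.70 and a FP at 0.75 and above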
# Append statistics (correct, conf, pcls, tcls)
# Append this image's results to stats
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Plot images
# Plot and save the ground truth and predictions of the first batch
if batch_i < 1:
f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename
plot_images(img, targets, paths, str(f), names) # ground truth
f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions
# Compute statistics
# Concatenate the per-image stats
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats):
# Compute the metrics from the TP statistics above:
# precision = TP/(TP+FP), recall = TP/(TP+FN), mAP, F1 score, per-class AP
p, r, ap, f1, ap_class = ap_per_class(*stats)
p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
# nt: number of target boxes per class in the test set
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
# Print results
# Print the overall metrics
pf = '%20s' + '%12.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
# Print results per class
# Print per-class metrics
if verbose and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
# Print speeds
# Print forward-pass time, NMS time and total time
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
if not training:
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
# Save JSON
# Evaluate the saved json predictions with pycocotools
# note: the test-set labels must also be converted to COCO json format
if save_json and map50 and len(jdict):
imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
f = 'detections_val2017_%s_results.json' % \
(weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '') # filename
print('\nCOCO mAP with pycocotools... saving %s...' % f)
with open(f, 'w') as file:
json.dump(jdict, file)
try:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
# https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO ground truth api
cocoDt = cocoGt.loadRes(f) # initialize COCO pred api
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.params.imgIds = imgIds # image IDs to evaluate
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
except:
print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
'See https://github.com/cocodataset/cocoapi/issues/356')
# Return results
# Return the test metrics
model.float() # for training
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='test.py')
# Register options on the parser, e.g. parser.add_argument('--foo')
#weights: model weights to evaluate
parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
#data: dataset config file with the dataset paths
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
#batch-size: forward-pass batch size, default 32
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
#img-size: input image resolution, default 640
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
#conf-thres: confidence threshold for filtering boxes, default 0.001
parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
#iou-thres: IoU threshold for NMS, default 0.65
parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
#save-json: save predictions in COCO json format and evaluate with pycocotools (requires COCO-json labels too), default False
parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
#task: evaluation mode, default 'val'; see the notes below
parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
#device: evaluation device; 'cpu', '0' (one GPU, cuda:0) or '0,1,2,3' (multiple GPUs)
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
#single-cls: treat the dataset as single-class, default False
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
#augment: use TTA (Test-Time Augmentation), default False
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--merge', action='store_true', help='use Merge NMS')
#verbose: print per-class mAP, default False
parser.add_argument('--verbose', action='store_true', help='report mAP by class')
# Parse the command-line arguments
opt = parser.parse_args()
# Force save_json on when evaluating coco.yaml
opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
# check_file verifies that the file exists
opt.data = check_file(opt.data) # check file
print(opt)
# task in ['val', 'test']: plain evaluation on the validation/test set
if opt.task in ['val', 'test']: # (default) run normally
test(opt.data,
opt.weights,
opt.batch_size,
opt.img_size,
opt.conf_thres,
opt.iou_thres,
opt.save_json,
opt.single_cls,
opt.augment,
opt.verbose)
# task == 'study': evaluate yolov5 and yolov3-spp models across a range of image sizes and plot the results
elif opt.task == 'study': # run over a range of settings and save/plot
for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to
x = list(range(352, 832, 64)) # x axis
y = [] # y axis
for i in x: # img-size
print('\nRunning %s point %s...' % (f, i))
r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
# plot_study_txt(f, x) # plot
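A note before train.py: the matching step in test.py hinges on box_iou, the pairwise IoU between predictions and same-class labels. Here is a minimal stand-in sketch of that computation (my own hand-rolled version for illustration; the project actually uses its utils implementation, and the sample boxes are made up):
import torch

def pairwise_iou(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """IoU between every box in a (N,4) and every box in b (M,4), both xyxy. Returns (N,M)."""
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])  # (N,M,2) intersection top-left
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])  # (N,M,2) intersection bottom-right
    wh = (rb - lt).clamp(min=0)                     # zero out non-overlapping pairs
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)

# e.g. two predictions matched against one label box, as in the test.py loop
preds = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])
label = torch.tensor([[0., 0., 10., 10.]])
ious, idx = pairwise_iou(preds, label).max(1)  # best label per prediction
print(ious)  # tensor([1.0000, 0.1429])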
train.py
import argparse
import glob
import logging
import math
import os
import random
import shutil
import time
from pathlib import Path
import numpy as np
import torch.distributed as dist
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import test # import test.py to get mAP after each epoch
from models.yolo import Model
from utils.datasets_robust import create_dataloader
from utils.general import (
torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights,
compute_loss, plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file,
check_git_status, check_img_size, increment_dir, print_mutation, plot_evolution, set_logging)
from utils.google_utils import attempt_download
from utils.torch_utils import init_seeds, ModelEMA, select_device, intersect_dicts
# from models.spinenet_yolo import YoloV5
# from models.yolov6 import YoloV6
from models.spinenet_yolo import YoloV6
logger = logging.getLogger(__name__)
def train(hyp, opt, device, tb_writer=None):
logger.info(f'Hyperparameters {hyp}')
# Directory for the training logs
# The logs include: weights, the tensorboard file, the hyperparameters hyp, the training options opt (epochs, batch_size, ...) and results.txt
# results.txt records: GPU memory usage; training box loss, objectness loss, classification loss and total loss;
# the number of targets, the input resolution, precision TP/(TP+FP) and recall TP/(TP+FN);
# and the validation (test) mAP@0.5, mAP@0.5:0.95, box loss, objectness loss and classification loss.
# The ground truth of the first three batches (batch < 3) is also saved
# With hyperparameter evolution there is no tb_writer.log_dir (tb_writer is None), so an 'evolve' folder is used as the log directory
log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / 'evolve' # logging directory
# Directory for the saved weights
wdir = log_dir / 'weights' # weights directory
os.makedirs(wdir, exist_ok=True)
last = wdir / 'last.pt'
best = wdir / 'best.pt'
best_f32 = wdir / 'best_float32.pt'
# Path of results.txt
results_file = str(log_dir / 'results.txt')
# Get epochs, batch size, total batch size (for distributed training), weights and the process rank (mainly used for DDP)
epochs, batch_size, total_batch_size, weights, rank = \
opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
# Save run settings
# Save hyp and opt
with open(log_dir / 'hyp.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
# torch_distributed_zero_first synchronizes all processes
# check_dataset verifies the dataset and downloads it if missing (works for the yaml datasets bundled with the project)
with open(log_dir / 'opt.yaml', 'w') as f:
yaml.dump(vars(opt), f, sort_keys=False)
# Configure
cuda = device.type != 'cpu'
init_seeds(2 + rank) # set the random seeds
# Load the dataset config
with open(opt.data) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict
with torch_distributed_zero_first(rank):
check_dataset(data_dict) # check
# Get the number of classes and the class names
# with opt.single_cls everything is treated as a single class
train_path = data_dict['train']
test_path = data_dict['val']
nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names
assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check
# Model
# pretrained = weights.endswith('.pt')
pretrained=False
if pretrained:
# Load the model, auto-downloading the weights from Google Drive
# the download often fails, so it is better to fetch the weights beforehand and put them in the weights directory
with torch_distributed_zero_first(rank):
attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location=device) # load checkpoint
if hyp.get('anchors'):
# load the checkpoint
ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor
# model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create
# The model can be created from opt.cfg or from ckpt['model'].yaml
# The difference matters for resume: on resume opt.cfg is set to '',
# so the model is built from ckpt['model'].yaml;
# this also decides whether the anchor keys are excluded below (on resume the anchors are NOT loaded)
# Saved checkpoints store their anchors, so if a user defines custom anchors and then resumes,
# the COCO-tuned anchors from the checkpoint would otherwise overwrite the custom ones,
# see https://github.com/ultralytics/yolov5/issues/459
# intersect_dicts below handles this by ignoring the excluded keys
model = YoloV6(opt.data or ckpt['model'].yaml, ch=3).to(device) # create
exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [] # exclude keys
state_dict = ckpt['model'].float().state_dict() # to FP32
state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(state_dict, strict=False) # load
# Report how many checkpoint entries were transferred into the freshly created model
# on resume, two entries fewer are loaded (anchors and anchor_grid)
logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report
else:
# Create the model from scratch; ch is the number of input image channels
model = YoloV6(opt.data or 'yolov5s.yaml', ch=3).to(device) # create
# Freeze
# Freeze layers by listing their (full or partial) parameter names
# see https://github.com/ultralytics/yolov5/issues/679
# This is only an example of how freezing works:
# You can add any parameters you want to this list, with full or partial names,
# to freeze them before training starts.
# This code freezes all weights, leaving only biases with active gradients:
freeze = ['', ] # parameter names to freeze (full or partial)
if any(freeze):
for k, v in model.named_parameters():
if any(x in k for x in freeze):
print('freezing %s' % k)
v.requires_grad = False # no gradients for frozen parameters
# Optimizer
# nbs is the nominal batch size;
# e.g. with the default opt.batch_size of 16 and nbs of 64,
# gradients are accumulated over 64/16 = 4 batches (accumulate)
# before each optimizer step, effectively enlarging the batch size
nbs = 64 # nominal batch size
accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing
# Scale the weight-decay coefficient to the effective batch size
hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay
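# Worked example of the scaling above (a sketch, assuming batch_size 16 and nbs 64):
# accumulate = max(round(64 / 16), 1) = 4, so gradients from 4 batches are summed before each step,
# an effective batch of 64; weight_decay is then scaled by 16 * 4 / 64 = 1.0 (unchanged in this case)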
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
# Split the parameters into three groups (conv/linear weights, biases, everything else) for the optimizer
for k, v in model.named_parameters():
v.requires_grad = True
if '.bias' in k:
pg2.append(v) # biases
elif '.weight' in k and '.bn' not in k:
pg1.append(v) # apply weight decay
else:
pg0.append(v) # all else
# Choose the optimizer and configure group pg0
if opt.adam:
optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
# pg1 (weights) gets weight decay
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
# pg2 (biases) gets no weight decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
# Log the optimizer groups
logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2
# Learning-rate schedule: cosine annealing,
# decaying via the lambda lf below as a function of the epoch and the hyperparameter hyp['lrf']
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# plot_lr_scheduler(optimizer, scheduler, epochs)
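# Worked values of the cosine schedule above (a sketch, assuming epochs=300 and hyp['lrf']=0.2):
# lf(0) = 1.0 (training starts at lr0), lf(150) = 0.6 (halfway), lf(300) = 0.2 (ends at lr0 * lrf)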
# Resume from a checkpoint
# initialize the starting epoch and the best result so far
# best_fitness is the sum of [precision, recall, mAP@0.5, mAP@0.5:0.95] weighted by [0.0, 0.0, 0.1, 0.9]
# best.pt is saved according to best_fitness
start_epoch, best_fitness = 0, 0.0
if pretrained:
# Optimizer
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
# Results
# restore the training results (results.txt)
if ckpt.get('training_results') is not None:
with open(results_file, 'w') as file:
file.write(ckpt['training_results']) # write results.txt
# Epochs
start_epoch = ckpt['epoch'] + 1
# Back up the weights before resuming
# resume now works almost 100% reliably, see https://github.com/ultralytics/yolov5/pull/756
# but to keep other resume issues from overwriting the previous weights, a backup is made here, see https://github.com/ultralytics/yolov5/pull/765
if opt.resume:
assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}') # save previous weights
# If the requested epochs is smaller than the checkpoint's epoch,
# treat it as the number of additional epochs to fine-tune for, not the total
if epochs < start_epoch:
logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
(weights, ckpt['epoch'], epochs))
epochs += ckpt['epoch'] # finetune additional epochs
del ckpt, state_dict
# Image sizes
# Get the model's maximum stride and the input resolutions
gs = int(max(model.stride)) # grid size (max stride)
# Check the input resolutions are multiples of the stride gs
imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples
print('imgsz: {}, imgsz_test: {}'.format(imgsz, imgsz_test))
# DP mode
# Distributed training, see https://github.com/ultralytics/yolov5/issues/475
# DataParallel mode only supports multiple GPUs on a single machine
# rank is the process rank; with rank == -1 and more than one GPU, DataParallel is used
# with rank == -1 and a single GPU, no parallelism is applied
if cuda and rank == -1 and torch.cuda.device_count() > 1:
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and rank != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
logger.info('Using SyncBatchNorm()')
# Exponential moving average
ema = ModelEMA(model) if rank in [-1, 0] else None
# DDP mode
if cuda and rank != -1:
model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)
# Trainloader
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
world_size=opt.world_size, workers=opt.workers)
mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class
nb = len(dataloader) # number of batches
assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
# Process 0
if rank in [-1, 0]:
ema.updates = start_epoch * nb // accumulate # set EMA updates
testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,
hyp=hyp, augment=False, cache=opt.cache_images, rect=True, rank=-1,
world_size=opt.world_size, workers=opt.workers)[0] # testloader
if not opt.resume:
labels = np.concatenate(dataset.labels, 0)
c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
# model._initialize_biases(cf.to(device))
plot_labels(labels, save_dir=log_dir)
if tb_writer:
# tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
tb_writer.add_histogram('classes', c, 0)
# Anchors
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
# Model parameters
hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
model.names = names
# Start training
t0 = time.time()
nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = amp.GradScaler(enabled=cuda)
logger.info('Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n'
'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()
# Update image weights (optional)
if opt.image_weights:
# Generate indices
if rank in [-1, 0]:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
# Broadcast if DDP
if rank != -1:
indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
dist.broadcast(indices, 0)
if rank != 0:
dataset.indices = indices.cpu().numpy()
# Update mosaic border
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(4, device=device) # mean losses
if rank != -1:
dataloader.sampler.set_epoch(epoch)
pbar = enumerate(dataloader)
logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
if rank in [-1, 0]:
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])
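# Worked example of the warmup interpolation above (a sketch, assuming nw=1000, lr0=0.01 and lf(epoch)=1.0):
# at ni=500 the bias group (j == 2) gets lr = np.interp(500, [0, 1000], [0.1, 0.01]) = 0.055,
# the other groups get lr = np.interp(500, [0, 1000], [0.0, 0.01]) = 0.005,
# and momentum ramps from 0.9 toward hyp['momentum']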
# Multi-scale
if opt.multi_scale:
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Forward
with amp.autocast(enabled=cuda):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size
if rank != -1:
loss *= opt.world_size # gradient averaged between devices in DDP mode
# Backward
scaler.scale(loss).backward()
# Optimize
if ni % accumulate == 0:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
if rank in [-1, 0]:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB)
s = ('%10s' * 2 + '%10.4g' * 6) % (
'%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
pbar.set_description(s)
# Plot
if ni < 3:
f = str(log_dir / ('train_batch%g.jpg' % ni)) # filename
result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
if tb_writer and result is not None:
tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
# tb_writer.add_graph(model, imgs) # add model to tensorboard
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard
scheduler.step()
# DDP process 0 or single-GPU
if rank in [-1, 0]:
# mAP
if ema:
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride'])
final_epoch = epoch + 1 == epochs
if not opt.notest or final_epoch: # Calculate mAP
if final_epoch: # replot predictions
[os.remove(x) for x in glob.glob(str(log_dir / 'test_batch*_pred.jpg')) if os.path.exists(x)]
results, maps, times = test.test(opt.data,
batch_size=total_batch_size,
imgsz=imgsz_test,
model=ema.ema,
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=log_dir)
# Write
with open(results_file, 'a') as f:
f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
if len(opt.name) and opt.bucket:
os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
# Tensorboard
if tb_writer:
tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', # train loss
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
'val/giou_loss', 'val/obj_loss', 'val/cls_loss', # val loss
'x/lr0', 'x/lr1', 'x/lr2'] # params
for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
tb_writer.add_scalar(tag, x, epoch)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1]
if fi > best_fitness:
best_fitness = fi
# Save model
save = (not opt.nosave) or (final_epoch and not opt.evolve)
if save:
with open(results_file, 'r') as f: # create checkpoint
ckpt = {'epoch': epoch,
'best_fitness': best_fitness,
'training_results': f.read(),
'model': ema.ema,
'optimizer': None if final_epoch else optimizer.state_dict()}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
torch.save({'model': model.float()}, best_f32)
del ckpt
# end epoch ----------------------------------------------------------------------------------------------------
# end training
if rank in [-1, 0]:
# Strip optimizers
n = opt.name if opt.name.isnumeric() else ''
fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
if os.path.exists(f1):
os.rename(f1, f2) # rename
if str(f2).endswith('.pt'): # is *.pt
strip_optimizer(f2) # strip optimizer
os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload
# Finish
if not opt.evolve:
plot_results(save_dir=log_dir) # save as results.png
logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
dist.destroy_process_group() if rank not in [-1, 0] else None
torch.cuda.empty_cache()
return results
if __name__ == '__main__':
parser = argparse.ArgumentParser() # build the argument parser
#weights: initial weights to load
parser.add_argument('--weights', type=str, help='initial weights path')
#cfg: model config file (network architecture)
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
#data: dataset config file (paths, class names, etc.)
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
#hyp: hyperparameter file
parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
#epochs: total number of training epochs
parser.add_argument('--epochs', type=int, default=300)
#batch-size: batch size
parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
#img-size: input image resolutions [train, test]
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
#rect: rectangular training, default False
parser.add_argument('--rect', action='store_true', help='rectangular training')
#resume: resume an interrupted training run from its latest checkpoint
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
#nosave: only save the final checkpoint, default False
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
#notest: only run test on the final epoch, default False
parser.add_argument('--notest', action='store_true', help='only test final epoch')
#noautoanchor: disable the autoanchor check, default False
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
#evolve: run hyperparameter evolution, default False
parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
#bucket: gsutil bucket on Google Cloud, rarely used
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
#cache-images: cache images in memory up front to speed up training, default False
parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
#image-weights: sample training images with class-weighted probabilities
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
#name: run name; if set, results.txt is renamed to results_name.txt, default empty
parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
#device: training device; 'cpu', '0' (one GPU, cuda:0) or '0,1,2,3' (multiple GPUs)
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
#multi-scale: multi-scale training, default False
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
#single-cls: treat the dataset as single-class, default False
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
#adam: use the Adam optimizer
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
#sync-bn: use cross-GPU synchronized BatchNorm (DDP mode only)
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
#local_rank: GPU index for DDP
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
#logdir: logging directory
parser.add_argument('--logdir', type=str, default='runs/', help='logging directory')
#workers: maximum number of dataloader workers
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
opt = parser.parse_args()
# Set up the DDP-mode parameters
# world_size: total number of processes
# global_rank: rank of this process
# derived opt parameters:
opt.total_batch_size = opt.batch_size
opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
set_logging(opt.global_rank)
if opt.global_rank in [-1, 0]:
check_git_status() # check whether the code is up to date (not available on Windows)
# Resume
if opt.resume: # resume an interrupted run
# If opt.resume is a string, it is the checkpoint path to resume from
# get_latest_run() finds the most recent last.pt under the runs folder
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
log_dir = Path(ckpt).parent.parent # runs/exp0
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
# Replace opt entirely with the saved options
with open(log_dir / 'opt.yaml') as f:
opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace
# opt.cfg is set to '' to match the logic in train() (controls whether the checkpoint's anchors are loaded)
opt.cfg, opt.weights, opt.resume = '', ckpt, True
logger.info('Resuming training from %s' % ckpt)
else:
# opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
# Check the config files
opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files
opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
# Create the run directory under opt.logdir
log_dir = increment_dir(Path(opt.logdir) / 'exp', opt.name) # runs/exp1
device = select_device(opt.device, batch_size=opt.batch_size)
# DDP mode
# re-select the device per process in DDP mode
if opt.local_rank != -1:
assert torch.cuda.device_count() > opt.local_rank
torch.cuda.set_device(opt.local_rank)
device = torch.device('cuda', opt.local_rank) # pick the device by GPU index
# initialize the process group
dist.init_process_group(backend='nccl', init_method='env://') # distributed backend
assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
# Split the total batch size evenly across the processes
opt.batch_size = opt.total_batch_size // opt.world_size
# Log the opt parameters
logger.info(opt)
# Hyperparameters: load the hyperparameter list
with open(opt.hyp) as f:
hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps
# Train
# Without hyperparameter evolution, just call train() and start training
if not opt.evolve:
tb_writer = None
if opt.global_rank in [-1, 0]:
# create the tensorboard writer
logger.info('Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/' % opt.logdir)
tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0
train(hyp, opt, device, tb_writer)
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.1, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'giou': (1, 0.02, 0.2), # GIoU loss gain
'cls': (1, 0.2, 4.0), # cls loss gain
'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
'anchors': (1, 2.0, 10.0), # anchors per output grid (0 to ignore)
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
'mixup': (1, 0.0, 1.0)} # image mixup (probability)
assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
opt.notest, opt.nosave = True, True # only test/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
yaml_file = Path('runs/evolve/hyp_evolved.yaml') # save best result here
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
# Evolution strategy: derive a base hyp from previous runs, then mutate it.
# Concretely:
# the results recorded for each previous generation give every hyp a weight
# with the hyps and their weights there are two selection modes:
# 1. single: pick one previous hyp at random, weighted by fitness: random.choices(range(n), weights=w)
# 2. weighted: fuse all previous hyps into one base hyp: (x * w.reshape(n, 1)).sum(0) / w.sum()
# evolve.txt records results + hyp after every generation
# at each generation the rows are sorted by results,
# the fitness function turns the previous results into weights,
# and the chosen selection mode produces the base hyp to mutate
for _ in range(1): # generations to evolve
if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
# Select parent(s)
# parent selection mode
parent = 'single' # parent selection method: 'single' or 'weighted'
# load evolve.txt
x = np.loadtxt('evolve.txt', ndmin=2)
# consider up to the five best previous generations
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
# weight each previous hyp by its fitness
w = fitness(x) - fitness(x).min() # weights
# build the base hyp according to the selection mode
if parent == 'single' or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
# mutate the hyperparameters
mp, s = 0.9, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
# per-hyperparameter mutation gains
g = np.array([x[0] for x in meta.values()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
# apply the mutation
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
# Apply the mutation on top of the base hyp
# [i + 7] because the first seven numbers of each row are the results (P, R, mAP, F1, test_losses=(GIoU, obj, cls)); the hyperparameters follow
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 7] * v[i]) # mutate
# Constrain to limits
# clip hyp back into the allowed range
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
# train with the mutated hyperparameters
results = train(hyp.copy(), opt, device)
# Write mutation results
# append the results and the corresponding hyp to evolve.txt
# each line of evolve.txt is one generation:
# the first seven numbers are (P, R, mAP, F1, test_losses=(GIoU, obj, cls)), followed by the hyp
# the best hyp is also saved to the yaml file
print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
# Plot results
plot_evolution(yaml_file)
print('Hyperparameter evolution complete. Best results saved as: %s\nCommand to train a new model with these '
'hyperparameters: $ python train.py --hyp %s' % (yaml_file, yaml_file))