基于YOLOv5车牌检测源代码解释

举报
凉城予梦 发表于 2022/10/13 13:05:57 2022/10/13
【摘要】 detect.py:import argparse(Python 内置的命令行解析模块,无需安装);import torch.backends.cudnn as cudnn;from models.experimental import *;from utils.datasets import *;from utils.utils import *;from models.LPRN...

detect.py

import argparse   #python的命令解析的模块,内置于python,不需要安装

import torch.backends.cudnn as cudnn

 

from models.experimental import *

from utils.datasets import *

from utils.utils import *

from models.LPRNet import *

 

def detect(save_img=False):
    """Detect licence plates with YOLOv5 and read their text with LPRNet.

    All settings are read from the module-level ``opt`` namespace (output,
    source, weights, view_img, save_txt, img_size, device, augment,
    conf_thres, iou_thres, classes, agnostic_nms). The output folder is
    deleted and recreated on every call.

    Args:
        save_img: Whether to write annotated images/videos to the output
            folder. Forced to True when the source is not a stream.
    """
    # Parenthesised tuple instead of a backslash continuation: a backslash
    # followed by whitespace or a blank line is a syntax error.
    out, source, weights, view_img, save_txt, imgsz = (
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size)
    # Sources handled as streams: camera '0', rtsp/http URLs, or a .txt list.
    webcam = source == '0' or source.startswith(('rtsp', 'http')) or source.endswith('.txt')
    device = torch_utils.select_device(opt.device)  # pick the compute device

    if os.path.exists(out):
        shutil.rmtree(out)  # delete the old output folder
    os.makedirs(out)  # create a fresh output folder
    half = device.type != 'cpu'  # use half precision (FP16) whenever not on CPU

    # Load the FP32 model and make sure the requested image size is compatible
    # with the model stride (check_img_size returns the size to use).
    model = attempt_load(weights, map_location=device)
    imgsz = check_img_size(imgsz, s=model.stride.max())
    if half:
        model.half()  # convert to FP16 for speed

    # Second-stage classifier: LPRNet reads the characters on each plate.
    classify = True
    if classify:
        modelc = LPRNet(lpr_max_len=8, phase=False, class_num=len(CHARS), dropout_rate=0).to(device)
        modelc.load_state_dict(torch.load('./weights/Final_LPRNet_model.pth', map_location=torch.device('cpu')))
        print("load pretrained model successful!")
        modelc.to(device).eval()

    # Choose the data loader according to the kind of input source.
    vid_path, vid_writer = None, None
    save_path = None  # stays None when the dataset yields nothing
    if webcam:
        view_img = True
        cudnn.benchmark = True  # speeds up inference for constant image size
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)  # images or video files

    # Class names, and one random colour (list of 3 ints) per class for boxes.
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    t0 = time.time()
    # One dummy forward pass (skipped on CPU) to check the model runs.
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    # Per the original author's notes, each dataset item is:
    #   path    - image/video path
    #   img     - the resized + padded image in CHW layout, e.g. (3, 640, 512)
    #   im0s    - the original-size image, e.g. (1080, 810, 3)
    #   vid_cap - None for images, the video source when reading a video
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # match the model precision
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch axis, e.g. (3,384,640) -> (1,3,384,640)

        # Inference.
        t1 = torch_utils.time_synchronized()
        # Original author's notes on the raw prediction (not verifiable here):
        #   shape (1, num_boxes, 5 + num_class); h, w are the network input
        #   height/width and may differ because of rectangular inference;
        #   num_boxes = (h/32 * w/32 + h/16 * w/16 + h/8 * w/8) * 3;
        #   pred[..., 0:4] box as xywh (centre + size), pred[..., 4] objectness
        #   confidence, remaining columns class probabilities.
        pred = model(img, augment=opt.augment)[0]
        print(pred.shape)

        # NMS with the configured confidence/IoU thresholds, optional class
        # filter and class-agnostic mode. Per the original author's notes the
        # result is a list with one tensor per image, each of shape
        # (num_boxes, 6) holding xyxy box + conf + cls.
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = torch_utils.time_synchronized()

        # Second stage: read the plate characters.
        # NOTE(review): plat_num is zipped with one image's detections below;
        # confirm how apply_classifier lays it out for multi-image batches.
        if classify:
            pred, plat_num = apply_classifier(pred, modelc, img, im0s)

        for i, det in enumerate(pred):  # detections per image
            if webcam:  # streams are batched, so pick the i-th entry
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            # Output image/video path and the stem of the label .txt file.
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # network input size for the log line
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalisation gain whwh

            if det is not None and len(det):
                # Rescale boxes from the network input size to the original
                # image size (coordinates stay in xyxy format).
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Append the number of detections per class to the log line.
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])

                # Write/draw each detection with its recognised plate text.
                for de, lic_plat in zip(det, plat_num):
                    *xyxy, conf, cls = de

                    if save_txt:
                        # xyxy -> normalised xywh, flattened to a list of 4.
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
                        with open(txt_path + '.txt', 'a') as f:
                            # Unpack xywh so all five '%g' placeholders are
                            # filled; passing the list itself raises TypeError.
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))

                    if save_img or view_img:
                        # Decode the plate text; a distinct name avoids
                        # shadowing the per-image index `i`.
                        lb = ''.join(CHARS[int(char_idx)] for char_idx in lic_plat)
                        label = '%s %.2f' % (lb, conf)
                        im0 = plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Log inference + NMS time.
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Show the result if requested.
            if view_img:
                cv2.namedWindow("Demo3", cv2.WINDOW_NORMAL)
                cv2.resizeWindow("Demo3", 2500, 1000)
                cv2.imshow("Demo3", im0)
                # Blocks until a key is pressed (the author's image mode). For
                # video the author's alternative is `cv2.waitKey(1) == ord('q')`
                # followed by `raise StopIteration`.
                cv2.waitKey()

            # Save the annotated image or video frame.
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        # Only open a result on macOS when at least one item was processed.
        if platform == 'darwin' and save_path is not None:
            os.system('open ' + save_path)

    # Total elapsed time.
    print('Done. (%.3fs)' % (time.time() - t0))

 

if __name__ == '__main__':
    # Build the command-line parser. nargs tells argparse how many values an
    # option consumes: '*' means zero or more, '+' means one or more.
    parser = argparse.ArgumentParser()
    # --weights: the trained weight file(s).
    parser.add_argument('--weights', nargs='+', type=str, default='./weights/last.pt', help='model.pt path(s)')
    # --source: input data. Per the original author: an image/video path,
    # '0' for the built-in camera, '1' for a USB camera, or an rtsp/http stream.
    parser.add_argument('--source', type=str, default=r'F:\8.jpg', help='source')
    # --output: folder that receives the annotated results.
    parser.add_argument('--output', type=str, default='inference/output/', help='output folder')  # output folder
    # --img-size: network input image size.
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    # --conf-thres: confidence threshold.
    parser.add_argument('--conf-thres', type=float, default=0.8, help='object confidence threshold')
    # --iou-thres: IoU threshold used by NMS.
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    # --device: compute device.
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # --view-img: show the annotated image/video.
    # NOTE(review): store_true combined with default=True means this is always
    # True and cannot be switched off from the command line; kept as-is so the
    # author's default display behaviour is unchanged.
    parser.add_argument('--view-img', action='store_true', help='display results', default=True)
    # --save-txt: also save the predicted boxes as *.txt files (default False).
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    # --classes: keep only the listed classes, e.g. `0` or `0 2 3`.
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
    # --agnostic-nms: let NMS also suppress boxes across different classes.
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    # --augment: test-time augmentation (multi-scale, flips, ...) at inference.
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    # --update: re-process every listed model file (see the loop below).
    parser.add_argument('--update', action='store_true', help='update all models')
    # Parse the arguments into the module-level `opt` read by detect().
    opt = parser.parse_args()
    print(opt)

    # Inference only: disable gradient tracking for everything below.
    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            # NOTE(review): the original comment says this strips optimizer
            # state from the .pt files; confirm that create_pretrained does so.
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
                detect()
                create_pretrained(opt.weights, opt.weights)
        else:
            detect()

test.py

import argparse

import json

 

from models.experimental import *

from utils.datasets import *

 

def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir='',
         merge=False):
    """Evaluate a detection model on a validation/test set.

    When ``model`` is given the function is being called from training and
    uses that model and ``dataloader``. Otherwise it loads ``weights``, builds
    its own dataloader and reads ``opt.device``, ``opt.merge`` and
    ``opt.task`` from the module-level ``opt``.

    Args:
        data: Path to the dataset YAML file (must define 'nc' and 'val'/'test').
        weights: Weight file(s) to load when ``model`` is None.
        batch_size: Batch size for the dataloader built here.
        imgsz: Inference image size in pixels.
        conf_thres: Confidence threshold passed to NMS.
        iou_thres: IoU threshold passed to NMS.
        save_json: Collect COCO-style results and evaluate with pycocotools.
        single_cls: Treat the dataset as having a single class.
        augment: Run augmented inference.
        verbose: Print metrics for every class.
        model: Already-built model (training mode) or None.
        dataloader: Dataloader to use in training mode.
        save_dir: Folder for the first batch's ground-truth/prediction plots.
        merge: Use merge-NMS (overridden by ``opt.merge`` when not training).

    Returns:
        ``((mp, mr, map50, map, *losses), maps, t)`` where ``maps`` is a
        per-class array and ``t`` is (inference ms, NMS ms, total ms, imgsz,
        imgsz, batch_size).
    """
    # Called by train.py when a model is passed in: reuse its device.
    training = model is not None
    if training:
        device = next(model.parameters()).device
    else:  # called directly
        device = torch_utils.select_device(opt.device, batch_size=batch_size)
        merge = opt.merge  # use Merge NMS

        # Remove plots left over from a previous run.
        for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')):
            os.remove(f)

        # Load the model and make the image size compatible with its stride.
        model = attempt_load(weights, map_location=device)
        imgsz = check_img_size(imgsz, s=model.stride.max())

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half precision is only used on a single GPU.
    half = device.type != 'cpu' and torch.cuda.device_count() == 1
    if half:
        model.half()  # FP16 inference on GPU
        print("GPU")

    # Evaluation mode.
    model.eval()
    # Load the dataset configuration.
    # NOTE(review): yaml.load with FullLoader should only be used on trusted files.
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # dict
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    # IoU thresholds 0.5 ... 0.95 (10 values) for mAP@0.5:0.95 ...
    iouv = torch.linspace(0.5, 0.95, 10).to(device)
    # ... reduced to the single 0.5 threshold; comment this line out to get a
    # real mAP@0.5:0.95. With it active niou is 1, not 10.
    iouv = iouv[0].view(1)
    niou = iouv.numel()

    # Dataloader
    if not training:
        # One dummy forward pass (skipped on CPU) to check the model runs.
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        # rect=True: evaluation uses rectangular inference.
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0  # number of images evaluated
    names = model.names if hasattr(model, 'names') else model.module.names
    # Lookup from the model's class index to the COCO category id written to
    # the JSON results (presumably 80 contiguous indices -> COCO's 91-id space).
    coco91class = coco80_to_coco91_class()
    # Header shown by the tqdm progress bar.
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    # Metrics and accumulated inference (t0) / NMS (t1) time.
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)  # accumulated validation loss
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model. time_synchronized() is used so that pending GPU work
            # is included in the measured time (per the original author's notes).
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Compute loss (only when called during training).
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # GIoU, obj, cls

            # Run NMS and accumulate its time in t1.
            t = torch_utils.time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            # Labels of image `si`: class followed by xywh.
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target classes
            seen += 1

            # No predictions: record an empty entry (only if there are labels).
            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip predicted boxes to the image bounds.
            clip_coords(pred, (height, width))

            # Append to the COCO-format JSON list, whose entries look like
            # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
            if save_json:
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                # COCO boxes are top-left x, y plus width, height.
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[5])],
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices already matched
                tcls_tensor = labels[:, 0]

                # target boxes: xywh -> xyxy, scaled by width/height
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices of this class
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices of this class

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious: for every prediction keep
                        # the best IoU over the targets and that target's index.
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                # True positive at every IoU threshold exceeded.
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot ground truth and predictions for the first batch only.
        if batch_i < 1:
            f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds (ms per image). max(seen, 1) avoids a ZeroDivisionError
    # when the dataloader yielded no images.
    t = tuple(x / max(seen, 1) * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON and evaluate it with pycocotools. The ground-truth labels must
    # also be available in COCO JSON format.
    if save_json and map50 and len(jdict):
        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
        # Parenthesised instead of a backslash continuation, which breaks when
        # followed by a blank line.
        f = ('detections_val2017_%s_results.json' %
             (weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else ''))  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            # Best-effort evaluation: keep going, but do not swallow
            # KeyboardInterrupt/SystemExit and do report the actual cause.
            print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
                  'See https://github.com/cocodataset/cocoapi/issues/356')
            print('pycocotools evaluation failed: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t

 

if __name__ == '__main__':
    # Command-line interface of test.py.
    parser = argparse.ArgumentParser(prog='test.py')
    # Model weight file(s) to evaluate.
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    # Dataset configuration file (dataset paths).
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
    # Batch size of the forward pass.
    parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
    # Input image resolution.
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    # Confidence threshold used when filtering boxes.
    parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
    # IoU threshold used by NMS.
    parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
    # Save predictions in COCO JSON format and evaluate them with the COCO API.
    parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
    # Evaluation mode; see the dispatch at the bottom of this block.
    parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
    # Device: cpu, a single GPU index, or several comma-separated indices.
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # Boolean switches, all off by default: single-class dataset, test-time
    # augmentation, merge-NMS, per-class mAP report.
    for flag, help_text in (('--single-cls', 'treat as single-class dataset'),
                            ('--augment', 'augmented inference'),
                            ('--merge', 'use Merge NMS'),
                            ('--verbose', 'report mAP by class')):
        parser.add_argument(flag, action='store_true', help=help_text)

    # Parse into the module-level `opt` that test() also reads.
    opt = parser.parse_args()
    # COCO datasets always get the JSON evaluation.
    opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
    # Validate the dataset file path via check_file.
    opt.data = check_file(opt.data)  # check file
    print(opt)

    if opt.task in ('val', 'test'):
        # Default: a single evaluation run on the validation or test split.
        test(data=opt.data,
             weights=opt.weights,
             batch_size=opt.batch_size,
             imgsz=opt.img_size,
             conf_thres=opt.conf_thres,
             iou_thres=opt.iou_thres,
             save_json=opt.save_json,
             single_cls=opt.single_cls,
             augment=opt.augment,
             verbose=opt.verbose)
    elif opt.task == 'study':
        # Sweep every listed model over a range of image sizes and save the
        # metrics and timings of each run to one text file per model.
        image_sizes = list(range(352, 832, 64))  # x axis
        for weight_file in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
            out_name = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weight_file).stem)  # filename to save to
            rows = []  # y axis
            for size in image_sizes:  # img-size
                print('\nRunning %s point %s...' % (out_name, size))
                results, _, timings = test(opt.data, weight_file, opt.batch_size, size,
                                           opt.conf_thres, opt.iou_thres, opt.save_json)
                rows.append(results + timings)  # results and times
            np.savetxt(out_name, rows, fmt='%10.4g')  # save
        os.system('zip -r study.zip study_*.txt')
        # plot_study_txt(f, x)  # plot

train.py

import argparse

import glob

import logging

import math

import os

import random

import shutil

import time

from pathlib import Path

 

import numpy as np

import torch.distributed as dist

import torch.nn.functional as F

import torch.optim as optim

import torch.optim.lr_scheduler as lr_scheduler

import torch.utils.data

import yaml

from torch.cuda import amp

from torch.nn.parallel import DistributedDataParallel as DDP

from torch.utils.tensorboard import SummaryWriter

from tqdm import tqdm

 

import test  # import test.py to get mAP after each epoch

from models.yolo import Model

from utils.datasets_robust import create_dataloader

from utils.general import (

    torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights,

    compute_loss, plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file,

    check_git_status, check_img_size, increment_dir, print_mutation, plot_evolution, set_logging)

from utils.google_utils import attempt_download

from utils.torch_utils import init_seeds, ModelEMA, select_device, intersect_dicts

 

# from models.spinenet_yolo import YoloV5

# from models.yolov6 import YoloV6

from models.spinenet_yolo import YoloV6

 

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

 

def train(hyp, opt, device, tb_writer=None):
    """Run the full training loop and return the most recent evaluation results.

    Args:
        hyp: Dict of hyperparameters. Keys read here: 'weight_decay', 'lr0',
            'momentum', 'lrf', 'anchor_t', 'cls' and (optionally) 'anchors'.
            NOTE: the dict is mutated in place ('weight_decay' and 'cls' are
            rescaled), which is why the evolve caller passes ``hyp.copy()``.
        opt: Namespace of run options (see the argparse definitions in the
            ``__main__`` block of this script, plus ``total_batch_size``,
            ``world_size`` and ``global_rank`` that are attached there).
        device: ``torch.device`` the model is moved to.
        tb_writer: Optional TensorBoard writer. When it is None (hyperparameter
            evolution), logs go to ``<opt.logdir>/evolve`` instead of
            ``tb_writer.log_dir``.

    Returns:
        The 7-element ``results`` tuple from the last ``test.test`` call
        (P, R, mAP, F1, val GIoU, val objectness, val classification per the
        inline comment below), or all zeros if no evaluation ever ran.
    """
    logger.info(f'Hyperparameters {hyp}')

    # Logging directory. It receives the weights, hyp.yaml, opt.yaml, results.txt
    # and the plots of the first training batches written further down.
    log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / 'evolve'  # logging directory
    wdir = log_dir / 'weights'  # weights directory
    os.makedirs(wdir, exist_ok=True)
    last = wdir / 'last.pt'
    best = wdir / 'best.pt'
    best_f32 = wdir / 'best_float32.pt'
    results_file = str(log_dir / 'results.txt')
    # epochs, per-process batch size, total batch size (matters for distributed runs),
    # initial weights path and process rank (-1 when not distributed)
    epochs, batch_size, total_batch_size, weights, rank = \
        opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

    # Save run settings (hyperparameters and options) next to the results
    with open(log_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(log_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    cuda = device.type != 'cpu'
    init_seeds(2 + rank)  # seed depends on the rank
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    # torch_distributed_zero_first is a project helper; per the original note it
    # synchronises processes while check_dataset verifies (and, for bundled yaml
    # datasets, downloads) the data.
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    # With --single-cls everything is collapsed into one class named 'item'
    nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names'])  # number classes, names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

    # Model
    # pretrained = weights.endswith('.pt')
    pretrained = False  # NOTE(review): hard-coded, so the checkpoint branch below never runs
    if pretrained:
        # Download the weights if they are not found locally (the original author
        # recommends placing them in the weights directory beforehand).
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        if hyp.get('anchors'):
            ckpt['model'].yaml['anchors'] = round(hyp['anchors'])  # force autoanchor
        # model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device)  # create

        # On resume opt.cfg is set to '' by the caller, so no 'anchor' keys are
        # excluded and the checkpoint anchors are loaded. Otherwise anchors are
        # dropped so checkpoint anchors do not overwrite user-defined ones
        # (see https://github.com/ultralytics/yolov5/issues/459).
        # NOTE(review): the model is built from opt.data here, not opt.cfg - confirm intended.
        model = YoloV6(opt.data or ckpt['model'].yaml, ch=3).to(device)  # create
        exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else []  # exclude keys
        state_dict = ckpt['model'].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        # Report how many checkpoint entries were transferred vs. entries in the new model
        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
    else:
        # Build a fresh model; ch is the number of input image channels
        model = YoloV6(opt.data or 'yolov5s.yaml', ch=3).to(device)  # create

    # Freeze
    # Add full or partial parameter names to this list to freeze them before
    # training starts (see https://github.com/ultralytics/yolov5/issues/679).
    # The default list holds only an empty string, so nothing is frozen.
    freeze = ['', ]  # parameter names to freeze (full or partial)
    freeze_active = any(freeze)
    for k, v in model.named_parameters():
        # Every parameter is trainable unless explicitly frozen. This reset used to
        # live in the optimizer-grouping loop below, where it silently re-enabled
        # gradients on the parameters that had just been frozen.
        v.requires_grad = True
        if freeze_active and any(x in k for x in freeze):
            print('freezing %s' % k)
            v.requires_grad = False  # no gradient computation

    # Optimizer
    # nbs is the nominal batch size: gradients are accumulated over
    # round(nbs / total_batch_size) batches before each optimizer step, which
    # emulates a larger batch (e.g. batch 16 -> 64 / 16 = 4 accumulation steps).
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay

    # Three parameter groups: biases (pg2), non-'.bn' weights that receive weight
    # decay (pg1) and everything else (pg0).
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_parameters():
        if '.bias' in k:
            pg2.append(v)  # biases
        elif '.weight' in k and '.bn' not in k:
            pg1.append(v)  # apply weight decay
        else:
            pg0.append(v)  # all else

    # The optimizer is created on pg0; the other two groups are added afterwards
    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Cosine learning-rate decay from 1 down to hyp['lrf'] over `epochs`.
    # The lambda reads `epochs` lazily, so the fine-tuning adjustment below is honoured.
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf']  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Resume
    # best_fitness decides when best.pt is written; it comes from the project
    # helper fitness() (per the original note, a weighted sum of
    # [P, R, mAP@0.5, mAP@0.5:0.95] - weights not visible here).
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # Results: restore the results.txt content stored in the checkpoint
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        # When resuming, back up the existing weights first so a failed resume
        # cannot overwrite them (see https://github.com/ultralytics/yolov5/pull/765).
        if opt.resume:
            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
            shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}')  # save previous weights
        # If the requested epochs are fewer than the checkpoint's epoch, treat them
        # as additional epochs to train rather than the total.
        if epochs < start_epoch:
            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                        (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples
    print('imgsz: {}, imgsz_test: {}'.format(imgsz, imgsz_test))

    # DP mode
    # DataParallel is used only for a non-distributed run (rank == -1) with more
    # than one visible GPU (see https://github.com/ultralytics/yolov5/issues/475).
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # Exponential moving average (only kept on process -1 / 0)
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
                                            world_size=opt.world_size, workers=opt.workers)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

    # Process 0
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,
                                       hyp=hyp, augment=False, cache=opt.cache_images, rect=True, rank=-1,
                                       world_size=opt.world_size, workers=opt.workers)[0]  # testloader

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            plot_labels(labels, save_dir=log_dir)
            if tb_writer:
                # tb_writer.add_hparams(hyp, {})  # causes duplicate https://github.com/ultralytics/yolov5/pull/384
                tb_writer.add_histogram('classes', c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Model parameters
    hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(3 * nb, 1e3)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    logger.info('Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n'
                'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # random.randrange() rejects float arguments on Python 3.12+
                # (deprecated since 3.10), so the bounds are converted to int first.
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + gs)) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device), model)  # loss scaled by batch_size
                if rank != -1:
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode

            # Backward
            scaler.scale(loss).backward()

            # Optimize (only every `accumulate` batches)
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 + '%10.4g' * 6) % (
                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
                pbar.set_description(s)

                # Plot the first three training batches
                if ni < 3:
                    f = str(log_dir / ('train_batch%g.jpg' % ni))  # filename
                    result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
                    if tb_writer and result is not None:
                        tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                        # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            if ema:
                ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride'])
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                if final_epoch:  # replot predictions
                    [os.remove(x) for x in glob.glob(str(log_dir / 'test_batch*_pred.jpg')) if os.path.exists(x)]
                results, maps, times = test.test(opt.data,
                                                 batch_size=total_batch_size,
                                                 imgsz=imgsz_test,
                                                 model=ema.ema,
                                                 single_cls=opt.single_cls,
                                                 dataloader=testloader,
                                                 save_dir=log_dir)

            # Write
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
            if len(opt.name) and opt.bucket:
                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

            # Tensorboard
            if tb_writer:
                tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                        'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                        'val/giou_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                        'x/lr0', 'x/lr1', 'x/lr2']  # params
                for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                    tb_writer.add_scalar(tag, x, epoch)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {'epoch': epoch,
                            'best_fitness': best_fitness,
                            'training_results': f.read(),
                            'model': ema.ema,
                            'optimizer': None if final_epoch else optimizer.state_dict()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                    torch.save({'model': model.float()}, best_f32)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        n = opt.name if opt.name.isnumeric() else ''
        fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
        for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                if str(f2).endswith('.pt'):  # is *.pt
                    strip_optimizer(f2)  # strip optimizer
                    os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None  # upload
        # Finish
        if not opt.evolve:
            plot_results(save_dir=log_dir)  # save as results.png
        logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))

    dist.destroy_process_group() if rank not in [-1, 0] else None
    torch.cuda.empty_cache()
    return results

 

if __name__ == '__main__':
    # Script entry point: parse options, optionally restore them from a previous
    # run (--resume), set up the device / DDP, then either train once or run
    # hyperparameter evolution.
    parser = argparse.ArgumentParser()  # command-line argument parser
    # weights: initial weights file to load
    parser.add_argument('--weights', type=str, help='initial weights path')
    # cfg: model configuration file (network structure)
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    # data: dataset configuration file (dataset paths, class names, ...)
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
    # hyp: hyperparameter file
    parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
    # epochs: total number of training epochs
    parser.add_argument('--epochs', type=int, default=300)
    # batch-size: total batch size
    parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
    # img-size: input resolutions for training and testing
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
    # rect: rectangular training, off by default
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    # resume: continue an interrupted run; may be given with or without a checkpoint path
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    # nosave: only save the final checkpoint, off by default
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    # notest: only test on the final epoch, off by default
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    # noautoanchor: skip the automatic anchor check, off by default
    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
    # evolve: run hyperparameter evolution, off by default
    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
    # bucket: gsutil (Google Cloud Storage) bucket, rarely needed
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    # cache-images: cache images up front for faster training, off by default
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    # image-weights: sample training images with per-image weights
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    # name: if supplied, results.txt is renamed to results_name.txt; empty by default
    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
    # device: training device - cpu, 0 (a single GPU, cuda:0) or 0,1,2,3 (several GPUs)
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # multi-scale: multi-scale training, off by default
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    # single-cls: treat the dataset as having a single class, off by default
    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
    # adam: use the Adam optimizer instead of SGD
    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
    # sync-bn: cross-GPU synchronized BatchNorm, DDP mode only
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    # local_rank: GPU index of this process in DDP mode
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    # logdir: directory that receives the logs
    parser.add_argument('--logdir', type=str, default='runs/', help='logging directory')
    # workers: maximum number of dataloader workers
    parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
    opt = parser.parse_args()

    # DDP bookkeeping
    # world_size: number of processes, read from WORLD_SIZE (1 when unset)
    # global_rank: rank of this process, read from RANK (-1 when unset)
    opt.total_batch_size = opt.batch_size
    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
    set_logging(opt.global_rank)
    if opt.global_rank in [-1, 0]:
        # Per the original note: checks whether the code is up to date (not applicable on Windows)
        check_git_status()

    # Resume
    if opt.resume:  # resume an interrupted run
        # A string value of --resume is taken as the checkpoint path; otherwise
        # get_latest_run() supplies it (per the original note: the most recent
        # last.pt under the runs folder).
        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
        log_dir = Path(ckpt).parent.parent  # runs/exp0
        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
        # Replace every option with the ones saved by the run being resumed
        with open(log_dir / 'opt.yaml') as f:
            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader))  # replace
        # opt.cfg = '' tells train() not to exclude the checkpoint's anchors when loading weights
        opt.cfg, opt.weights, opt.resume = '', ckpt, True
        logger.info('Resuming training from %s' % ckpt)

    else:
        # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
        # Check the configuration files
        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
        # Create a fresh run directory under opt.logdir
        log_dir = increment_dir(Path(opt.logdir) / 'exp', opt.name)  # runs/exp1

    device = select_device(opt.device, batch_size=opt.batch_size)

    # DDP mode
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device('cuda', opt.local_rank)  # pick the device by GPU index
        # Initialise the process group
        dist.init_process_group(backend='nccl', init_method='env://')  # distributed backend
        assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
        # Split the total batch evenly across the processes
        opt.batch_size = opt.total_batch_size // opt.world_size

    logger.info(opt)
    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.FullLoader)  # load hyps

    # Train
    # Without --evolve, train() is called once directly
    if not opt.evolve:
        tb_writer = None
        if opt.global_rank in [-1, 0]:
            # Create the TensorBoard writer
            logger.info('Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/' % opt.logdir)
            tb_writer = SummaryWriter(log_dir=log_dir)  # runs/exp0

        train(hyp, opt, device, tb_writer)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
                'momentum': (0.1, 0.6, 0.98),  # SGD momentum/Adam beta1
                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
                'giou': (1, 0.02, 0.2),  # GIoU loss gain
                'cls': (1, 0.2, 4.0),  # cls loss gain
                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
                'anchors': (1, 2.0, 10.0),  # anchors per output grid (0 to ignore)
                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)

        assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
        opt.notest, opt.nosave = True, True  # only test/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        # Keep the evolved hyperparameters in the same '<logdir>/evolve' directory that
        # train() writes to when no TensorBoard writer is passed; with the default
        # --logdir this is still runs/evolve/hyp_evolved.yaml.
        yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml'  # save best result here
        if opt.bucket:
            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists

        # Evolution scheme: derive a base hyp from earlier generations, then mutate it.
        #   - evolve.txt holds one line per generation: results followed by hyp values
        #   - earlier generations are ranked by fitness() and the best n are kept
        #   - each kept generation gets a weight derived from its fitness
        #   - 'single':   pick one earlier hyp at random, using those weights
        #   - 'weighted': blend all kept hyps, (x * w.reshape(n, 1)).sum(0) / w.sum()
        for _ in range(1):  # generations to evolve
            if os.path.exists('evolve.txt'):  # if evolve.txt exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt('evolve.txt', ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                # The small epsilon keeps the total weight positive when only one row
                # exists (or all fitness values are equal); otherwise random.choices
                # raises ValueError on Python 3.9+ and the weighted mode divides by zero.
                w = fitness(x) - fitness(x).min() + 1E-6  # weights
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.9, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                # Apply the mutation to the base hyp. The +7 offset skips the seven result
                # columns (P, R, mAP, F1, test losses box/obj/cls) that precede the hyp values.
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation (a copy is passed because train() rescales some hyp entries in place)
            results = train(hyp.copy(), opt, device)

            # Write mutation results: results and the matching hyp go through
            # print_mutation (per the original note, into evolve.txt and the yaml file)
            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)

        # Plot results
        plot_evolution(yaml_file)
        print('Hyperparameter evolution complete. Best results saved as: %s\nCommand to train a new model with these '
              'hyperparameters: $ python train.py --hyp %s' % (yaml_file, yaml_file))

 

 

【版权声明】本文为华为云社区用户原创内容,未经允许不得转载,如需转载请自行联系原作者进行授权。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0)

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。