回顾-AI全栈成长计划-AI进阶篇,使用Notebook物体检测-Faster R-CNN
Faster R-CNN
物体检测是计算机视觉中的一个重要的研究领域,在人流检测,行人跟踪,自动驾驶,医学影像等领域有着广泛的应用。不同于简单的图像分类,物体检测旨在对图像中的目标进行精确识别,包括物体的位置和分类,因此能够应用于更多高层视觉处理的场景。例如在自动驾驶领域,需要辨识摄像头拍摄的图像中的车辆、行人、交通指示牌及其位置,以便进一步根据这些数据决定驾驶策略。上一期学习案例中,我们聚焦于YOLO算法,YOLO(You Only Look Once)是一种one-stage物体检测算法,在本期案例中,我们介绍一种two-stage算法——Faster R-CNN,将目标区域检测和类别识别分为两个任务进行物体检测。
实验环境准备
具体操作步骤:
登录ModelArts控制台 https://www.huaweicloud.com/product/modelarts.html -> 开发环境 -> Notebook -> 创建
名称 自定义
工作环境 Python3
资源池 选择"公共资源池"即可
类型 GPU
规格 [限时免费]体验规格GPU版或算力更好的计费版本
存储配置 EVS
磁盘规格 5GB
创建Notebook
可以选择免费的版本,但是免费的要排队哦~点击下一步
创建步骤我就直接省略了,直接启动已经创建好的Notebook。初次创建只要选择好GPU,一般都不会出现什么问题;如果选择CPU,可能会出现内存耗尽的问题,所以建议选择GPU~
点击"New",选择"Pytorch-1.0.0"
在Notebook中,我们输入一个简单的打印语句,然后点击上方的运行按钮,可以查看语句执行的结果。如果未输出结果,则环境还没准备好,稍等一下再试试;如果还是不能输出结果,则重新创建环境。
实验环境准备好
数据准备
首先,我们将需要的代码和数据下载到Notebook。
本案例我们使用PASCAL VOC 2007数据集训练模型,共20个类别的物体。
import os
from modelarts.session import Session
# Open a ModelArts session; its region name selects which bucket path holds
# the course bundle.
sess = Session()

if sess.region_name == 'cn-north-1':
    bucket_path = "modelarts-labs/notebook/DL_object_detection_faster/fasterrcnn.tar.gz"
elif sess.region_name == 'cn-north-4':
    bucket_path = "modelarts-labs-bj4/notebook/DL_object_detection_faster/fasterrcnn.tar.gz"
else:
    # No bundle path is known for any other region. Leave the path unset so
    # the download below is skipped instead of failing with a NameError.
    bucket_path = None
    print("请更换地区到北京一或北京四")

# Download only when a bundle path is known and ./experiments does not exist yet.
if bucket_path is not None and not os.path.exists('./experiments'):
    sess.download_data(bucket_path=bucket_path, path="./fasterrcnn.tar.gz")

if os.path.exists('./fasterrcnn.tar.gz'):
    # Unpack the archive into the working directory.
    os.system("tar -xf ./fasterrcnn.tar.gz")
    # Remove the archive once it has been unpacked.
    os.system("rm -r ./fasterrcnn.tar.gz")
!pip install pycocotools==2.0.0
!pip install torchvision==0.4.0
!pip install protobuf==3.9.0
import tools._init_paths
%matplotlib inline
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorboardX as tb
from datasets.factory import get_imdb
from model.train_val import get_training_roidb, train_net
from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir
import roi_data_layer.roidb as rdl_roidb
from roi_data_layer.layer import RoIDataLayer
import utils.timer
import pickle
import torch
import torch.optim as optim
from nets.vgg16 import vgg16
import numpy as np
import os
import sys
import glob
import time
# Dataset names used for training and for validation.
imdb_name = "voc_2007_trainval"
imdbval_name = "voc_2007_test"

# Location of the pretrained weights.
weight = "./data/imagenet_weights/vgg16.pth"

# Number of training iterations.
max_iters = 100

# Location of the model cfg file, plus an optional list of cfg overrides.
cfg_file = './experiments/cfgs/vgg16.yml'
set_cfgs = None

# Apply the cfg file first, then the override list; each step is skipped
# when its source is None.
if cfg_file is not None:
    cfg_from_file(cfg_file)
if set_cfgs is not None:
    cfg_from_list(set_cfgs)

print('Using config:')
print(cfg)
def combined_roidb(imdb_names):
    """Build the imdb and the merged training roidb for one or more datasets.

    Args:
        imdb_names: A single dataset name, or several names joined by ``+``.

    Returns:
        An ``(imdb, roidb)`` pair. For a single name, ``imdb`` is
        ``get_imdb(imdb_names)`` and ``roidb`` is that dataset's training
        roidb. For several names, ``roidb`` is the first dataset's roidb
        extended in place with the others, and ``imdb`` is a
        ``datasets.imdb.imdb`` built from the combined name and the classes
        of the second dataset.
    """

    def get_roidb(imdb_name):
        # Load the dataset.
        imdb = get_imdb(imdb_name)
        print('Loaded dataset `{:s}` for training'.format(imdb.name))
        # Select the proposal method configured in cfg.TRAIN.PROPOSAL_METHOD.
        imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
        print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
        roidb = get_training_roidb(imdb)
        return roidb

    names = imdb_names.split('+')
    roidbs = [get_roidb(s) for s in names]
    roidb = roidbs[0]
    if len(roidbs) > 1:
        # ``from datasets.factory import get_imdb`` does not bind the name
        # ``datasets``, so import the submodule explicitly before using it.
        import datasets.imdb

        for r in roidbs[1:]:
            roidb.extend(r)
        tmp = get_imdb(names[1])
        imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
    else:
        imdb = get_imdb(imdb_names)
    return imdb, roidb
# Seed NumPy's RNG from the configured seed.
np.random.seed(cfg.RNG_SEED)
# Load the training dataset.
imdb, roidb = combined_roidb(imdb_name)
print('{:d} roidb entries'.format(len(roidb)))
# Resolve the output directory.
output_dir = get_output_dir(imdb,None)
print('Output will be saved to `{:s}`'.format(output_dir))
# Resolve the directory where summary logs are saved.
tb_dir = get_output_tb_dir(imdb, None)
print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir))
# Load the validation dataset with cfg.TRAIN.USE_FLIPPED temporarily set to
# False; the original setting is restored right after loading.
orgflip = cfg.TRAIN.USE_FLIPPED
cfg.TRAIN.USE_FLIPPED = False
_, valroidb = combined_roidb(imdbval_name)
print('{:d} validation roidb entries'.format(len(valroidb)))
cfg.TRAIN.USE_FLIPPED = orgflip
# Create the backbone network.
# This example uses VGG16; other architectures such as ResNet can be tried instead.
net = vgg16()
from model.train_val import filter_roidb, SolverWrapper

# Filter the ROI entries of both splits, dropping the invalid ones.
roidb = filter_roidb(roidb)
valroidb = filter_roidb(valroidb)

# Bundle the network, datasets, output locations and pretrained weights
# into a solver object.
sw = SolverWrapper(
    net,
    imdb,
    roidb,
    valroidb,
    output_dir,
    tb_dir,
    pretrained_model=weight)
print('Solving...')

# Show all attributes of the solver (displayed as the notebook cell result).
sw.__dict__.keys()

# sw.net is the backbone network.
print(sw.net)
# Attach the ROI data layers for the training and validation roidbs.
sw.data_layer = RoIDataLayer(sw.roidb, sw.imdb.num_classes)
sw.data_layer_val = RoIDataLayer(sw.valroidb, sw.imdb.num_classes, random=True)

# Build the full network (ROI and classifier parts on top of VGG16).
lr, train_op = sw.construct_graph()

# Look for snapshots left by a previous run.
lsf, nfiles, sfiles = sw.find_previous()

# Snapshots allow resuming: restore from the latest one when any exist,
# otherwise initialise from scratch.
if lsf != 0:
    lr, last_snapshot_iter, stepsizes, np_paths, ss_paths = sw.restore(
        str(sfiles[-1]), str(nfiles[-1]))
else:
    lr, last_snapshot_iter, stepsizes, np_paths, ss_paths = sw.initialize()

iter = last_snapshot_iter + 1
last_summary_time = time.time()

# Continue from the previous training state: append max_iters to the step
# sizes, reverse the list so pop() yields them in order, and take the first.
stepsizes.append(max_iters)
stepsizes.reverse()
next_stepsize = stepsizes.pop()

# Switch the network to training mode and move it to its device.
print("网络结构:")
sw.net.train()
sw.net.to(sw.net._device)
开始训练
# Main training loop: runs from the current iteration up to and including
# max_iters, decaying the learning rate at each step size, writing summaries
# periodically and saving snapshots.
while iter < max_iters + 1:
    if iter == next_stepsize + 1:
        # Save a snapshot before reducing the learning rate.
        sw.snapshot(iter)
        lr *= cfg.TRAIN.GAMMA
        # Scale the learning rate of every parameter group in place. The
        # original called an undefined `scale_lr` helper here.
        # NOTE(review): assumes sw.optimizer is a torch.optim optimizer
        # exposing `param_groups` — confirm against SolverWrapper.
        for _param_group in sw.optimizer.param_groups:
            _param_group['lr'] *= cfg.TRAIN.GAMMA
        next_stepsize = stepsizes.pop()

    utils.timer.timer.tic()
    # Fetch the next batch from the ROI data layer.
    blobs = sw.data_layer.forward()

    now = time.time()
    if iter == 1 or now - last_summary_time > cfg.TRAIN.SUMMARY_INTERVAL:
        # Training step that also returns summaries.
        rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = \
            sw.net.train_step_with_summary(blobs, sw.optimizer)
        for _sum in summary:
            sw.writer.add_summary(_sum, float(iter))
        # Also record summaries for a validation batch.
        blobs_val = sw.data_layer_val.forward()
        summary_val = sw.net.get_summary(blobs_val)
        for _sum in summary_val:
            sw.valwriter.add_summary(_sum, float(iter))
        last_summary_time = now
    else:
        # Plain training step without summaries.
        rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = \
            sw.net.train_step(blobs, sw.optimizer)
    utils.timer.timer.toc()

    # Print the losses every cfg.TRAIN.DISPLAY iterations.
    if iter % (cfg.TRAIN.DISPLAY) == 0:
        print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
              '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n >>> lr: %f' % \
              (iter, max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, lr))
        print('speed: {:.3f}s / iter'.format(
            utils.timer.timer.average_time()))

    # Save a snapshot every cfg.TRAIN.SNAPSHOT_ITERS iterations.
    if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
        last_snapshot_iter = iter
        ss_path, np_path = sw.snapshot(iter)
        np_paths.append(np_path)
        ss_paths.append(ss_path)

        # Remove surplus snapshots once more than SNAPSHOT_KEPT exist.
        if len(np_paths) > cfg.TRAIN.SNAPSHOT_KEPT:
            sw.remove_snapshot(np_paths, ss_paths)

    iter += 1

# Make sure the final iteration is snapshotted.
if last_snapshot_iter != iter - 1:
    sw.snapshot(iter - 1)

sw.writer.close()
sw.valwriter.close()
我们利用训练得到的模型进行推理测试。
%matplotlib inline
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# 将路径转入lib
import tools._init_paths
from model.config import cfg
from model.test import im_detect
from torchvision.ops import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import os, cv2
import argparse
from nets.vgg16 import vgg16
from nets.resnet_v1 import resnetv1
from model.bbox_transform import clip_boxes, bbox_transform_inv
import torch
# PASCAL VOC class labels; the first entry is the background class.
CLASSES = (
    '__background__',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

# Checkpoint file-name templates per network.
NETS = {
    'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',),
    'res101': ('res101_faster_rcnn_iter_%d.pth',),
}

# Dataset names per dataset key.
DATASETS = {
    'pascal_voc': ('voc_2007_trainval',),
    'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',),
}
def vis_detections(im, class_dets, thresh=0.5):
    """Draw the detections whose score reaches ``thresh`` on top of an image.

    Args:
        im: Image array indexed as ``[row, col, channel]``. Its first three
            channels are reversed before display (presumably BGR to RGB for
            images read with OpenCV — confirm against the caller).
        class_dets: Mapping from class name to a 2-D array whose first four
            columns are the box corners ``x1, y1, x2, y2`` and whose last
            column is the score.
        thresh: Minimum score a detection needs in order to be drawn.
    """
    reordered = im[:, :, (2, 1, 0)]
    _, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(reordered, aspect='equal')

    for class_name, dets in class_dets.items():
        # Keep only the rows whose score passes the threshold.
        for det in dets[dets[:, -1] >= thresh]:
            x1, y1, x2, y2 = det[:4]
            score = det[-1]
            box = plt.Rectangle((x1, y1),
                                x2 - x1,
                                y2 - y1, fill=False,
                                edgecolor='red', linewidth=3.5)
            ax.add_patch(box)
            # Label the box just above its top-left corner.
            ax.text(x1, y1 - 2,
                    '{:s} {:.3f}'.format(class_name, score),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=14, color='white')

    plt.axis('off')
    plt.tight_layout()
    plt.draw()
# Directory containing the test images.
test_file = "./test"
import cv2
from utils.timer import Timer
from model.test import im_detect
from torchvision.ops import nms
cfg.TEST.HAS_RPN = True # Use RPN for proposals
# Location of the saved model.
# Per its file name, this checkpoint was saved after 110000 training
# iterations; point this at your own trained model if preferred.
saved_model = "./models/vgg16-voc0712/vgg16_faster_rcnn_iter_110000.pth"
print('trying to load weights from ', saved_model)
# Create the backbone.
net = vgg16()
# Build the network architecture (21 classes).
net.create_architecture(21, tag='default', anchor_scales=[8, 16, 32])
# Load the weight file; the map_location callable returns the storage
# unchanged instead of moving it to another device.
net.load_state_dict(torch.load(saved_model, map_location=lambda storage, loc: storage))
# Switch to evaluation mode.
net.eval()
# Move the network to its inference device.
net.to(net._device)
print('Loaded network {:s}'.format(saved_model))
# Thresholds: minimum score for a detection to be drawn, and the IoU
# threshold passed to NMS. Defined once, outside the loop.
CONF_THRESH = 0.7
NMS_THRESH = 0.3

# Run detection on every file in the test directory and plot the results.
for file in os.listdir(test_file):
    # Skip entries whose name starts with "._".
    if file.startswith("._"):
        continue

    file_path = os.path.join(test_file, file)
    print(file_path)

    # Read the test image. cv2.imread returns None (it does not raise) when
    # the file cannot be read or decoded, so skip such entries.
    im = cv2.imread(file_path)
    if im is None:
        print('Skipping unreadable image', file_path)
        continue

    # Time the detection step.
    timer = Timer()
    timer.tic()
    # Run the detector to get per-proposal scores and boxes.
    scores, boxes = im_detect(net, im)
    print(scores.shape, boxes.shape)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0]))

    cls_dets = {}
    # Apply non-maximum suppression per class; start=1 skips the background
    # class while keeping indices aligned with the score/box columns.
    for cls_ind, cls in enumerate(CLASSES[1:], start=1):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(torch.from_numpy(cls_boxes), torch.from_numpy(cls_scores), NMS_THRESH)
        dets = dets[keep.numpy(), :]
        # Each class name is visited exactly once, so a plain assignment
        # suffices; no merging with earlier entries is needed.
        if len(dets) > 0:
            cls_dets[cls] = dets

    vis_detections(im, cls_dets, thresh=CONF_THRESH)
    plt.show()
- 点赞
- 收藏
- 关注作者
评论(0)