Training yolov3_darknet53 on a custom dataset
This post adapts the yolov3_darknet53 code from MindSpore's model_zoo and trains it on a custom dataset.
1. Dataset
First, annotate your images. There are plenty of annotation tools; I used LabelImg (for its download and usage, see the blog post on annotating images with labelImg under Windows). Put the generated xml files and the images into two separate folders. Since this project expects the COCO data format, rather than re-annotating from scratch I convert my xml annotations into COCO format with the script below. When converting, it is best to pass the --rename flag to renumber all the files; otherwise you may hit an error because image_id is not an int.
from pathlib import Path
import os
import sys
import xml.etree.ElementTree as ET
import numpy as np
import argparse
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import shutil
import json
from typing import Dict, List
from tqdm import tqdm
import re
from collections import Counter
def get_label2id(labels_path: str) -> Dict[str, int]:
    '''Map each label name in labels_path to an integer id (ids start from 1).'''
    with open(labels_path, 'r') as f:
        labels_str = f.read().split()
    labels_ids = list(range(1, len(labels_str) + 1))
    return dict(zip(labels_str, labels_ids))
def get_image_info(ann_path, annotation_root, extract_num_from_imgid=True):
    '''
    ann_path: full path of the annotation file
    annotation_root: parsed root element of the xml file
    extract_num_from_imgid: whether to extract digits from the image id; for COCO
        format datasets this should be True so that image_id becomes an integer
    '''
    img_name = os.path.basename(ann_path)
    img_id = os.path.splitext(img_name)[0]
    filename = img_id + ext  # ext is the image suffix detected in __main__
    if extract_num_from_imgid and isinstance(img_id, str):
        # the regex supports names such as 0001.png, cls_0021.png, cls0123.jpg, 00123abc.png
        img_id = int(re.findall(r'\d+', img_id)[0])
    size = annotation_root.find('size')
    width = int(size.findtext('width'))
    height = int(size.findtext('height'))
    image_info = {
        'file_name': filename,
        'height': height,
        'width': width,
        'id': img_id
    }
    return image_info
def counting_labels(anno_root):
    '''
    Collect every label name in a Pascal VOC format dataset.
    anno_root: path to the VOC annotation folder, usually Annotations
    '''
    all_classes = []
    for xml_file in os.listdir(anno_root):
        xml_path = os.path.join(anno_root, xml_file)
        with open(xml_path, encoding='utf-8') as xml:
            tree = ET.parse(xml)
        root = tree.getroot()
        for obj in root.iter('object'):
            class_ = obj.find('name').text.strip()
            all_classes.append(class_)
    print(Counter(all_classes))
    labels = sorted(set(all_classes))
    print('labels:', labels)
    print('number of labels:', len(labels))
    print('writing labels to...{}'.format(os.path.join(opt.voc_root, 'labels.txt')))
    with open(os.path.join(opt.voc_root, 'labels.txt'), 'w') as f:
        for k in labels:
            f.write(k)
            f.write('\n')
def get_coco_annotation_from_obj(obj, label2id):
    label = obj.findtext('name').strip()
    assert label in label2id, f"Error: {label} is not in label2id !"
    category_id = label2id[label]
    bndbox = obj.find('bndbox')
    xmin = int(bndbox.findtext('xmin')) - 1
    ymin = int(bndbox.findtext('ymin')) - 1
    xmax = int(bndbox.findtext('xmax'))
    ymax = int(bndbox.findtext('ymax'))
    assert xmax > xmin and ymax > ymin, f"Box size error !: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}"
    o_width = xmax - xmin
    o_height = ymax - ymin
    ann = {
        'area': o_width * o_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, o_width, o_height],
        'category_id': category_id,
        'ignore': 0,
        # the polygon starts at the top-left corner and goes clockwise
        'segmentation': [[xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]]
    }
    return ann
def convert_xmls_to_cocojson(annotation_paths: List[str],
                             label2id: Dict[str, int],
                             output_jsonpath: str,
                             extract_num_from_imgid: bool = True):
    output_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": []
    }
    bnd_id = 1  # START_BOUNDING_BOX_ID, TODO input as args ?
    print('Start converting !')
    for a_path in tqdm(annotation_paths):
        # Read annotation xml
        ann_tree = ET.parse(a_path)
        ann_root = ann_tree.getroot()
        img_info = get_image_info(ann_path=a_path,
                                  annotation_root=ann_root,
                                  extract_num_from_imgid=extract_num_from_imgid)
        img_id = img_info['id']
        output_json_dict['images'].append(img_info)
        for obj in ann_root.findall('object'):
            ann = get_coco_annotation_from_obj(obj=obj, label2id=label2id)
            ann.update({'image_id': img_id, 'id': bnd_id})
            output_json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1
    for label, label_id in label2id.items():
        category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
        output_json_dict['categories'].append(category_info)
    with open(output_jsonpath, 'w') as f:
        output_json = json.dumps(output_json_dict)
        f.write(output_json)
def create_dir(ROOT: str):
    if not os.path.exists(ROOT):
        os.mkdir(ROOT)
    else:
        shutil.rmtree(ROOT)  # remove the existing directory first, then recreate it
        os.mkdir(ROOT)
def check_files(ann_root, img_root):
    '''Check that image and xml annotation file names match, and check the image suffix.'''
    if os.path.exists(ann_root):
        ann = Path(ann_root)
    else:
        raise Exception("annotation path does not exist")
    if os.path.exists(img_root):
        img = Path(img_root)
    else:
        raise Exception("image path does not exist")
    ann_files = []
    img_files = []
    img_exts = []
    for an, im in zip(ann.iterdir(), img.iterdir()):
        ann_files.append(an.stem)
        img_files.append(im.stem)
        img_exts.append(im.suffix)
    print('image suffixes:', np.unique(img_exts))
    if len(np.unique(img_exts)) > 1:
        raise Exception('the dataset contains images of more than one format, please check!', np.unique(img_exts))
    if set(ann_files) == set(img_files):
        print('annotation files and image files match')
    else:
        print('annotation files and image files do not match')
    return np.unique(img_exts)[0]
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--voc-root', type=str, required=True,
                        help='root of the VOC-format dataset; must contain the image folder and the annotation folder')
    parser.add_argument('--img_dir', type=str, required=False,
                        help='image folder of the VOC dataset; defaults to JPEGImages if not given')
    parser.add_argument('--anno_dir', type=str, required=False,
                        help='annotation folder of the VOC dataset; defaults to Annotations if not given')
    parser.add_argument('--coco-dir', type=str, default='CocoDataset',
                        help='output folder of the COCO dataset, created under the VOC root; defaults to CocoDataset')
    parser.add_argument('--test-ratio', type=float, default=0.1,
                        help='test set ratio, defaults to 0.1')
    # store_true is used so that a bare --rename enables renaming
    # (argparse type=bool would treat any value, even "False", as True)
    parser.add_argument('--rename', action='store_true',
                        help='rename the VOC files with numeric ids')
    parser.add_argument('--label-file', type=str, required=False,
                        help='path to label list.')
    parser.add_argument('--output', type=str, default='output.json', help='path to output .json file')
    # parser.add_argument('--ext', type=str, default='.png', help='image suffix of the VOC data, note the leading "."')
    opt = parser.parse_args()
    voc_root = opt.voc_root
    print('Pascal VOC dataset root:', voc_root)
    if opt.img_dir is None:
        img_dir = 'JPEGImages'
    else:
        img_dir = opt.img_dir
    JPEG = os.path.join(voc_root, img_dir)
    if not os.path.exists(JPEG):
        raise Exception(f'image path {JPEG} does not exist!')
    if opt.anno_dir is None:
        anno_dir = 'Annotations'
    else:
        anno_dir = opt.anno_dir
    ANNO = os.path.join(voc_root, anno_dir)
    if not os.path.exists(ANNO):
        raise Exception(f'annotation path {ANNO} does not exist!')
    ext = check_files(ANNO, JPEG)
    ## ============================ ##
    ##   Rename files numerically   ##
    ## ============================ ##
    if opt.rename:
        renamed_jpeg = os.path.join(voc_root, 'RenamedJPEGImages')
        create_dir(renamed_jpeg)
        renamed_xml = os.path.join(voc_root, 'RenamedAnnotations')
        create_dir(renamed_xml)
        p1 = Path(JPEG)
        p2 = Path(ANNO)
        imgs, annos = [], []
        for img, anno in zip(p1.iterdir(), p2.iterdir()):
            # names are split on '.', so file names must contain no dot other than the suffix
            imgs.append(img.name.split('.')[0])
            annos.append(anno.name.split('.')[0])
        imgs = sorted(imgs)
        annos = sorted(annos)
        assert imgs == annos
        LENGTH = len(imgs)
        print('number of images:', LENGTH)
        for new_num, img_id in tqdm(zip(range(1, LENGTH + 1), imgs), total=LENGTH):
            src_img_path = os.path.join(JPEG, img_id + ext)  # image path in the original Pascal dataset
            dst_img_path = os.path.join(renamed_jpeg, str(new_num) + ext)  # renamed image path
            shutil.copy(src_img_path, dst_img_path)
            src_xml_path = os.path.join(ANNO, img_id + '.xml')  # annotation path in the original Pascal dataset
            dst_xml_path = os.path.join(renamed_xml, str(new_num) + '.xml')  # renamed annotation path
            shutil.copy(src_xml_path, dst_xml_path)
        JPEG = renamed_jpeg  # point JPEG at the renamed image folder
        ANNO = renamed_xml  # point ANNO at the renamed annotation folder
    ImgSets = os.path.join(voc_root, 'ImageSets')
    if not os.path.exists(ImgSets):
        os.mkdir(ImgSets)
    ImgSetsMain = os.path.join(ImgSets, 'Main')
    create_dir(ImgSetsMain)
    COCOPROJ = os.path.join(voc_root, opt.coco_dir)  # output root of the converted COCO dataset
    create_dir(COCOPROJ)
    txt_files = ['trainvaltest', 'train', 'val', 'trainval', 'test']
    coco_dirs = []
    for dir_ in txt_files:
        DIR = os.path.join(COCOPROJ, dir_)
        coco_dirs.append(DIR)
        create_dir(DIR)
    COCOANNO = os.path.join(COCOPROJ, 'annotations')  # folder for the COCO annotation json files
    create_dir(COCOANNO)
    p = Path(JPEG)
    files = []
    for file in p.iterdir():
        # in Pascal VOC, the txt files under ImageSets/Main (train.txt, val.txt, ...)
        # store only image ids without suffixes
        name, suffix = file.name.split('.')
        files.append(name)
    print('dataset size:', len(files))
    files = shuffle(files)
    ratio = opt.test_ratio
    trainval, test = train_test_split(files, test_size=ratio)
    train, val = train_test_split(trainval, test_size=0.1)
    print('train set size: ', len(train))
    print('val set size: ', len(val))
    print('test set size: ', len(test))
    def write_txt(txt_path, data):
        with open(txt_path, 'w') as f:
            for d in data:
                f.write(str(d))
                f.write('\n')

    # write each id list into its txt file
    datas = [files, train, val, trainval, test]
    for txt, data in zip(txt_files, datas):
        txt_path = os.path.join(ImgSetsMain, txt + '.txt')
        write_txt(txt_path, data)

    # walk the xml files, collect every label name and save labels.txt
    if opt.label_file:
        print('reading labels from the user-supplied label file!')
        labels = opt.label_file
    else:
        print('collecting labels from the xml files automatically!')
        counting_labels(ANNO)
        labels = os.path.join(voc_root, 'labels.txt')
    if not os.path.isfile(labels):
        raise Exception('a label file is required to map labels to numeric ids in order; create one manually if it does not exist!')
    label2id = get_label2id(labels_path=labels)
    print('labels and their numeric ids:', label2id)
    for name, imgs, PATH in tqdm(zip(txt_files, datas, coco_dirs), total=len(txt_files)):
        annotation_paths = []
        for img in imgs:
            annotation_paths.append(os.path.join(ANNO, img + '.xml'))
            src_img_path = os.path.join(JPEG, img + ext)  # image path in the original Pascal dataset
            dst_img_path = os.path.join(PATH, img + ext)  # image path in the COCO dataset
            shutil.copy(src_img_path, dst_img_path)
        convert_xmls_to_cocojson(
            annotation_paths=annotation_paths,
            label2id=label2id,
            output_jsonpath=os.path.join(COCOANNO, f'instances_{name}.json'),
            extract_num_from_imgid=True  # COCO image_id must be an integer, extracted from the file name
        )
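To run the conversion, assuming the script above is saved as voc2coco.py (the name is illustrative) and the images and xml files sit in mydata/JPEGImages and mydata/Annotations, a typical invocation is:
python voc2coco.py --voc-root ./mydata --rename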
This finally generates the corresponding folders:
├── CocoDataset
│   ├── annotations (stores the json files)
│   ├── test
│   ├── train
│   ├── trainval
│   ├── trainvaltest
│   └── val
2. Code modifications
- Download the code
git clone https://gitee.com/mindspore/mindspore.git
Then switch to the 1.1 branch, or download the 1.1 release zip directly and unpack it, and locate the mindspore/model_zoo/official/cv/yolov3_darknet53 path. I recommend copying the whole yolov3_darknet53 folder out, which makes modifying the code easier.
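For example (the branch name r1.1 is my assumption for the 1.1 release; adjust it to your checkout):
cd mindspore
git checkout r1.1
cp -r model_zoo/official/cv/yolov3_darknet53 ../yolov3_darknet53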
- In config.py, change num_classes to your own number of classes. out_channel is the channel count of the final output, i.e. 3 anchors per scale x (number of classes + 4 box coordinates + 1 confidence); here that is 3 x (54 + 4 + 1) = 177.
num_classes = 54  # change to your own number of classes
max_box = 50
backbone_input_shape = [32, 64, 128, 256, 512]
backbone_shape = [64, 128, 256, 512, 1024]
backbone_layers = [1, 2, 8, 8, 4]
# confidence under ignore_threshold means no object when training
ignore_threshold = 0.7
# h->w
anchor_scales = [(10, 13), (16, 30), (33, 23),
                 (30, 61), (62, 45), (59, 119),
                 (116, 90), (156, 198), (373, 326)]
out_channel = 177  # 3 x (num_classes + 4 bbox coords + 1 confidence)
- Modify train.py. Because of the folder names our conversion script generates, the data paths in train.py must be changed to match; drop the 2014 suffix from the two lines below.
args.data_root = os.path.join(args.data_dir, 'train2014')
args.annFile = os.path.join(args.data_dir, 'annotations/instances_train2014.json')
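After the edit, matching the CocoDataset layout generated above (image folder train, annotation file instances_train.json), the two lines become:
args.data_root = os.path.join(args.data_dir, 'train')
args.annFile = os.path.join(args.data_dir, 'annotations/instances_train.json')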
- Train the model
There are two ways to launch training; see README.md for details. One caveat: whether due to my environment or the scripts themselves, the launch scripts always failed to parse their arguments for me, so the easiest fix is to comment out the path-resolution code in the sh script and assign the arguments directly:
#get_real_path(){
# if [ "${1:0:1}" == "/" ]; then
# echo "$1"
# else
# echo "$(realpath -m $PWD/$1)"
# fi
#}
#DATASET_PATH=$(get_real_path $1)
DATASET_PATH=$1
echo $DATASET_PATH
#PRETRAINED_BACKBONE=$(get_real_path $2)
PRETRAINED_BACKBONE=$2
echo $PRETRAINED_BACKBONE
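With that change the script takes both arguments verbatim, so a single-device run looks like this (script name and argument order follow the project README; the paths are examples):
sh run_standalone_train.sh dataset/CocoDataset darknet53_backbone.ckpt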
- In the evaluation code eval.py, replace the labels list with your own label names; the class count used when reporting the final evaluation results must also be changed to your own number of classes.
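A sketch of that edit (the attribute names follow my reading of the DetectionEngine class in eval.py, so verify them against your copy; the label names here are the numeric ones my converter writes to labels.txt):
# in eval.py, inside DetectionEngine.__init__
self.labels = [str(i) for i in range(54)]  # your label names, in labels.txt order
self.num_classes = 54                      # your number of classes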
- Prediction. The original project provides no inference code, so for lack of time I wrote a simple test script by hand:
import os
import argparse
import datetime
import time
import math
import random
import sys
from collections import defaultdict
import mindspore
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from mindspore.context import ParallelMode
from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.yolo import YOLOV3DarkNet53
from src.logger import get_logger
from src.yolo_dataset import create_yolo_dataset
from src.config import ConfigYOLOV3DarkNet53
from src.transforms import _reshape_data
import cv2
from mindspore import Tensor
from eval import DetectionEngine
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    # Plots one bounding box on image img; x is [x_left, y_top, w, h]
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    x1 = math.ceil(x[0])
    y1 = math.ceil(x[1])
    x2 = math.ceil(x[0] + x[2])
    y2 = math.ceil(x[1] + x[3])
    print('x1=' + str(x1), 'y1=' + str(y1), 'x2=' + str(x2), 'y2=' + str(y2))
    c1, c2 = (x1, y1), (x2, y2)
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
def det_nms(predicts, threshold):
    """Calculate NMS."""
    # convert xywh -> xmin ymin xmax ymax
    x1 = predicts[:, 0]
    y1 = predicts[:, 1]
    x2 = x1 + predicts[:, 2]
    y2 = y1 + predicts[:, 3]
    scores = predicts[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    reserved_boxes = []
    while order.size > 0:
        i = order[0]
        reserved_boxes.append(i)
        max_x1 = np.maximum(x1[i], x1[order[1:]])
        max_y1 = np.maximum(y1[i], y1[order[1:]])
        min_x2 = np.minimum(x2[i], x2[order[1:]])
        min_y2 = np.minimum(y2[i], y2[order[1:]])
        intersect_w = np.maximum(0.0, min_x2 - max_x1 + 1)
        intersect_h = np.maximum(0.0, min_y2 - max_y1 + 1)
        intersect_area = intersect_w * intersect_h
        ovr = intersect_area / (areas[i] + areas[order[1:]] - intersect_area)
        indexes = np.where(ovr <= threshold)[0]
        order = order[indexes + 1]
    return reserved_boxes
def do_nms_for_results(results, img0, img_name):
    """Run per-class NMS on the result boxes and draw the kept ones on the image."""
    for clsi in results:
        dets = results[clsi]
        dets = np.array(dets)
        keep_index = det_nms(dets, 0.3)
        for i in keep_index:
            score = dets[i][4].astype(float)
            label = '%s_%.2f_' % (labels[int(clsi) - 1], score)
            bbox = list(dets[i][:4].astype(float))
            plot_one_box(bbox, img0, label=label)
    cv2.imwrite("./{}".format(img_name), img0)
# category ids 1..54 and their display names; replace labels with your real class names
coco_catIds = list(range(1, 55))
labels = [str(i) for i in range(54)]
devid = int(os.getenv('DEVICE_ID', '0'))
context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                    device_target='Ascend', save_graphs=True, device_id=devid,
                    variable_memory_max_size="4GB")
network = YOLOV3DarkNet53(is_training=False)
param_dict = load_checkpoint("ckpt_0/0-125_256250.ckpt")  # path to your own checkpoint
param_dict_new = {}
for key, values in param_dict.items():
    if key.startswith('moments.'):
        continue
    elif key.startswith('yolo_network.'):
        param_dict_new[key[13:]] = values
    else:
        param_dict_new[key] = values
load_param_into_net(network, param_dict_new)
config = ConfigYOLOV3DarkNet53()
network.set_train(False)
test_data = 'data/test_img/'
img_list = os.listdir(test_data)
for img_name in img_list:
    results = defaultdict(list)
    img_source = cv2.imread(os.path.join(test_data, img_name))
    source_img_shape = np.array(img_source.shape, dtype=np.int32)
    image_size = config.test_img_shape
    img, img_shape = _reshape_data(img_source, image_size)
    img = img.transpose((2, 0, 1))
    img = np.expand_dims(img, axis=0)
    img = Tensor(img)
    prediction = network(img)
    output_big, output_me, output_small = prediction
    output_big = output_big.asnumpy()
    output_me = output_me.asnumpy()
    output_small = output_small.asnumpy()
    outputs = [output_small, output_me, output_big]
    outputs_num = len(outputs)
    # each head outputs [batch, grid_y, grid_x, 3, 5 + num_classes], e.g. [32, 52, 52, 3, 85]
    for out_id in range(outputs_num):
        out_item = outputs[out_id]
        # take the only image in the batch: [grid_y, grid_x, 3, 5 + num_classes]
        out_item_single = out_item[0, :]
        ori_h, ori_w, _ = source_img_shape
        x = out_item_single[..., 0] * ori_w
        y = out_item_single[..., 1] * ori_h
        w = out_item_single[..., 2] * ori_w
        h = out_item_single[..., 3] * ori_h
        conf = out_item_single[..., 4:5]
        cls_emb = out_item_single[..., 5:]
        cls_argmax = np.expand_dims(np.argmax(cls_emb, axis=-1), axis=-1)
        x = x.reshape(-1)
        y = y.reshape(-1)
        w = w.reshape(-1)
        h = h.reshape(-1)
        cls_emb = cls_emb.reshape(-1, 54)  # 54 = num_classes
        conf = conf.reshape(-1)
        cls_argmax = cls_argmax.reshape(-1)
        x_top_left = x - w / 2.
        y_top_left = y - h / 2.
        # boolean mask that is True only at each box's argmax class
        flag = np.zeros(cls_emb.shape, dtype=bool)
        for box_i in range(flag.shape[0]):
            c = cls_argmax[box_i]
            flag[box_i, c] = True
        confidence = cls_emb[flag] * conf
        for x_lefti, y_lefti, wi, hi, confi, clsi in zip(x_top_left, y_top_left, w, h, confidence, cls_argmax):
            if confi < 0.7:
                continue
            x_lefti = max(0, x_lefti)
            y_lefti = max(0, y_lefti)
            wi = min(wi, ori_w)
            hi = min(hi, ori_h)
            coco_clsi = coco_catIds[clsi]
            results[str(coco_clsi)].append([x_lefti, y_lefti, wi, hi, confi])
    do_nms_for_results(results, img_source, img_name)
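To try the script, save it (for example as predict.py) inside the yolov3_darknet53 folder so the src imports resolve, put your test images under data/test_img/, point load_checkpoint at your own checkpoint file, and run python predict.py; the images with the drawn boxes are written to the current directory.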