AIFood Competition Summary
Recently, colleagues of 老山 took part in Huawei's AIFood competition, whose task was to classify food images. The data, however, is imbalanced: each large-sample class contains 500+ images, while each small-sample class has only 5. This imbalance makes training on the small-sample classes extremely challenging. We used the PyTorch framework for the classification. This post briefly records the difficulties we ran into during the competition, in the hope that it helps others.
Problem 1: Imbalanced data
Increase the sampling probability of the small-sample classes so that every class is equally likely to appear in a batch.
Concretely, we reimplemented the ImbalancedDatasetSampler class. The sampler assigns each sample a weight based on the size of its class: if a class contains m samples, each of those samples gets weight 1/m and is drawn with probability proportional to 1/m, so the classes end up balanced in the training batches. The code is as follows:
import torch
import torch.utils.data
import torchvision
class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler):
    """Samples elements randomly from a given list of indices for imbalanced dataset
    Arguments:
        indices (list, optional): a list of indices
        num_samples (int, optional): number of samples to draw
    """

    def __init__(self, dataset, indices=None, num_samples=None):
        # if indices is not provided,
        # all elements in the dataset will be considered
        self.indices = list(range(len(dataset))) \
            if indices is None else indices
        # if num_samples is not provided,
        # draw `len(indices)` samples in each iteration
        self.num_samples = len(self.indices) \
            if num_samples is None else num_samples
        # distribution of classes in the dataset
        label_to_count = {}
        for idx in self.indices:
            label = self._get_label(dataset, idx)
            if label in label_to_count:
                label_to_count[label] += 1
            else:
                label_to_count[label] = 1
        # weight for each sample: 1 / (number of samples in its class)
        weights = [1.0 / label_to_count[self._get_label(dataset, idx)]
                   for idx in self.indices]
        self.weights = torch.DoubleTensor(weights)

    def _get_label(self, dataset, idx):
        dataset_type = type(dataset)
        if dataset_type is torchvision.datasets.MNIST:
            return dataset.train_labels[idx].item()
        elif dataset_type is torchvision.datasets.ImageFolder:
            return dataset.imgs[idx][1]
        else:
            raise NotImplementedError

    def __iter__(self):
        return (self.indices[i] for i in torch.multinomial(
            self.weights, self.num_samples, replacement=True))

    def __len__(self):
        return self.num_samples
Data augmentation: rotate, zoom in, zoom out and otherwise transform the images to enlarge the dataset.
import os
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import numpy as np
# Define the image generator.
# rotation_range: a value between 0 and 180 degrees; images are rotated by a random angle within this range.
# width_shift_range / height_shift_range: fractions between 0 and 1 of the image size, used for random horizontal and vertical shifts.
# rescale: a factor multiplied onto every pixel before any other processing; the RGB channels are 0~255 integers, which would give values that are too large, so we rescale them into the 0~1 range.
# shear_range: the intensity of the random shear transform.
# zoom_range: the amount of random zoom.
# horizontal_flip: randomly flip images horizontally; only suitable when a horizontal flip does not change the image's meaning.
# fill_mode: how newly exposed pixels are filled after rotations or shifts.
datagen = ImageDataGenerator(
    rotation_range=40,         # random rotation angle
    width_shift_range=0.2,     # random horizontal shift
    height_shift_range=0.2,    # random vertical shift
    rescale=1/255,             # normalize pixel values
    shear_range=20,            # random shear transform
    zoom_range=0.2,            # random zoom
    horizontal_flip=True,      # horizontal flip
    fill_mode='nearest')       # how to fill newly exposed pixels
# load the images class by class and write the augmented copies back into the same class folder
for t in os.listdir('./images2/'):
    for m in os.listdir(os.path.join('./images2/', t)):
        img = load_img(os.path.join('./images2/', t, m))
        x = img_to_array(img)
        # add a batch dimension
        x = np.expand_dims(x, 0)
        # generate 5 augmented images per original image
        i = 0
        for batch in datagen.flow(x, batch_size=1, save_to_dir=os.path.join('./images2/', t), save_prefix='', save_format='jpg'):
            i += 1
            if i == 5:
                break
print('finished!')
Problem 2: Inference code
The competition required deploying the model on ModelArts, but the config.json and customize_service.py provided with the problem statement failed to deploy after uploading, which kicked off the long debugging journey below.
1. Model code
The code provided for the competition re-implements the ResNet-50 network structure. To stay consistent with the trained model, that section was replaced with code that loads torchvision's built-in ResNet-50:
model = models.resnet50(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 25)
model.load_state_dict(torch.load(model_path,map_location ='cpu'))
# model.load_state_dict(torch.load(model_path))
2. The small_label.txt file
The inference result was always missing one dish name. The cause was the file header (BOM) of small_label.txt, which prevented the first label from being loaded correctly. The file was re-saved as UTF-8 and created directly in VS Code, avoiding the header that Windows 10's built-in editor writes into new text files.
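If you prefer to guard against the BOM in code instead of re-saving the file, Python's utf-8-sig codec strips it while reading. A minimal sketch (the helper name read_label_list_bom_safe is hypothetical; the label file name follows the one used in customize_service.py below):
# hedged sketch: tolerate a UTF-8 BOM written by Windows editors
def read_label_list_bom_safe(path):
    # 'utf-8-sig' silently drops a leading '\ufeff' if the file starts with one
    with open(path, 'r', encoding='utf-8-sig') as f:
        return [line.strip() for line in f if line.strip()]

print(read_label_list_bom_safe('small_labels_25c.txt'))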
3. Wrong image loading format
torchvision.transforms expects a PIL Image, not an ndarray, so the ndarray conversion on the line just before the return in decode_image was removed.
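For reference, a minimal sketch of the fixed loading path (the file name sample.jpg is illustrative): decode to a PIL Image and feed it straight into the transform pipeline.
from PIL import Image
import torchvision.transforms as transforms

infer_transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

image = Image.open('sample.jpg').convert('RGB')   # keep it as a PIL Image, do not convert to ndarray
tensor = infer_transformation(image)              # tensor of shape (3, H, W), normalized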
4. Output format
The output step raised an error; it turned out that the output data format did not match what config.json declared.
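For reference, the dictionary that the fixed _postprocess finally returns has the shape below (the label names and probabilities are made up; the real keys come from small_labels_25c.txt, and the 'logits' key has to match the output declared in config.json):
# illustrative only: shape of the value returned by _postprocess
predict_result = {
    'logits': {
        'dish_a': 0.91,
        'dish_b': 0.05,
        'dish_c': 0.04,
        # ... one softmax probability per label in small_labels_25c.txt
    }
}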
Final code
customize_service.py
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from math import exp
import numpy as np
from PIL import Image
import cv2
from model_service.pytorch_model_service import PTServingBaseService
import torch.nn as nn
import torch
import logging
import torchvision.models as models
import torchvision.transforms as transforms
infer_transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
IMAGES_KEY = 'images'
MODEL_INPUT_KEY = 'images'
MODEL_OUTPUT_KEY = 'logits'
LABELS_FILE_NAME = 'small_labels_25c.txt'
def decode_image(file_content):
    """
    Decode bytes to a single image
    :param file_content: bytes
    :return: a PIL Image in RGB mode (torchvision.transforms expects a PIL Image, not an ndarray)
    """
    image = Image.open(file_content)
    image = image.convert('RGB')
    # print(image.shape)
    # image = np.asarray(image, dtype=np.float32)
    return image
    # image_content = r.files[file_content].read()    # python 'List' class that holds byte
    # np_array = np.fromstring(image_content, np.uint8)    # numpy array with dtype np.unit8
    # img_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR)    # numpy array in shape [height, width, channels]
def read_label_list(path):
    """
    read label list from path
    :param path: a path
    :return: a list of label names like: ['label_a', 'label_b', ...]
    """
    with open(path, 'r') as f:
        label_list = f.read().split(os.linesep)
    label_list = [x.strip() for x in label_list if x.strip()]
    print(' label_list', label_list)
    return label_list
class FoodPredictService(PTServingBaseService):
    def __init__(self, model_name, model_path):
        global LABEL_LIST
        super(FoodPredictService, self).__init__(model_name, model_path)
        self.model = resnet50(model_path)
        dir_path = os.path.dirname(os.path.realpath(self.model_path))
        LABEL_LIST = read_label_list(os.path.join(dir_path, LABELS_FILE_NAME))

    def _preprocess(self, data):
        """
        `data` is provided by Upredict service according to the input data. Which is like:
        {
            'images': {
                'image_a.jpg': b'xxx'
            }
        }
        For now, predict a single image at a time.
        """
        preprocessed_data = {}
        input_batch = []
        for file_name, file_content in data[IMAGES_KEY].items():
            print('\tAppending image: %s' % file_name)
            image1 = decode_image(file_content)
            if torch.cuda.is_available():
                input_batch.append(infer_transformation(image1).cuda())
            else:
                input_batch.append(infer_transformation(image1))
        input_batch_var = torch.autograd.Variable(torch.stack(input_batch, dim=0), volatile=True)
        preprocessed_data[MODEL_INPUT_KEY] = input_batch_var
        # print('preprocessed_data', input_batch_var.shape())
        return preprocessed_data

    def _postprocess(self, data):
        """
        `data` is the result of your model. Which is like:
        {
            'logits': [[0.1, -0.12, 0.72, ...]]
        }
        value of logits is a single list of list because one image is predicted at a time for now.
        """
        # logits_list = [0.1, -0.12, 0.72, ...]
        logits_list = data['images'][0].detach().numpy().tolist()
        maxlist = max(logits_list)
        z_exp = [exp(i - maxlist) for i in logits_list]
        sum_z_exp = sum(z_exp)
        softmax = [round(i / sum_z_exp, 3) for i in z_exp]
        # labels_to_logits = {
        #     'label_a': 0.1, 'label_b': -0.12, 'label_c': 0.72, ...
        # }
        labels_to_logits = {
            LABEL_LIST[i]: s for i, s in enumerate(softmax)
            # LABEL_LIST[i]: s for i, s in enumerate(logits_list)
        }
        predict_result = {
            MODEL_OUTPUT_KEY: labels_to_logits
        }
        return predict_result


def resnet50(model_path, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = models.resnet50(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 25)
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    # model.load_state_dict(torch.load(model_path))
    model.eval()
    return model
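Before uploading to ModelArts again, it helps to sanity-check the pieces above locally. A minimal sketch, assuming a saved 25-class checkpoint and a test image are on disk (all three paths below are illustrative):
# hypothetical local smoke test; model.pth, test.jpg and the label path are placeholders
model = resnet50('./model.pth')                        # 25-class checkpoint loaded on CPU
labels = read_label_list('./small_labels_25c.txt')
image = decode_image(open('./test.jpg', 'rb'))         # PIL Image in RGB mode
batch = torch.stack([infer_transformation(image)], dim=0)
with torch.no_grad():
    logits = model(batch)[0]
probs = torch.softmax(logits, dim=0)
print(labels[int(probs.argmax())], float(probs.max()))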
Training script in the notebook
# ======== Data processing: split into training and validation sets ========
dataTrans = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
# image data path
data_dir = './data/aifood/images'
all_image_datasets = datasets.ImageFolder(data_dir, dataTrans)
trainsize = int(0.8*len(all_image_datasets))
testsize = len(all_image_datasets) - trainsize
train_dataset, test_dataset = torch.utils.data.random_split(all_image_datasets,[trainsize,testsize])
image_datasets = {'train':train_dataset,'val':test_dataset}
# wrap the data and labels into DataLoaders; the training loader uses the
# ImbalancedDatasetSampler so every class is drawn with roughly equal probability.
# Note: the sampler is built over the full ImageFolder restricted to the training
# indices, so the indices it yields match the dataset passed to the DataLoader.
dataloders = {
    'train': torch.utils.data.DataLoader(all_image_datasets,
                                         batch_size=64,
                                         num_workers=4,
                                         sampler=sampler.ImbalancedDatasetSampler(
                                             all_image_datasets,
                                             indices=train_dataset.indices)),
    'val': torch.utils.data.DataLoader(test_dataset,
                                       batch_size=64,
                                       shuffle=True,
                                       num_workers=4)
}
# plain shuffled loaders without the sampler; leaving this active would override
# the sampler-based loaders above
# dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],
#                                              batch_size=64,
#                                              shuffle=True,
#                                              num_workers=4) for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# use gpu or not
use_gpu = torch.cuda.is_available()
# =============== Training ===============
def train_model(model, lossfunc, optimizer, scheduler, num_epochs=10):
    start_time = time.time()
    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)   # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            for data in dataloders[phase]:
                # get the inputs
                inputs, labels = data
                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = lossfunc(outputs, labels)
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                # statistics
                running_loss += loss.data
                running_corrects += torch.sum(preds == labels.data).to(torch.float32)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

    elapsed_time = time.time() - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(
        elapsed_time // 60, elapsed_time % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
# get model and replace the original fc layer with your fc layer
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 75)
if use_gpu:
    model_ft = model_ft.cuda()
# define loss function
lossfunc = nn.CrossEntropyLoss()
# setting optimizer and trainable parameters
# params = model_ft.parameters()
# list(model_ft.fc.parameters())+list(model_ft.layer4.parameters())
params = list(model_ft.fc.parameters()) + list(model_ft.parameters())  # note: model_ft.parameters() already contains the fc parameters
optimizer_ft = optim.SGD(params, lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model=model_ft,
                       lossfunc=lossfunc,
                       optimizer=optimizer_ft,
                       scheduler=exp_lr_scheduler,
                       num_epochs=20)
# ============ After the model above is trained, replace the final fully-connected layer of the ResNet with a 25-class head for the small-sample classes and train again ============
resnet= models.resnet50(pretrained=True)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 75)
resnet.load_state_dict(torch.load('./model75.pth'))
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 25)
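What follows is a minimal sketch of the retraining step implied by the comment above; the optimizer and scheduler settings are illustrative, and the dataloders/dataset_sizes globals are assumed to have been rebuilt over the 25-class data before calling train_model:
if use_gpu:
    resnet = resnet.cuda()

optimizer_25 = optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9)
scheduler_25 = lr_scheduler.StepLR(optimizer_25, step_size=7, gamma=0.1)

# reuse the training loop defined above, now with the 25-class head
resnet = train_model(model=resnet,
                     lossfunc=nn.CrossEntropyLoss(),
                     optimizer=optimizer_25,
                     scheduler=scheduler_25,
                     num_epochs=20)
torch.save(resnet.state_dict(), './model25.pth')  # output file name is illustrative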