102-Class Flower Recognition Based on a DenseNet & Xception Fusion Model
I threw this fusion model together while competing in a contest.
The dataset itself is not particularly hard: the classes are fairly evenly distributed, and the task is a typical fine-grained classification problem.
The approach is simple: pick a sufficiently strong model, start from pretrained weights, and tune patiently; that alone will earn a decent ranking.
For most classification problems my go-to models are ResNet and DenseNet.
For a relatively easy image-classification task like this one, first try a few pretrained models,
run them briefly, and compare your score with first place. If you still lack a better idea, try the approach in this project.
If you have any questions, feel free to discuss them in the comments. Writing this up took some effort, so consider following me.
Data example
!cd 'data/data31022' && unzip -q fusion.zip
!cd 'data/data30606' && unzip -q 54_data.zip
1. Data Loading
### Data loading
from PIL import Image
import numpy as np
import paddle.fluid as fluid
import os
import shutil
def test(image2, label2):
    def reader():
        for img, label in zip(image2, label2):
            im = Image.open(os.path.join('data/data30606/54_data/train', img))
            im = im.resize((640, 640), Image.ANTIALIAS)
            img = np.array(im).astype('float32')
            img -= [127.5, 127.5, 127.5]  # 127.5 is used for all three channels for convenience; ideally compute the real per-channel mean over the dataset
            img = img.transpose((2, 0, 1))  # HWC to CHW
            img *= 0.007843  # scale pixel values (roughly to [-1, 1])
            yield img, int(label)
    return reader
def normalized(sample):
    im, label = sample
    return im, label
import pandas as pd
df = pd.read_csv('data/data30606/54_data/train.csv')
image_path_list = df['filename'].values
label_list = df['label'].values
print(type(image_path_list))
image2 = list(image_path_list)
label2 = list(label_list)
reader = fluid.io.xmap_readers(normalized, test(image2, label2), process_num=6, buffer_size=512)  # multi-process data loading; process_num can be set to ~80% of the CPU thread count
train_reader = fluid.io.batch(reader, batch_size=64)  # batch size is usually set to a power of two
# Feel free to experiment with shuffling:
# shuffle_reader = fluid.layers.shuffle(reader=train_reader, buffer_size=512)
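As the comment in the reader notes, 127.5 is only a convenient stand-in for the real channel means. Here is a minimal sketch of computing the actual per-channel mean over the training images, reusing the `image2` list and imports from above:
# Sketch: compute the per-channel (RGB) mean over the training set.
# Streaming accumulation avoids loading the whole dataset into memory at once.
channel_sum = np.zeros(3, dtype='float64')
pixel_count = 0
for img_name in image2:
    im = Image.open(os.path.join('data/data30606/54_data/train', img_name)).convert('RGB')
    arr = np.array(im, dtype='float64')            # HWC
    channel_sum += arr.reshape(-1, 3).sum(axis=0)
    pixel_count += arr.shape[0] * arr.shape[1]
print('per-channel mean:', channel_sum / pixel_count)  # substitute for [127.5, 127.5, 127.5]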
2. Model Overview
Why these two models?
Inception-style networks and ResNet-style networks represent the two main directions of deep network design:
network width and network depth, and both widening and deepening networks have been shown to work.
This project therefore fuses two highly representative networks:
- Xception
- DenseNet
These two networks focus on depth and width respectively; fusing them lets both extract image features jointly.
Xception
Xception ("Extreme Inception")
First perform an ordinary (1×1) convolution, then apply a separate 3×3 convolution to each channel of the 1×1 output, and finally concatenate the results:
How a traditional convolution is implemented: (figure)
How a depthwise separable convolution is implemented: (figure)
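The payoff of this factorization is a large drop in parameter count. A quick back-of-the-envelope comparison (illustrative layer sizes, not taken from the model below):
# Parameter counts for one layer, ignoring biases: a standard 3x3 convolution
# versus a depthwise separable one (1x1 pointwise + per-channel 3x3 depthwise).
c_in, c_out, k = 256, 256, 3
standard = k * k * c_in * c_out              # 589,824
separable = c_in * c_out + k * k * c_out     # 67,840
print(standard, separable, round(standard / separable, 1))  # roughly 8.7x fewer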
# Xception
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
import sys
from paddle.fluid.param_attr import ParamAttr

__all__ = ['Xception', 'Xception41', 'Xception65', 'Xception71']


class Xception(object):
    """Xception"""

    def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8):
        self.entry_flow_block_num = entry_flow_block_num
        self.middle_flow_block_num = middle_flow_block_num
        return

    def net(self, input, class_dim=1000):
        conv = self.entry_flow(input, self.entry_flow_block_num)
        conv = self.middle_flow(conv, self.middle_flow_block_num)
        conv = self.exit_flow(conv, class_dim)
        return conv

    def entry_flow(self, input, block_num=3):
        '''xception entry_flow'''
        name = "entry_flow"
        conv = self.conv_bn_layer(
            input=input,
            num_filters=32,
            filter_size=3,
            stride=2,
            act='relu',
            name=name + "_conv1")
        conv = self.conv_bn_layer(
            input=conv,
            num_filters=64,
            filter_size=3,
            stride=1,
            act='relu',
            name=name + "_conv2")
        if block_num == 3:
            relu_first = [False, True, True]
            num_filters = [128, 256, 728]
            stride = [2, 2, 2]
        elif block_num == 5:
            relu_first = [False, True, True, True, True]
            num_filters = [128, 256, 256, 728, 728]
            stride = [2, 1, 2, 1, 2]
        else:
            sys.exit(-1)
        for block in range(block_num):
            curr_name = "{}_{}".format(name, block)
            conv = self.entry_flow_bottleneck_block(
                conv,
                num_filters=num_filters[block],
                name=curr_name,
                stride=stride[block],
                relu_first=relu_first[block])
        return conv

    def entry_flow_bottleneck_block(self,
                                    input,
                                    num_filters,
                                    name,
                                    stride=2,
                                    relu_first=False):
        '''entry_flow_bottleneck_block'''
        short = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            stride=stride,
            padding=0,
            act=None,
            param_attr=ParamAttr(name + "_branch1_weights"),
            bias_attr=False)
        conv0 = input
        if relu_first:
            conv0 = fluid.layers.relu(conv0)
        conv1 = self.separable_conv(
            conv0, num_filters, stride=1, name=name + "_branch2a_weights")
        conv2 = fluid.layers.relu(conv1)
        conv2 = self.separable_conv(
            conv2, num_filters, stride=1, name=name + "_branch2b_weights")
        pool = fluid.layers.pool2d(
            input=conv2,
            pool_size=3,
            pool_stride=stride,
            pool_padding=1,
            pool_type='max')
        return fluid.layers.elementwise_add(x=short, y=pool)

    def middle_flow(self, input, block_num=8):
        '''xception middle_flow'''
        num_filters = 728
        conv = input
        for block in range(block_num):
            name = "middle_flow_{}".format(block)
            conv = self.middle_flow_bottleneck_block(conv, num_filters, name)
        return conv

    def middle_flow_bottleneck_block(self, input, num_filters, name):
        '''middle_flow_bottleneck_block'''
        conv0 = fluid.layers.relu(input)
        conv0 = self.separable_conv(
            conv0,
            num_filters=num_filters,
            stride=1,
            name=name + "_branch2a_weights")
        conv1 = fluid.layers.relu(conv0)
        conv1 = self.separable_conv(
            conv1,
            num_filters=num_filters,
            stride=1,
            name=name + "_branch2b_weights")
        conv2 = fluid.layers.relu(conv1)
        conv2 = self.separable_conv(
            conv2,
            num_filters=num_filters,
            stride=1,
            name=name + "_branch2c_weights")
        return fluid.layers.elementwise_add(x=input, y=conv2)

    def exit_flow(self, input, class_dim):
        '''xception exit flow'''
        name = "exit_flow"
        num_filters1 = 728
        num_filters2 = 1024
        conv0 = self.exit_flow_bottleneck_block(
            input, num_filters1, num_filters2, name=name + "_1")
        conv1 = self.separable_conv(
            conv0, num_filters=1536, stride=1, name=name + "_2")
        conv1 = fluid.layers.relu(conv1)
        conv2 = self.separable_conv(
            conv1, num_filters=2048, stride=1, name=name + "_3")
        conv2 = fluid.layers.relu(conv2)
        pool = fluid.layers.pool2d(
            input=conv2, pool_type='avg', global_pooling=True)
        # stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        # out = fluid.layers.fc(
        #     input=pool,
        #     size=class_dim,
        #     param_attr=fluid.param_attr.ParamAttr(
        #         name='fc_weights',
        #         initializer=fluid.initializer.Uniform(-stdv, stdv)),
        #     bias_attr=fluid.param_attr.ParamAttr(name='fc_offset'))
        return pool

    def exit_flow_bottleneck_block(self, input, num_filters1, num_filters2,
                                   name):
        '''exit_flow_bottleneck_block'''
        short = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters2,
            filter_size=1,
            stride=2,
            padding=0,
            act=None,
            param_attr=ParamAttr(name + "_branch1_weights"),
            bias_attr=False)
        conv0 = fluid.layers.relu(input)
        conv1 = self.separable_conv(
            conv0, num_filters1, stride=1, name=name + "_branch2a_weights")
        conv2 = fluid.layers.relu(conv1)
        conv2 = self.separable_conv(
            conv2, num_filters2, stride=1, name=name + "_branch2b_weights")
        pool = fluid.layers.pool2d(
            input=conv2,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')
        return fluid.layers.elementwise_add(x=short, y=pool)

    def separable_conv(self, input, num_filters, stride=1, name=None):
        """separable_conv"""
        pointwise_conv = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_filters,
            stride=1,
            name=name + "_sep")
        depthwise_conv = self.conv_bn_layer(
            input=pointwise_conv,
            filter_size=3,
            num_filters=num_filters,
            stride=stride,
            groups=num_filters,
            use_cudnn=False,
            name=name + "_dw")
        return depthwise_conv

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      use_cudnn=True,
                      name=None):
        """conv_bn_layer"""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False,
            use_cudnn=use_cudnn)
        bn_name = "bn_" + name
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')


def Xception41():
    model = Xception(entry_flow_block_num=3, middle_flow_block_num=8)
    return model


def Xception65():
    model = Xception(entry_flow_block_num=3, middle_flow_block_num=16)
    return model


def Xception71():
    model = Xception(entry_flow_block_num=5, middle_flow_block_num=16)
    return model
DenseNet
Compared with ResNet, DenseNet proposes a more aggressive dense-connection scheme: all layers are connected to one another, meaning each layer receives the feature maps of all preceding layers as additional input.
Structure comparison (plain CNN, ResNet, DenseNet): (figure)
DenseNet architecture diagram (figure)
The core idea of DenseNet is the connectivity it establishes between layers: features are fully reused, the vanishing-gradient problem is further alleviated, deepening the network stops being an issue, and it trains very well.
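To make the growth concrete, here is a small sketch tracing how the channel count evolves through DenseNet-121, matching the `densenet_spec` entry in the code below: each dense layer appends `growth_rate` channels, and each transition layer halves them.
# Channel bookkeeping for DenseNet-121: (init 64, growth 32, blocks [6, 12, 24, 16])
num_features, growth_rate, block_config = 64, 32, [6, 12, 24, 16]
for i, num_layers in enumerate(block_config):
    num_features += num_layers * growth_rate
    print('after dense block %d: %d channels' % (i + 1, num_features))
    if i != len(block_config) - 1:
        num_features //= 2  # transition layer halves the channels
# prints 256, 512, 1024, 1024 -> the final global pool sees 1024 channels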
# DenseNet
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    "DenseNet", "DenseNet121", "DenseNet161", "DenseNet169", "DenseNet201",
    "DenseNet264"
]


class DenseNet():
    def __init__(self, layers=121):
        self.layers = layers

    def net(self, input, bn_size=4, dropout=0, class_dim=1000):
        layers = self.layers
        supported_layers = [121, 161, 169, 201, 264]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)
        densenet_spec = {
            121: (64, 32, [6, 12, 24, 16]),
            161: (96, 48, [6, 12, 36, 24]),
            169: (64, 32, [6, 12, 32, 32]),
            201: (64, 32, [6, 12, 48, 32]),
            264: (64, 32, [6, 12, 64, 48])
        }
        num_init_features, growth_rate, block_config = densenet_spec[layers]
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_init_features,
            filter_size=7,
            stride=2,
            padding=3,
            act=None,
            param_attr=ParamAttr(name="conv1_weights"),
            bias_attr=False)
        conv = fluid.layers.batch_norm(
            input=conv,
            act='relu',
            param_attr=ParamAttr(name='conv1_bn_scale'),
            bias_attr=ParamAttr(name='conv1_bn_offset'),
            moving_mean_name='conv1_bn_mean',
            moving_variance_name='conv1_bn_variance')
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            conv = self.make_dense_block(
                conv,
                num_layers,
                bn_size,
                growth_rate,
                dropout,
                name='conv' + str(i + 2))
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                conv = self.make_transition(
                    conv, num_features // 2, name='conv' + str(i + 2) + '_blk')
                num_features = num_features // 2
        conv = fluid.layers.batch_norm(
            input=conv,
            act='relu',
            param_attr=ParamAttr(name='conv5_blk_bn_scale'),
            bias_attr=ParamAttr(name='conv5_blk_bn_offset'),
            moving_mean_name='conv5_blk_bn_mean',
            moving_variance_name='conv5_blk_bn_variance')
        pool = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(conv.shape[1] * 1.0)
        # out = fluid.layers.fc(
        #     input=conv,
        #     size=class_dim,
        #     param_attr=fluid.param_attr.ParamAttr(
        #         initializer=fluid.initializer.Uniform(-stdv, stdv),
        #         name="fc_weights"),
        #     bias_attr=ParamAttr(name='fc_offset'))
        return pool

    def make_transition(self, input, num_output_features, name=None):
        bn_ac = fluid.layers.batch_norm(
            input,
            act='relu',
            param_attr=ParamAttr(name=name + '_bn_scale'),
            bias_attr=ParamAttr(name + '_bn_offset'),
            moving_mean_name=name + '_bn_mean',
            moving_variance_name=name + '_bn_variance')
        bn_ac_conv = fluid.layers.conv2d(
            input=bn_ac,
            num_filters=num_output_features,
            filter_size=1,
            stride=1,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + "_weights"))
        pool = fluid.layers.pool2d(
            input=bn_ac_conv, pool_size=2, pool_stride=2, pool_type='avg')
        return pool

    def make_dense_block(self,
                         input,
                         num_layers,
                         bn_size,
                         growth_rate,
                         dropout,
                         name=None):
        conv = input
        for layer in range(num_layers):
            conv = self.make_dense_layer(
                conv,
                growth_rate,
                bn_size,
                dropout,
                name=name + '_' + str(layer + 1))
        return conv

    def make_dense_layer(self, input, growth_rate, bn_size, dropout, name=None):
        bn_ac = fluid.layers.batch_norm(
            input,
            act='relu',
            param_attr=ParamAttr(name=name + '_x1_bn_scale'),
            bias_attr=ParamAttr(name + '_x1_bn_offset'),
            moving_mean_name=name + '_x1_bn_mean',
            moving_variance_name=name + '_x1_bn_variance')
        bn_ac_conv = fluid.layers.conv2d(
            input=bn_ac,
            num_filters=bn_size * growth_rate,
            filter_size=1,
            stride=1,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + "_x1_weights"))
        bn_ac = fluid.layers.batch_norm(
            bn_ac_conv,
            act='relu',
            param_attr=ParamAttr(name=name + '_x2_bn_scale'),
            bias_attr=ParamAttr(name + '_x2_bn_offset'),
            moving_mean_name=name + '_x2_bn_mean',
            moving_variance_name=name + '_x2_bn_variance')
        bn_ac_conv = fluid.layers.conv2d(
            input=bn_ac,
            num_filters=growth_rate,
            filter_size=3,
            stride=1,
            padding=1,
            act=None,
            bias_attr=False,
            param_attr=ParamAttr(name=name + "_x2_weights"))
        if dropout:
            bn_ac_conv = fluid.layers.dropout(
                x=bn_ac_conv, dropout_prob=dropout)
        bn_ac_conv = fluid.layers.concat([input, bn_ac_conv], axis=1)
        return bn_ac_conv


def DenseNet121():
    model = DenseNet(layers=121)
    return model


def DenseNet161():
    model = DenseNet(layers=161)
    return model


def DenseNet169():
    model = DenseNet(layers=169)
    return model


def DenseNet201():
    model = DenseNet(layers=201)
    return model


def DenseNet264():
    model = DenseNet(layers=264)
    return model
3. Loading the Models
dense = DenseNet121()
xception = Xception41()
# Define the input layers
image = fluid.layers.data(name='image', shape=[3, 640, 640], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# In newer Paddle versions, define them like this:
# image = fluid.data(name='image', shape=[-1, 3, 32, 32], dtype='float32')
# label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
model1 = dense.net(image, class_dim=102)
model2 = xception.net(image, class_dim=102)
# Freeze the backbone blocks so no gradients flow into the pretrained layers
model1.stop_gradient = True
model2.stop_gradient = True
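If you later want to fine-tune the backbones rather than only the new classifier head, skip the two stop_gradient lines above; this must happen before the backward graph is built in the next section, and a lower learning rate is then usually advisable. A minimal sketch of the alternative:
# Sketch (alternative to the two lines above): keep the backbones trainable
# for end-to-end fine-tuning. Must run before optimizer.minimize()
# constructs the backward graph in the next section.
# model1.stop_gradient = False
# model2.stop_gradient = False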
4. Model Fusion & Training
The fusion here is deliberately simple. It is not elegant at all, but it does improve the score.
The fusion works as shown in the figure below: the two models extract features independently, the features are concatenated, and a fully connected layer outputs the class probabilities.
Note that this project uses only one fully connected layer; you can experiment with the two-fc-layer structure shown in the figure yourself.
Architecture of the fusion model (figure)
# Model fusion
model_fusion = fluid.layers.concat(input=[model1, model2], axis=1)
# Clone a base main program from here
base_model_program = fluid.default_main_program().clone()
# Attach a new classifier sized for this project's number of classes
model = fluid.layers.fc(input=model_fusion, size=102, act='softmax')
# Loss and accuracy
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label)
# Optimizer
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-4)
# Build the backward pass
opts = optimizer.minimize(avg_cost)
# Choose the device
place = fluid.CUDAPlace(0)  # train on GPU
# place = fluid.CPUPlace()  # train on CPU
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())
# Official pretrained weights
src_pretrain_model_path = 'data/data31022/fusion/'
# Predicate: check whether a variable's weight file exists
def if_exist(var):
    path = os.path.join(src_pretrain_model_path, var.name)
    exist = os.path.exists(path)
    return exist
# Load only the variables whose weight files exist
fluid.io.load_vars(executor=exe, dirname=src_pretrain_model_path, predicate=if_exist, main_program=base_model_program)
# Define how input data is fed
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
# Train for one pass (increase the range for real training)
for pass_id in range(1):
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc = exe.run(program=fluid.default_main_program(),
                                        feed=feeder.feed(data),
                                        fetch_list=[avg_cost, acc])
        # Print progress every 10 batches
        if batch_id % 10 == 0:
            print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %
                  (pass_id, batch_id, train_cost[0], train_acc[0]))
# Save the parameters
save_pretrain_model_path = 'models/step-8_model/'
# Remove any old model files
shutil.rmtree(save_pretrain_model_path, ignore_errors=True)
# Create the model directory
os.makedirs(save_pretrain_model_path)
# Save the parameters for incremental training
fluid.io.save_params(executor=exe, dirname=save_pretrain_model_path)
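The notebook never evaluates on a held-out split. Below is a hedged sketch of a validation pass; it assumes you clone a test program before `optimizer.minimize()` in the cell above, and that a `val_reader` has been built the same way as `train_reader` (both names are hypothetical here):
# Sketch: evaluate on a validation split.
# Prerequisite (in the cell above, after defining acc and before minimize()):
#     test_program = fluid.default_main_program().clone(for_test=True)
val_accs = []
for data in val_reader():  # hypothetical reader over a held-out split
    _, val_acc = exe.run(program=test_program,
                         feed=feeder.feed(data),
                         fetch_list=[avg_cost, acc])
    val_accs.append(val_acc[0])
print('mean validation accuracy:', np.mean(val_accs))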
5. Incremental Training
This section demonstrates Paddle's incremental training; see the PaddlePaddle API docs for details. PaddleAPI
# Pretrained model produced by the previous step
pretrained_model_path = 'models/step-8_model/'
# Load the saved parameters
fluid.io.load_params(executor=exe, dirname=pretrained_model_path)
# Define how input data is fed
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
# Run a single pass to verify the pipeline works
for pass_id in range(1):
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc = exe.run(program=fluid.default_main_program(),
                                        feed=feeder.feed(data),
                                        fetch_list=[avg_cost, acc])
        # Print progress every 10 batches
        if batch_id % 10 == 0:
            print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %
                  (pass_id, batch_id, train_cost[0], train_acc[0]))
# Save the model
save_pretrain_model_path = 'models/step-6_model/'
# Remove any old model files
shutil.rmtree(save_pretrain_model_path, ignore_errors=True)
# Create the model directory
os.makedirs(save_pretrain_model_path)
# Save the inference model
fluid.io.save_inference_model(dirname=save_pretrain_model_path, feeded_var_names=['image'],
                              target_vars=[model], executor=exe)
6. Prediction
# Load the inference model
use_gpu = True
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
save_freeze_dir = 'models/step-6_model/'
[inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=save_freeze_dir, executor=exe)
# Read the test data
from PIL import Image
import numpy as np
def reader(img_path):
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize((640, 640), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img -= [127.5, 127.5, 127.5]
    img = img.transpose((2, 0, 1))  # HWC to CHW
    img *= 0.007843
    img = img[np.newaxis, :]
    return img
# Predict images one at a time
# This directly generates the CSV file for the competition submission; upload it to the platform to check your score.
img_list = os.listdir('data/data30606/54_data/test/')
img_list.sort()
img_list.sort(key=lambda x: int(x[:-4]))  # sort filenames numerically
img_nums = len(img_list)
# print(img_list)
test_path = 'data/data30606/54_data/test/'
labels = []
for i in range(img_nums):
    img_path = test_path + img_list[i]
    tensor_img = reader(img_path)
    label = exe.run(inference_program, feed={feed_target_names[0]: tensor_img}, fetch_list=fetch_targets)
    lab = np.argmax(label)
    # print(lab)
    labels.append(lab)
submit = pd.DataFrame()
submit[1] = labels
submit.to_csv('submit123.csv', header=False)
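If you want to eyeball model confidence rather than just the argmax, here is a small sketch for inspecting the top-5 classes of the last predicted image:
# Sketch: top-5 classes and probabilities for the most recent prediction.
probs = np.squeeze(label[0])        # softmax output of the last image, shape (102,)
top5 = np.argsort(probs)[::-1][:5]
for idx in top5:
    print('class %d: %.4f' % (idx, probs[idx]))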
Summary
Overall, this fusion approach is not very elegant, and the accuracy gain is small relative to the extra compute. Still, people do this in competitions, and with AI Studio
providing enough GPU power you can stack models freely; higher accuracy means a higher ranking.
Below is my score. You can tune hyperparameters such as the number of epochs and the learning rate, add fully connected layers, or add dropout to improve the network. Good luck!
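As one concrete example of the tweaks suggested above, here is a sketch of a two-fc head with dropout that could replace the single classifier in section 4 (the 1024-unit hidden size is an arbitrary illustrative choice, not a tuned value):
# Sketch: a deeper classification head, matching the two-fc structure in the fusion figure.
hidden = fluid.layers.fc(input=model_fusion, size=1024, act='relu')
hidden = fluid.layers.dropout(hidden, dropout_prob=0.5)
model = fluid.layers.fc(input=hidden, size=102, act='softmax')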