Reproducing Pyramid Convolution (PyConv)
1. Paper Overview
In the standard convolution operation, every kernel has exactly as many channels as the input feature map.

In PyConv, as the kernel size increases, the kernel's channel depth decreases. This is realized with grouped convolution: the channels of the input feature map are partitioned into groups of n channels each, and each kernel operates only on its own group.

Like the Inception branches and the ASPP module, PyConv builds a multi-branch network with different kernels. The difference is that, where most similar modules vary the dilation rate, PyConv uses the idea of grouped convolution. Its branches use kernels of different sizes; the paper uses 3×3, 5×5, 7×7, and 9×9 kernels. In general, smaller kernels have small receptive fields and capture small objects and local detail, while larger kernels have large receptive fields and capture large objects and global semantics. Grouped convolution splits the input feature map into separate groups, each processed by its own kernels independently.

The paper proposes two variants: PyConv uses relatively small group counts (16, 8, 4, 2), while PyHGConv uses larger ones (32 and 64). When integrated into a backbone, the number of branches is reduced as the spatial size of the feature maps shrinks: feature maps in the first stage pass through four branches, while the last stage uses only a single branch. For semantic segmentation, the paper adds a local PyConv module and a global PyConv module to the base network. Both first use a 1×1 convolution to raise the channel count to 512, followed by a four-branch PyConv block with kernels 9, 7, 5, 3 and group counts 16, 8, 4, 2. The difference is that the global PyConv module applies adaptive average pooling to reduce the feature map while extracting global context, and upsamples back to the original size after the PyConv branches. Finally, the features from the local and global PyConv modules are merged.
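To make the grouping concrete, here is a minimal sketch (added for this write-up) of how Paddle's nn.Conv2D splits input channels when groups > 1: each kernel only sees in_channels / groups channels, which is exactly what lets PyConv afford larger kernels without a parameter explosion.

import paddle.nn as nn

# Standard conv: each of the 64 kernels spans all 64 input channels.
conv_std = nn.Conv2D(in_channels=64, out_channels=64, kernel_size=5, padding=2)
print(conv_std.weight.shape)  # [64, 64, 5, 5]

# Grouped conv with groups=4: each kernel spans only 64/4 = 16 channels,
# so the weight tensor (and parameter count) shrinks by 4x.
conv_grp = nn.Conv2D(in_channels=64, out_channels=64, kernel_size=5,
                     padding=2, groups=4)
print(conv_grp.weight.shape)  # [64, 16, 5, 5]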
- Reference: 【神经网络架构】Pyramidal Convolution(PyConv):金字塔卷积,“即插即用”,提升你的网络性能
2. Code Reproduction
import paddle
import paddle.nn as nn


class PyConv2d(nn.Layer):
    """PyConv2d with padding (general case). Applies a 2D PyConv over an input signal composed of several input planes.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (list): Number of channels for each pyramid level produced by the convolution
        pyconv_kernels (list): Spatial size of the kernel for each pyramid level
        pyconv_groups (list): Number of blocked connections from input channels to output channels for each pyramid level
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1

    Example::
        >>> # PyConv with two pyramid levels, kernels: 3x3, 5x5
        >>> m = PyConv2d(in_channels=64, out_channels=[32, 32], pyconv_kernels=[3, 5], pyconv_groups=[1, 4])
        >>> input = paddle.randn([4, 64, 56, 56])
        >>> output = m(input)

        >>> # PyConv with three pyramid levels, kernels: 3x3, 5x5, 7x7
        >>> m = PyConv2d(in_channels=64, out_channels=[16, 16, 32], pyconv_kernels=[3, 5, 7], pyconv_groups=[1, 4, 8])
        >>> input = paddle.randn([4, 64, 56, 56])
        >>> output = m(input)
    """

    def __init__(self, in_channels, out_channels, pyconv_kernels, pyconv_groups, stride=1, dilation=1):
        super(PyConv2d, self).__init__()
        assert len(out_channels) == len(pyconv_kernels) == len(pyconv_groups)
        # One grouped convolution per pyramid level; padding = kernel_size // 2
        # keeps the spatial size identical across levels so they can be concatenated.
        self.pyconv_levels = [None] * len(pyconv_kernels)
        for i in range(len(pyconv_kernels)):
            self.pyconv_levels[i] = nn.Conv2D(in_channels, out_channels[i], kernel_size=pyconv_kernels[i],
                                              stride=stride, padding=pyconv_kernels[i] // 2,
                                              groups=pyconv_groups[i], dilation=dilation)
        self.pyconv_levels = nn.LayerList(self.pyconv_levels)

    def forward(self, x):
        # Run every pyramid level on the same input, then concatenate along channels.
        out = []
        for level in self.pyconv_levels:
            out.append(level(x))
        return paddle.concat(out, axis=1)
2.1 Quick Test
m = PyConv2d(in_channels=64, out_channels=[32, 32], pyconv_kernels=[3, 5], pyconv_groups=[1, 4])
input = paddle.randn([4, 64, 56, 56])
output = m(input)
m = PyConv2d(in_channels=64, out_channels=[16, 16, 32], pyconv_kernels=[3, 5, 7], pyconv_groups=[1, 4, 8])
input = paddle.randn([4, 64, 56, 56])
output = m(input)
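As a small sanity check (added here), the concatenated output should keep the input's spatial size and have sum(out_channels) channels; both configurations above yield 64 channels (32+32 and 16+16+32) at the unchanged 56×56 resolution:

print(output.shape)  # [4, 64, 56, 56]
assert output.shape == [4, 64, 56, 56]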
2.2 Alex_PyConv
To give a hands-on feel for the reproduction, this project uses AlexNet for a comparison experiment. In Alex_PyConv, the third and fourth convolution layers are replaced by PyConv layers, which removes roughly 0.31M parameters relative to the original AlexNet; the quick calculation below shows where the savings come from.
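A back-of-the-envelope check (added for this write-up; the per-layer numbers match the paddle.summary tables later in this section):

def conv_params(k, c_in, c_out, groups=1):
    """Parameters of a k x k conv with bias: k*k*(c_in/groups)*c_out + c_out."""
    return k * k * (c_in // groups) * c_out + c_out

# Original AlexNet conv3 (192 -> 384) and conv4 (384 -> 256), both 3x3:
std = conv_params(3, 192, 384) + conv_params(3, 384, 256)        # 663,936 + 884,992

# PyConv replacements with the same total output channels:
pyn = (conv_params(3, 192, 192) + conv_params(5, 192, 192, 4)    # conv3: 331,968 + 230,592
       + conv_params(3, 384, 64) + conv_params(5, 384, 64, 4)    # conv4: 221,248 + 153,664
       + conv_params(7, 384, 128, 8))                            #        + 301,184

print(std - pyn)  # 310,272 parameters saved (~0.31M)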
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn import Linear, Dropout, ReLU
from paddle.nn import Conv2D, MaxPool2D
from paddle.nn.initializer import Uniform


class ConvPoolLayer(nn.Layer):
    """Conv2D + optional ReLU + 3x3/stride-2 max pooling, as in Paddle's AlexNet."""

    def __init__(self,
                 input_channels,
                 output_channels,
                 filter_size,
                 stride,
                 padding,
                 stdv,
                 groups=1,
                 act=None):
        super(ConvPoolLayer, self).__init__()
        self.relu = ReLU() if act == "relu" else None
        self._conv = Conv2D(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)

    def forward(self, inputs):
        x = self._conv(inputs)
        if self.relu is not None:
            x = self.relu(x)
        x = self._pool(x)
        return x
class AlexNet_PYN(nn.Layer):
    """AlexNet with the third and fourth convolution layers replaced by PyConv.

    Based on `"ImageNet Classification with Deep Convolutional Neural Networks"
    <https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf>`_

    Args:
        num_classes (int): Output dim of last fc layer. Default: 1000.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet_PYN, self).__init__()
        self.num_classes = num_classes
        stdv = 1.0 / math.sqrt(3 * 11 * 11)
        self._conv1 = ConvPoolLayer(3, 64, 11, 4, 2, stdv, act="relu")
        stdv = 1.0 / math.sqrt(64 * 5 * 5)
        self._conv2 = ConvPoolLayer(64, 192, 5, 1, 2, stdv, act="relu")
        # PyConv replacements: conv3 outputs 192 + 192 = 384 channels and
        # conv4 outputs 64 + 64 + 128 = 256 channels, matching the original AlexNet.
        self._conv3 = PyConv2d(in_channels=192, out_channels=[192, 192],
                               pyconv_kernels=[3, 5], pyconv_groups=[1, 4])
        self._conv4 = PyConv2d(in_channels=384, out_channels=[64, 64, 128],
                               pyconv_kernels=[3, 5, 7], pyconv_groups=[1, 4, 8])
        stdv = 1.0 / math.sqrt(256 * 3 * 3)
        self._conv5 = ConvPoolLayer(256, 256, 3, 1, 1, stdv, act="relu")
        if self.num_classes > 0:
            stdv = 1.0 / math.sqrt(256 * 6 * 6)
            self._drop1 = Dropout(p=0.5, mode="downscale_in_infer")
            self._fc6 = Linear(
                in_features=256 * 6 * 6,
                out_features=4096,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
            self._drop2 = Dropout(p=0.5, mode="downscale_in_infer")
            self._fc7 = Linear(
                in_features=4096,
                out_features=4096,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
            self._fc8 = Linear(
                in_features=4096,
                out_features=num_classes,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))

    def forward(self, inputs):
        x = self._conv1(inputs)
        x = self._conv2(x)
        x = self._conv3(x)
        x = F.relu(x)
        x = self._conv4(x)
        x = F.relu(x)
        x = self._conv5(x)
        if self.num_classes > 0:
            x = paddle.flatten(x, start_axis=1, stop_axis=-1)
            x = self._drop1(x)
            x = self._fc6(x)
            x = F.relu(x)
            x = self._drop2(x)
            x = self._fc7(x)
            x = F.relu(x)
            x = self._fc8(x)
        return x
alex_pyn = AlexNet_PYN(num_classes=10)
paddle.summary(alex_pyn, (1, 3, 224, 224))
----------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
============================================================================
Conv2D-54 [[1, 3, 224, 224]] [1, 64, 55, 55] 23,296
ReLU-20 [[1, 64, 55, 55]] [1, 64, 55, 55] 0
MaxPool2D-20 [[1, 64, 55, 55]] [1, 64, 27, 27] 0
ConvPoolLayer-20 [[1, 3, 224, 224]] [1, 64, 27, 27] 0
Conv2D-55 [[1, 64, 27, 27]] [1, 192, 27, 27] 307,392
ReLU-21 [[1, 192, 27, 27]] [1, 192, 27, 27] 0
MaxPool2D-21 [[1, 192, 27, 27]] [1, 192, 13, 13] 0
ConvPoolLayer-21 [[1, 64, 27, 27]] [1, 192, 13, 13] 0
Conv2D-56 [[1, 192, 13, 13]] [1, 192, 13, 13] 331,968
Conv2D-57 [[1, 192, 13, 13]] [1, 192, 13, 13] 230,592
PyConv2d-23 [[1, 192, 13, 13]] [1, 384, 13, 13] 0
Conv2D-58 [[1, 384, 13, 13]] [1, 64, 13, 13] 221,248
Conv2D-59 [[1, 384, 13, 13]] [1, 64, 13, 13] 153,664
Conv2D-60 [[1, 384, 13, 13]] [1, 128, 13, 13] 301,184
PyConv2d-24 [[1, 384, 13, 13]] [1, 256, 13, 13] 0
Conv2D-61 [[1, 256, 13, 13]] [1, 256, 13, 13] 590,080
ReLU-22 [[1, 256, 13, 13]] [1, 256, 13, 13] 0
MaxPool2D-22 [[1, 256, 13, 13]] [1, 256, 6, 6] 0
ConvPoolLayer-22 [[1, 256, 13, 13]] [1, 256, 6, 6] 0
Dropout-3 [[1, 9216]] [1, 9216] 0
Linear-4 [[1, 9216]] [1, 4096] 37,752,832
Dropout-4 [[1, 4096]] [1, 4096] 0
Linear-5 [[1, 4096]] [1, 4096] 16,781,312
Linear-6 [[1, 4096]] [1, 10] 40,970
============================================================================
Total params: 56,734,538
Trainable params: 56,734,538
Non-trainable params: 0
----------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 8.91
Params size (MB): 216.43
Estimated Total Size (MB): 225.91
----------------------------------------------------------------------------
{'total_params': 56734538, 'trainable_params': 56734538}
3. Comparison Experiment
This project sets up a comparison experiment on Cifar10, with the unmodified AlexNet as the baseline. Both models are trained for 12 epochs, and the results are compared at the end.
import paddle
from paddle.metric import Accuracy
from paddle.vision.transforms import Compose, Normalize, Resize, ToTensor

callback = paddle.callbacks.VisualDL(log_dir='visualdl_log_dir_alex_pyn')

# Resize the HWC image first, convert it to a CHW tensor in [0, 1],
# then normalize each channel to [-1, 1].
normalize = Normalize(mean=[0.5, 0.5, 0.5],
                      std=[0.5, 0.5, 0.5],
                      data_format='CHW')
transform = Compose([Resize(size=(224, 224)), ToTensor(), normalize])

cifar10_train = paddle.vision.datasets.Cifar10(mode='train', transform=transform)
cifar10_test = paddle.vision.datasets.Cifar10(mode='test', transform=transform)

# Training-set data loader
train_loader = paddle.io.DataLoader(cifar10_train, batch_size=768, shuffle=True, drop_last=True)
# Test-set data loader
test_loader = paddle.io.DataLoader(cifar10_test, batch_size=768, shuffle=True, drop_last=True)
alex_pyn = paddle.Model(AlexNet_PYN(num_classes=10))
optim = paddle.optimizer.Adam(learning_rate=0.001, parameters=alex_pyn.parameters())
alex_pyn.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy())
alex_pyn.fit(train_data=train_loader,
             eval_data=test_loader,
             epochs=12,
             callbacks=callback,
             verbose=1)
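To read off the final accuracies reported in Section 4.1, one option (added here; the original run takes the numbers from the last fit epoch) is the high-level evaluate API:

# Evaluate on the test loader; returns a dict with the prepared metrics,
# e.g. {'loss': [...], 'acc': ...}.
result = alex_pyn.evaluate(test_loader, verbose=1)
print(result)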
The baseline AlexNet below reuses the imports and the ConvPoolLayer defined in Section 2.2.
class AlexNet(nn.Layer):
    """AlexNet model from
    `"ImageNet Classification with Deep Convolutional Neural Networks"
    <https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf>`_

    Args:
        num_classes (int): Output dim of last fc layer. Default: 1000.

    Examples:
        .. code-block:: python

            from paddle.vision.models import AlexNet

            alexnet = AlexNet()
    """

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.num_classes = num_classes
        stdv = 1.0 / math.sqrt(3 * 11 * 11)
        self._conv1 = ConvPoolLayer(3, 64, 11, 4, 2, stdv, act="relu")
        stdv = 1.0 / math.sqrt(64 * 5 * 5)
        self._conv2 = ConvPoolLayer(64, 192, 5, 1, 2, stdv, act="relu")
        stdv = 1.0 / math.sqrt(192 * 3 * 3)
        self._conv3 = Conv2D(
            192,
            384,
            3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
        stdv = 1.0 / math.sqrt(384 * 3 * 3)
        self._conv4 = Conv2D(
            384,
            256,
            3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
        stdv = 1.0 / math.sqrt(256 * 3 * 3)
        self._conv5 = ConvPoolLayer(256, 256, 3, 1, 1, stdv, act="relu")
        if self.num_classes > 0:
            stdv = 1.0 / math.sqrt(256 * 6 * 6)
            self._drop1 = Dropout(p=0.5, mode="downscale_in_infer")
            self._fc6 = Linear(
                in_features=256 * 6 * 6,
                out_features=4096,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
            self._drop2 = Dropout(p=0.5, mode="downscale_in_infer")
            self._fc7 = Linear(
                in_features=4096,
                out_features=4096,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
            self._fc8 = Linear(
                in_features=4096,
                out_features=num_classes,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))

    def forward(self, inputs):
        x = self._conv1(inputs)
        x = self._conv2(x)
        x = self._conv3(x)
        x = F.relu(x)
        x = self._conv4(x)
        x = F.relu(x)
        x = self._conv5(x)
        if self.num_classes > 0:
            x = paddle.flatten(x, start_axis=1, stop_axis=-1)
            x = self._drop1(x)
            x = self._fc6(x)
            x = F.relu(x)
            x = self._drop2(x)
            x = self._fc7(x)
            x = F.relu(x)
            x = self._fc8(x)
        return x
alex = AlexNet(num_classes=10)
paddle.summary(alex, (1, 3, 224, 224))
----------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
============================================================================
Conv2D-62 [[1, 3, 224, 224]] [1, 64, 55, 55] 23,296
ReLU-23 [[1, 64, 55, 55]] [1, 64, 55, 55] 0
MaxPool2D-23 [[1, 64, 55, 55]] [1, 64, 27, 27] 0
ConvPoolLayer-23 [[1, 3, 224, 224]] [1, 64, 27, 27] 0
Conv2D-63 [[1, 64, 27, 27]] [1, 192, 27, 27] 307,392
ReLU-24 [[1, 192, 27, 27]] [1, 192, 27, 27] 0
MaxPool2D-24 [[1, 192, 27, 27]] [1, 192, 13, 13] 0
ConvPoolLayer-24 [[1, 64, 27, 27]] [1, 192, 13, 13] 0
Conv2D-64 [[1, 192, 13, 13]] [1, 384, 13, 13] 663,936
Conv2D-65 [[1, 384, 13, 13]] [1, 256, 13, 13] 884,992
Conv2D-66 [[1, 256, 13, 13]] [1, 256, 13, 13] 590,080
ReLU-25 [[1, 256, 13, 13]] [1, 256, 13, 13] 0
MaxPool2D-25 [[1, 256, 13, 13]] [1, 256, 6, 6] 0
ConvPoolLayer-25 [[1, 256, 13, 13]] [1, 256, 6, 6] 0
Dropout-5 [[1, 9216]] [1, 9216] 0
Linear-7 [[1, 9216]] [1, 4096] 37,752,832
Dropout-6 [[1, 4096]] [1, 4096] 0
Linear-8 [[1, 4096]] [1, 4096] 16,781,312
Linear-9 [[1, 4096]] [1, 10] 40,970
============================================================================
Total params: 57,044,810
Trainable params: 57,044,810
Non-trainable params: 0
----------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 8.09
Params size (MB): 217.61
Estimated Total Size (MB): 226.27
----------------------------------------------------------------------------
{'total_params': 57044810, 'trainable_params': 57044810}
import paddle
from paddle.metric import Accuracy
from paddle.vision.transforms import Compose, Normalize, Resize, ToTensor

callback = paddle.callbacks.VisualDL(log_dir='visualdl_log_dir_alex')

# Same preprocessing pipeline as for Alex_PyConv.
normalize = Normalize(mean=[0.5, 0.5, 0.5],
                      std=[0.5, 0.5, 0.5],
                      data_format='CHW')
transform = Compose([Resize(size=(224, 224)), ToTensor(), normalize])

cifar10_train = paddle.vision.datasets.Cifar10(mode='train', transform=transform)
cifar10_test = paddle.vision.datasets.Cifar10(mode='test', transform=transform)

# Training-set data loader
train_loader = paddle.io.DataLoader(cifar10_train, batch_size=768, shuffle=True, drop_last=True)
# Test-set data loader
test_loader = paddle.io.DataLoader(cifar10_test, batch_size=768, shuffle=True, drop_last=True)
alex = paddle.Model(AlexNet(num_classes=10))
optim = paddle.optimizer.Adam(learning_rate=0.001, parameters=alex.parameters())
alex.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy())
alex.fit(train_data=train_loader,
         eval_data=test_loader,
         epochs=12,
         callbacks=callback,
         verbose=1)
4. Experimental Results
4.1 Results Table
| Model | Train Acc | Eval Acc |
| --- | --- | --- |
| Alex_PYN | 0.7801 | 0.7254 |
| AlexNet | 0.7241 | 0.6967 |
4.2 Training Visualization

The training curves of both runs can be inspected with VisualDL from the log directories written above, visualdl_log_dir_alex_pyn and visualdl_log_dir_alex.
5. Summary
This project reproduces Pyramid Convolution. Compared with standard convolution layers, PyConv uses fewer parameters, and after 12 epochs of training the PyConv model converges faster and achieves better metrics, in both accuracy and loss. The project provides a PaddlePaddle implementation of PyConv; since it is a plug-and-play convolution layer, you can drop it into your own architectures as needed, as sketched below.
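A minimal, hypothetical sketch of such a drop-in replacement, assuming the PyConv2d class from Section 2 is in scope: the out_channels are chosen to sum to the original layer's output channels, so the surrounding network is unchanged.

import paddle
import paddle.nn as nn

# Original layer: a single 3x3 convolution, 128 -> 256 channels.
plain = nn.Conv2D(128, 256, kernel_size=3, padding=1)

# Drop-in PyConv replacement: two pyramid levels whose outputs sum to 256.
pyconv = PyConv2d(in_channels=128, out_channels=[128, 128],
                  pyconv_kernels=[3, 5], pyconv_groups=[1, 4])

x = paddle.randn([2, 128, 32, 32])
print(plain(x).shape)   # [2, 256, 32, 32]
print(pyconv(x).shape)  # [2, 256, 32, 32] -- same shape, multi-scale receptive fields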