vs2017\vs2019 VGG16处理cifar-10数据集的PyTorch实现
这是针对于博客vs2017安装和使用教程(详细)和vs2019安装和使用教程(详细)的VGG16-CIFAR10项目新建示例
目录
一、说明
1.网络框架搭建教程请参看博主博客:PyTorch 入门实战(四)——利用Torch.nn构建卷积神经网络
2.这里主要展示博主的代码和运行结果,希望可以帮助到正在学习PyTorch的人们
二、代码
1.nn_module_sample.py:里面是VGG-16(带有BatchNorm层)的网络,注意classifier分类器部分(全连接部分)的输入大小由卷积部分输出的特征图大小决定(32×32输入经过5次池化后为1×1×512,与batch大小无关)
-
import torch.nn as nn
-
-
class VGG16(nn.Module):
    """VGG-16 with batch normalisation for 32x32 inputs (e.g. CIFAR-10).

    The convolutional stack is the standard VGG-16 plan: 13 conv layers
    (each conv -> BatchNorm -> ReLU) with 2x2 max-pooling after layers
    2, 4, 7, 10 and 13. A 32x32 input is reduced to a 1x1x512 feature
    map, flattened, and fed to a three-layer fully connected classifier.

    Args:
        num_classes: size of the final output layer (default 10).
    """

    # Layer plan: integers are conv output channels, 'M' is a 2x2 max-pool.
    _CFG = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
            512, 512, 512, 'M', 512, 512, 512, 'M']

    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        layers = []
        in_ch = 3
        for item in self._CFG:
            if item == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([
                    nn.Conv2d(in_ch, item, kernel_size=3, padding=1),
                    nn.BatchNorm2d(item),
                    nn.ReLU(True),
                ])
                in_ch = item
        # Kept for layer-index parity with the reference implementation;
        # on a 1x1 feature map this average pool is an identity operation.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        #self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        """Run the conv stack, flatten per sample, and classify.

        Returns raw logits of shape (batch, num_classes).
        """
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)
-
-
-
class testNet(nn.Module):
    """Small example CNN for 32x32 RGB images (CIFAR-10).

    Demonstrates three ways of defining layers in PyTorch: standalone
    modules (conv1/BN1/relu1/pool1), ``add_module`` on an empty
    ``nn.Sequential`` (layer2), and a ``nn.Sequential`` built from
    positional arguments (layer3).

    Args:
        num_classes: size of the final classification layer (default 10).
    """

    def __init__(self, num_classes=10):
        super(testNet, self).__init__()
        # Block 1: standalone modules. 3x32x32 -> 64x16x16 after the pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.BN1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 2: built via add_module. 64x16x16 -> 64x8x8 after the pool.
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(64, 64, kernel_size=3, padding=1))
        layer2.add_module('BN2', nn.BatchNorm2d(64))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = layer2

        # Block 3: plain Sequential, no pooling. 64x8x8 -> 128x8x8.
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )
        # BUG FIX: the flattened feature vector has 128 * 8 * 8 = 8192
        # elements for a 32x32 input; the original nn.Linear(128, 256)
        # raised a shape-mismatch error in forward().
        self.classifier = nn.Sequential(
            nn.Linear(128 * 8 * 8, 256),
            nn.ReLU(True),
            nn.Dropout(),

            nn.Linear(256, 256),
            nn.ReLU(True),
            nn.Dropout(),

            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Forward pass: conv blocks, flatten, classifier; returns logits."""
        out = self.conv1(x)
        out = self.BN1(out)
        out = self.relu1(out)
        out = self.pool1(out)

        out = self.layer2(out)
        out = self.layer3(out)

        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out
-
-
if __name__ == '__main__':
    import torch

    # Quick smoke test: build the model on the available device and
    # print its layer structure.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = VGG16().to(device)
    print(net)
2.train.py:包含参数设定、图像预处理、数据集读取、网络创建、损失和优化、训练和测试部分
-
import torch
-
import torch.nn as nn
-
import torch.optim as optim
-
import torchvision
-
import torchvision.transforms as transforms
-
-
import os
-
import argparse
-
-
from tensorboardX import SummaryWriter
-
-
from nn_module_sample import VGG16
-
from torch.autograd import Variable
-
-
# Command-line arguments.
parser = argparse.ArgumentParser(description='cifar10')
# BUG FIX: type= was missing on the numeric options, so values passed on
# the command line stayed strings and broke range(args.epoch) / SGD(lr=...).
parser.add_argument('--lr', default=1e-2, type=float, help='learning rate')
#parser.add_argument('--batch_size',default=50,help='batch size')
parser.add_argument('--epoch', default=15, type=int, help='time for ergodic')
parser.add_argument('--pre_epoch', default=0, type=int, help='begin epoch')
# Folder where per-epoch model snapshots are written.
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
# NOTE(review): with default=True, any string given on the command line
# (even "False") is truthy; switch to action='store_true' if this flag
# should be controllable from the CLI.
parser.add_argument('--pre_model', default=True,help='use pre-model')  # resume from checkpoint when True
args = parser.parse_args()

# Use the GPU when one is available, otherwise fall back to CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
-
-
# ---- Data pipeline ---------------------------------------------------
# Channel-wise normalisation (ImageNet statistics); the commented pair
# below are the CIFAR-10 statistics — either works for this tutorial.
#transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
_normalize = transforms.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225))

# Training augmentation: zero-pad by 4 then randomly crop back to 32x32,
# and flip horizontally with probability 0.5.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    _normalize,
])

# Test images are only converted to tensors and normalised.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    _normalize,
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=0)

# Human-readable CIFAR-10 class names (index == label id).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
-
-
# ---- Model, loss and optimiser ---------------------------------------
net = VGG16().to(device)

# Cross-entropy loss for the 10-way classification problem.
criterion = nn.CrossEntropyLoss()
# Mini-batch SGD with momentum; weight_decay applies L2 regularisation.
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

# Optionally resume training from the last saved checkpoint.
if args.pre_model:
    print("Resume from checkpoint...")
    assert os.path.isdir('checkpoint'),'Error: no checkpoint directory found'
    # BUG FIX: map_location remaps tensors to the current device, so a
    # checkpoint saved on GPU can be restored on a CPU-only machine
    # (a bare torch.load would raise in that case).
    state = torch.load('./checkpoint/ckpt.t7', map_location=device)
    net.load_state_dict(state['state_dict'])
    best_test_acc = state['acc']
    pre_epoch = state['epoch']
else:
    # Fresh run: no best accuracy yet; starting epoch comes from the CLI.
    best_test_acc = 0
    pre_epoch = args.pre_epoch
-
-
# ---- Training and evaluation loop -------------------------------------
if __name__ == "__main__":
    # Ensure the snapshot directory exists before the first save.
    if not os.path.exists('./model'):
        os.mkdir('./model')
    # TensorBoard writer; view with `tensorboard --logdir ./log`.
    writer = SummaryWriter(log_dir='./log')
    print("Start Training, VGG-16...")
    # acc.txt gets one accuracy line per epoch; log.txt one line per batch.
    with open("acc.txt","w") as acc_f:
        with open("log.txt","w") as log_f:
            for epoch in range(pre_epoch, args.epoch):
                print('\nEpoch: %d' % (epoch + 1))
                # Switch to training mode (enables Dropout / BatchNorm updates).
                net.train()
                print(net)
                # Running loss over the epoch so far.
                sum_loss = 0.0
                # Number of correct predictions over the epoch so far.
                accuracy = 0.0
                total = 0.0

                for i, data in enumerate(trainloader):
                    # Number of batches per epoch (loop-invariant; used only
                    # to compute a global iteration counter for logging).
                    length = len(trainloader)
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)
                    # Reset gradients: backward() accumulates, so stale
                    # gradients from the previous batch must be cleared.
                    optimizer.zero_grad()
                    # NOTE(review): Variable is deprecated since PyTorch 0.4
                    # and is a no-op here; tensors already carry autograd state.
                    inputs, labels = Variable(inputs), Variable(labels)
                    # forward + backward + optimize
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    # Per-batch bookkeeping: accumulate loss and accuracy.
                    sum_loss += loss.item()
                    # argmax over classes -> predicted label per sample.
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    # Count matches against the ground truth on the CPU.
                    accuracy += predicted.eq(labels.data).cpu().sum()

                    # Report mean loss and running accuracy for this epoch.
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * accuracy / total))

                    # Mirror the same line into the text log.
                    log_f.write('[epoch:%d, iter:%d] |Loss: %.03f | Acc: %.3f%% '
                                % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * accuracy / total))
                    log_f.write('\n')
                    log_f.flush()

                    # NOTE(review): logged every batch but keyed by `epoch`,
                    # so each batch overwrites the previous point; moving
                    # these two calls after the batch loop (or using a
                    # per-batch global step) would give a cleaner curve.
                    writer.add_scalar('loss/train',sum_loss / (i + 1),epoch)
                    writer.add_scalar('accuracy/train',100. * accuracy / total,epoch)
                # Evaluate on the test set after every training epoch.
                print("Waiting for test...")
                # no_grad disables gradient tracking for the whole evaluation,
                # saving memory and compute.
                with torch.no_grad():
                    # Reuse the counters for test-set statistics.
                    accuracy = 0
                    total = 0
                    for data in testloader:
                        # Switch to eval mode (freezes Dropout / BatchNorm);
                        # calling it per batch is redundant but harmless.
                        net.eval()

                        images, labels = data
                        images, labels = images.to(device), labels.to(device)

                        outputs = net(images)

                        # Highest-scoring class per sample.
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        accuracy += (predicted == labels).sum()

                    # Report test accuracy for this epoch.
                    print('测试准确率为: %.3f%%' % (100 * accuracy / total))
                    acc = 100. * accuracy / total

                    writer.add_scalar('accuracy/test', acc,epoch)

                    # Save a per-epoch snapshot of the weights.
                    print('Saving model...')
                    # NOTE(review): '%3d' pads with spaces, producing file
                    # names like 'net_  1.pth'; '%03d' was likely intended.
                    torch.save(net.state_dict(), '%s/net_%3d.pth' % (args.outf, epoch + 1))
                    acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                    acc_f.write('\n')
                    acc_f.flush()

                    # Keep a separate checkpoint for the best test accuracy;
                    # this is what --pre_model resumes from.
                    if acc > best_test_acc:
                        print('Saving Best Model...')
                        # Bundle weights with the metadata needed to resume.
                        state = {
                            'state_dict': net.state_dict(),
                            'acc': acc,
                            'epoch': epoch + 1,
                        }
                        # Create the checkpoint folder on first use.
                        if not os.path.isdir('checkpoint'):
                            os.mkdir('checkpoint')
                        #best_acc_f = open("best_acc.txt","w")
                        #best_acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                        #best_acc_f.close()
                        torch.save(state, './checkpoint/ckpt.t7')
                        best_test_acc = acc
                        writer.add_scalar('best_accuracy/test', best_test_acc,epoch)

    # Training finished; `epoch` still holds the last epoch index.
    print("Training Finished, Total Epoch = %d" % epoch)
    writer.close()
-
-
-
三、结果
1.打开cmd或者是Anaconda Prompt输入指令,找到你的log目录
tensorboard --logdir 你的文件夹目录/log
例如博主的是这样的

然后打开最后一行的网址http://DESKTOP-xxxxxx:6006(这里每个电脑是不一样的),例如博主的是这样的

最终训练准确率在89%左右,测试准确率在87%左右~
2.在训练过程中还会生成data、model和checkpoint文件夹

四、注意事项
1.代码里参数设置部分pre_model是用来继续训练的,读取的是上一次epoch存储的checkpoint,设置为True即可继续训练,否则从头开始训练
2.代码里参数设置部分lr学习率如果在训练过程中准确率变化缓慢可以适当减小
3.注意如果没有gpu则需要在代码里注销这个部分
-
#使用gpu
-
use_cuda = torch.cuda.is_available()
-
device = torch.device("cuda" if use_cuda else "cpu")
并且所有的xx.to(device)都需要删除;
或者不注销上面的gpu使用,在每一个xx.to(device)之前加一句话:
if use_cuda:
例如:
-
#模型定义 VGG16
-
if use_cuda:
-
net = VGG16().to(device)
-
else:
-
net = VGG16()
返回至原博客:
文章来源: nickhuang1996.blog.csdn.net,作者:悲恋花丶无心之人,版权归原作者所有,如需转载,请联系作者。
原文链接:nickhuang1996.blog.csdn.net/article/details/86608963
- 点赞
- 收藏
- 关注作者
评论(0)