Hands-On with MindTorch

JeffDing, posted 2024/11/12 12:54:43
[Abstract] MindTorch is a tool that efficiently migrates PyTorch training scripts to run on the MindSpore framework, letting PyTorch code achieve high performance on Ascend hardware without changing existing PyTorch users' habits. It adapts most commonly used PyTorch and TorchVision interfaces; see the interface support lists for coverage.

Introduction to MindTorch

MindTorch is a tool that efficiently migrates PyTorch training scripts to run on the MindSpore framework. Its goal is to let PyTorch code achieve high performance on Ascend hardware without changing how existing PyTorch users work.

  • PyTorch interface support: MindTorch currently adapts most commonly used PyTorch interfaces. The interfaces are used exactly as before, while execution runs on Ascend compute platforms under MindSpore's dynamic-graph or static-graph mode (a minimal sketch follows this list). See the torch interface support list for coverage.
  • TorchVision interface support: MindTorch TorchVision is a computer-vision toolkit migrated from the official PyTorch implementation. It keeps the official PyTorch API design and usage habits while calling MindSpore operators internally, providing the same functionality as the original torchvision library. See the TorchVision interface support list for coverage.
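
To make this concrete, here is a minimal sketch (assuming MindTorch and MindSpore are installed, and that these common interfaces are on the support list): ordinary torch-style code that MindSpore executes once MindTorch is enabled.

from mindtorch.tools import mstorch_enable  # must precede any torch imports
import torch

x = torch.ones(2, 3)
y = torch.nn.functional.relu(x - 0.5)  # executed with MindSpore operators
print(y)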

Installing MindTorch

Install with pip

pip install mindtorch
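
Note that MindTorch executes on top of MindSpore, so a MindSpore build matching your platform should be installed first, for example the CPU build from PyPI:

pip install mindspore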

Install from source

git clone https://git.openi.org.cn/OpenI/MSAdapter.git
cd MSAdapter
python setup.py install
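
Either way, a quick import check confirms the package is visible to Python:

python -c "import mindtorch"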

Hands-On with MindTorch

Quick Start

Create quick_start.py:

vim quick_start.py

Code:

from mindtorch.tools import mstorch_enable     # must be imported before any torch-related modules in the entry file
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# 1.Working with data
# Download training data from open datasets.
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
# Download test data from open datasets.
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())


# 2.Creating Models
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


def train(dataloader, model, loss_fn, optimizer, device):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss, current = loss.item(), (batch + 1) * len(X)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn, device):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


if __name__ == '__main__':
    train_dataloader = DataLoader(training_data, batch_size=64)
    test_dataloader = DataLoader(test_data, batch_size=64)
    # Get cpu, gpu or mps device for training.
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )
    model = NeuralNetwork().to(device)

    # 3.Optimizing the Model Parameters
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

    epochs = 5
    for t in range(epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer, device)
        test(test_dataloader, model, loss_fn, device)
    print("Done!")

    # 4.Saving Models
    torch.save(model.state_dict(), "model.pth")
    print("Saved PyTorch Model State to model.pth")

    # 5.Loading Models
    model = NeuralNetwork().to(device)
    model.load_state_dict(torch.load("model.pth"))

    classes = [
        "T-shirt/top",
        "Trouser",
        "Pullover",
        "Dress",
        "Coat",
        "Sandal",
        "Shirt",
        "Sneaker",
        "Bag",
        "Ankle boot",
    ]
    # 6.Predicted
    model.eval()
    x, y = test_data[0][0], test_data[0][1]
    with torch.no_grad():
        x = x.to(device)
        pred = model(x)
        predicted, actual = classes[pred[0].argmax(0)], classes[y]
        print(f'Predicted: "{predicted}", Actual: "{actual}"')

Run

python quick_start.py 

Note: the example code downloads the dataset automatically.
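
MindSpore selects its configured backend by default. To pin execution to a specific target before running the script, the standard MindSpore context API can be used (a sketch; the valid device names depend on your installed build):

import mindspore as ms
ms.set_context(device_target="Ascend")  # or "GPU" / "CPU"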

Optimizer and Learning-Rate Adaptation

In dynamic graph (PyNative) mode there is no difference from PyTorch: although lr defaults to a MindSpore Parameter, the implementation also accepts assigning it a plain Python number.

In static graph (Graph) mode, the learning rate can only be modified with mindspore.ops.assign.
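
Viewed in isolation, the assign primitive works like this (a minimal sketch using the public MindSpore API, separate from the tutorial code below):

import mindspore as ms

lr = ms.Parameter(ms.Tensor(0.01, ms.float32), name="lr")
ms.ops.assign(lr, ms.Tensor(0.2, ms.float32))  # in-place update of the Parameter
print(float(lr))  # 0.2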

Understanding Through Code

Create optimizer_lr.py:

vim optimizer_lr.py

Code:

import torch
import mindtorch.torch as mtorch
import mindspore as ms

# 1. Print learning rate
# PyTorch code
torch_optimizer = torch.optim.SGD([torch.nn.Parameter(torch.tensor(2.0))], lr=0.01)
print("torch lr is {}".format(torch_optimizer.param_groups[0]['lr']))

# MindTorch code
mtorch_optimizer = mtorch.optim.SGD([mtorch.nn.Parameter(mtorch.tensor(2.0))], lr=0.01)
print("mindtorch lr no float is {}".format(mtorch_optimizer.param_groups[0]['lr']))
print("mindtorch lr float is {}".format(float(mtorch_optimizer.param_groups[0]['lr']))) #通过float

# 2. Modified learning rate
torch_optimizer.param_groups[0]['lr'] = 0.1
print("modified torch lr is {}".format(torch_optimizer.param_groups[0]['lr']))

ms.set_context(mode=ms.context.PYNATIVE_MODE)
mtorch_optimizer.param_groups[0]['lr'] = 0.1
print("PYNATIVE_MODE modified mindtorch lr is {}".format(mtorch_optimizer.param_groups[0]['lr']))

ms.set_context(mode=ms.context.GRAPH_MODE)
mtorch_optimizer = mtorch.optim.SGD([mtorch.nn.Parameter(mtorch.tensor(2.0))], lr=0.01)
ms.ops.assign(mtorch_optimizer.param_groups[0]['lr'], 0.2) # in graph mode the lr must be modified via mindspore.ops.assign
print("GRAPH_MODE modified mindtorch lr is {}".format(float(mtorch_optimizer.param_groups[0]['lr'])))

# 3. Custom optimizer
class TRanger(torch.optim.Optimizer):
    def __init__(self, params, lr=1e-3, alpha=0.5, k=6):
        defaults = dict(lr=lr, alpha=alpha)
        super().__init__(params, defaults)
        self.k = k
    def __setstate__(self, state):
        print("set state called")
        super().__setstate__(state)
    def step(self, closure=None):
        loss = None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                p_data_fp32 = p.data.float()
                state = self.state[p]
                if len(state) == 0:       # lazily initialize per-parameter state
                    state['step'] = 0
                state['step'] += 1
                p_data_fp32.add_(grad)
                p.data.copy_(p_data_fp32)
        return loss

tranger = TRanger([torch.nn.Parameter(torch.tensor(2.0))], lr=0.01)
print("Init TRanger", tranger)

class MTRanger(mtorch.optim.Optimizer):
    def __init__(self, params, lr=1e-3, alpha=0.5, k=6):
        defaults = dict(lr=lr, alpha=alpha)
        super().__init__(params, defaults)
        self.k = k
    def __setstate__(self, state):
        print("set state called")
        super().__setstate__(state)
    def step(self, grads, closure=None):  # grads must be added as a parameter so the gradients can be passed in
        loss = None
        i = -1                            # index for walking the grads argument
        for group in self.param_groups:
            for p in group['params']:
                i = i + 1                 # advance the index
                grad = grads[i]           # grad comes from the argument; it is 0 if the Parameter took no part in differentiation
                p_data_fp32 = p.data.float()
                state = self.state[p]
                if len(state) == 0:       # lazily initialize per-parameter state
                    state['step'] = 0
                state['step'] += 1
                p_data_fp32.add_(grad)
                p.data.copy_(p_data_fp32)
        return loss
    
mtranger = MTRanger([mtorch.nn.Parameter(mtorch.tensor(2.0))], lr=0.01)
print("Init MTRanger", mtranger)

Run

python optimizer_lr.py
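
When switching between the two modes, it can help to confirm which one is currently active (a small check using MindSpore's public API):

import mindspore as ms
print(ms.get_context("mode"))  # 0 = GRAPH_MODE, 1 = PYNATIVE_MODE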

LeNet Example

Dataset:

MNIST handwritten digit database, Yann LeCun, Corinna Cortes and Chris Burges

Dataset directory:

data/mnist
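
After the automatic download performed by the demo script, torchvision typically lays the raw files out as follows (the exact layout can vary across torchvision versions):

data/mnist/MNIST/raw/train-images-idx3-ubyte
data/mnist/MNIST/raw/train-labels-idx1-ubyte
data/mnist/MNIST/raw/t10k-images-idx3-ubyte
data/mnist/MNIST/raw/t10k-labels-idx1-ubyte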

Contents of lenet.py:

import torch.nn as nn
from collections import OrderedDict


class C1(nn.Module):
    def __init__(self):
        super(C1, self).__init__()

        self.c1 = nn.Sequential(OrderedDict([
            ('c1', nn.Conv2d(1, 6, kernel_size=(5, 5))),
            ('relu1', nn.ReLU()),
            ('s1', nn.MaxPool2d(kernel_size=(2, 2), stride=2))
        ]))

    def forward(self, img):
        output = self.c1(img)
        return output


class C2(nn.Module):
    def __init__(self):
        super(C2, self).__init__()

        self.c2 = nn.Sequential(OrderedDict([
            ('c2', nn.Conv2d(6, 16, kernel_size=(5, 5))),
            ('relu2', nn.ReLU()),
            ('s2', nn.MaxPool2d(kernel_size=(2, 2), stride=2))
        ]))

    def forward(self, img):
        output = self.c2(img)
        return output


class C3(nn.Module):
    def __init__(self):
        super(C3, self).__init__()

        self.c3 = nn.Sequential(OrderedDict([
            ('c3', nn.Conv2d(16, 120, kernel_size=(5, 5))),
            ('relu3', nn.ReLU())
        ]))

    def forward(self, img):
        output = self.c3(img)
        return output


class F4(nn.Module):
    def __init__(self):
        super(F4, self).__init__()

        self.f4 = nn.Sequential(OrderedDict([
            ('f4', nn.Linear(120, 84)),
            ('relu4', nn.ReLU())
        ]))

    def forward(self, img):
        output = self.f4(img)
        return output


class F5(nn.Module):
    def __init__(self):
        super(F5, self).__init__()

        self.f5 = nn.Sequential(OrderedDict([
            ('f5', nn.Linear(84, 10)),
            ('sig5', nn.LogSoftmax(dim=-1))
        ]))

    def forward(self, img):
        output = self.f5(img)
        return output


class LeNet5(nn.Module):
    """
    Input - 1x32x32
    Output - 10
    """
    def __init__(self):
        super(LeNet5, self).__init__()

        self.c1 = C1()
        self.c2_1 = C2()
        self.c2_2 = C2()
        self.c3 = C3()
        self.f4 = F4()
        self.f5 = F5()

    def forward(self, img):
        output = self.c1(img)
        x = self.c2_1(output)
        output = self.c2_2(output)
        output += x

        output = self.c3(output)
        output = output.view(img.size(0), -1)
        output = self.f4(output)
        output = self.f5(output)
        return output
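
As a quick sanity check (a hypothetical snippet, not part of the original tutorial), a dummy 1x32x32 grayscale batch can be pushed through the network to confirm the 10-class output shape:

import torch
from lenet import LeNet5

net = LeNet5()
out = net(torch.randn(1, 1, 32, 32))  # one fake 1-channel 32x32 image
print(out.shape)                      # expected: torch.Size([1, 10])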

Contents of mindtorch_demo.py:

from mindtorch.tools import mstorch_enable
from lenet import LeNet5
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets.mnist import MNIST
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time

data_train = MNIST('./data/mnist',
                   download=True,
                   transform=transforms.Compose([
                       transforms.Resize((32, 32)),
                       transforms.ToTensor()]))
data_test = MNIST('./data/mnist',
                  train=False,
                  download=True,
                  transform=transforms.Compose([
                      transforms.Resize((32, 32)),
                      transforms.ToTensor()]))
data_train_loader = DataLoader(data_train, batch_size=128, shuffle=True, num_workers=4, drop_last=True)
data_test_loader = DataLoader(data_test, batch_size=128, num_workers=4, drop_last=True)

net = LeNet5()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=2e-3)



def train(epoch):
    net.train()
    loss_list, batch_list = [], []
    size = len(data_train_loader)
    start = time.time()
    for i, (images, labels) in enumerate(data_train_loader):
        optimizer.zero_grad()

        output = net(images)

        loss = criterion(output, labels)

        loss_list.append(loss.detach().cpu().item())
        batch_list.append(i + 1)

        if i % 10 == 0:
            end = time.time()
            print(f"loss: {loss.detach().cpu().item():>7f}  [{i:>3d}/{size:>3d}]", "Runing time:", end - start, "s")
            start = time.time()
        loss.backward()
        optimizer.step()


def test():
    net.eval()
    total_correct = 0
    avg_loss = 0.0
    num_batches = 0
    total = 0
    for i, (images, labels) in enumerate(data_test_loader):
        output = net(images)
        num_batches += 1
        total += len(images)
        avg_loss += criterion(output, labels).sum()
        pred = output.detach().max(1)[1]
        total_correct += pred.eq(labels.view_as(pred)).sum()

    avg_loss = avg_loss / num_batches
    total_correct = total_correct / total

    print(f"Test: \n Accuracy: {(100 * total_correct):>0.1f}%, Avg loss: {avg_loss.detach().cpu().item():>8f} \n")


def train_and_test(epoch):
    print(f"Epoch {epoch}\n-------------------------------")
    train(epoch)
    test()


def main():
    for e in range(1, 2):
        train_and_test(e)


if __name__ == '__main__':
    main()

Run

python mindtorch_demo.py
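
If multiprocessing data loading misbehaves in your environment (a common portability issue, unrelated to MindTorch itself), the loaders can be switched to single-process loading by setting num_workers=0:

data_train_loader = DataLoader(data_train, batch_size=128, shuffle=True, num_workers=0, drop_last=True)
data_test_loader = DataLoader(data_test, batch_size=128, num_workers=0, drop_last=True)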

References

https://openi.pcl.ac.cn/OpenI/mindtorch_tutorial
