Implementing GAT, GraphSAGE and GCN Graph Neural Networks on Ascend with PyTorch Geometric
【Abstract】 This article gives code examples for three graph neural network models and uses GraphSAGE as the main walkthrough. Because Ascend does not yet support PyG's acceleration libraries such as torch_scatter and torch_sparse, mini-batch GNN training is not yet possible, so all examples use full-graph, in-memory training. The walkthrough first builds a two-layer GraphSAGE network with the SAGEConv layer provided by PyG, then trains and evaluates it on a Planetoid dataset.
Environment
- torch 2.1
- torch_npu 2.1.0
- torch_geometric(cpu)
Installation can follow these guides:
- Installing torch_npu
- Installing PyTorch Geometric (PyG)
This article gives code examples for all three models and uses GraphSAGE as the main walkthrough.
GraphSAGE
Because Ascend does not yet support PyG's acceleration libraries such as torch_scatter and torch_sparse, GNNs cannot yet be trained in mini-batch mode here; all of the examples below use full-graph, in-memory training.
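As a quick sanity check for your environment (a small sketch; recent PyG releases fall back to their native PyTorch aggregation when these extensions are missing), you can verify whether torch_scatter is importable:

# Check whether the optional acceleration extensions are present
try:
    import torch_scatter  # noqa: F401
    print('torch_scatter found')
except ImportError:
    print('torch_scatter not installed; PyG will use its native aggregation fallback')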
- First, build a GraphSAGE network. The code below defines a two-layer GraphSAGE network using the SAGEConv layer provided by PyG.
class GraphSAGE_NET(torch.nn.Module):
    def __init__(self, feature, hidden, classes):
        super(GraphSAGE_NET, self).__init__()
        self.sage1 = SAGEConv(feature, hidden)
        self.sage2 = SAGEConv(hidden, classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.sage1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.sage2(x, edge_index)
        return F.log_softmax(x, dim=1)
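For context, with its default mean aggregation a SAGEConv layer computes roughly (following the PyG documentation)

$$\mathbf{x}'_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot \operatorname{mean}_{j \in \mathcal{N}(i)} \mathbf{x}_j$$

so stacking two layers lets each node mix information from its two-hop neighborhood.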
- Download the dataset. In mainland China the download may time out; you can change the download URL (one option is sketched below) or fetch the files manually.
dataset = Planetoid(root='/root/data', name='CiteSeer')
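If the default download source times out, one option is to point the loader at a mirror before constructing the dataset; the URL below is only a placeholder for whatever mirror you can reach:

# Optional: override the download source (the mirror URL here is a placeholder)
from torch_geometric.datasets import Planetoid
Planetoid.url = 'https://<your-mirror>/planetoid/raw/master/data'
dataset = Planetoid(root='/root/data', name='CiteSeer')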
- Set the device to npu and move the model and data onto it.
device = 'npu'
model = GraphSAGE_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
data = dataset[0].to(device)
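If the script should also run on machines without an Ascend NPU, the device choice can be guarded (a minimal sketch; it assumes torch_npu has already been imported):

# Fall back to CPU when no Ascend NPU is available (assumes `import torch_npu` earlier)
device = 'npu' if torch_npu.npu.is_available() else 'cpu'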
- Train the model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
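To watch convergence over the 200 epochs, the loop body can be extended with a periodic loss print (an optional addition, not in the original script):

    # Appended at the end of the training-loop body, after optimizer.step()
    if epoch % 20 == 0:
        print('Epoch {:03d}, loss {:.4f}'.format(epoch, loss.item()))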
- Evaluate on the test set.
model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GraphSAGE Accuracy: {:.4f}'.format(acc))
Full code
import torch
import torch.nn.functional as F
# Import the GraphSAGE layer
from torch_geometric.nn import SAGEConv
from torch_geometric.datasets import Planetoid
# Import the NPU-related libraries
import torch_npu
from torch_npu.contrib import transfer_to_npu
class GraphSAGE_NET(torch.nn.Module):
    def __init__(self, feature, hidden, classes):
        super(GraphSAGE_NET, self).__init__()
        self.sage1 = SAGEConv(feature, hidden)
        self.sage2 = SAGEConv(hidden, classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.sage1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.sage2(x, edge_index)
        return F.log_softmax(x, dim=1)
dataset = Planetoid(root='/root/data', name='CiteSeer')
device = 'npu'
model = GraphSAGE_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GraphSAGE Accuracy: {:.4f}'.format(acc))
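Because the Planetoid split also provides data.val_mask, validation accuracy can be reported the same way (an optional sketch that reuses pred from the lines above):

# Optional: validation accuracy, computed like the test accuracy above
val_correct = int(pred[data.val_mask].eq(data.y[data.val_mask]).sum().item())
val_acc = val_correct / int(data.val_mask.sum())
print('GraphSAGE Validation Accuracy: {:.4f}'.format(val_acc))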
GCN
Code
# Import torch and related libraries used to build the network
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import GCNConv

class GCN_NET(torch.nn.Module):
    def __init__(self, features, hidden, classes):
        super(GCN_NET, self).__init__()
        # shape: (input node feature dimension, hidden layer dimension)
        self.conv1 = GCNConv(features, hidden)
        # shape: (hidden layer dimension, number of node classes)
        self.conv2 = GCNConv(hidden, classes)

    def forward(self, data):
        # Load the node features and the adjacency (edge index)
        x, edge_index = data.x, data.edge_index
        # First graph convolution layer
        x = self.conv1(x, edge_index)
        # Activation function
        x = F.relu(x)
        # Dropout layer to reduce overfitting
        x = F.dropout(x, training=self.training)
        # Second graph convolution layer
        x = self.conv2(x, edge_index)
        # Feed the features from the two convolution layers into log_softmax to get a probability distribution
        return F.log_softmax(x, dim=1)
# Load the dataset
from torch_geometric.datasets import Planetoid
import numpy as np
dataset = Planetoid(root='/home/wzq/pyg_test/data', name='Cora')
# Import the NPU-related libraries and set the device to NPU
import torch_npu
from torch_npu.contrib import transfer_to_npu
device = 'npu'
# device = 'cuda:0'  # alternative when running on a GPU
model = GCN_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
# Load the first graph of the dataset and move it to the device
data = dataset[0].to(device)
# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for epoch in range(200):
    # Zero out the gradients
    optimizer.zero_grad()
    # Forward pass
    out = model(data)
    # Compute the loss on the training nodes
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    # Backward pass to compute gradients
    loss.backward()
    # One optimization step
    optimizer.step()
# Evaluate the model
model.eval()
# Get the predicted class for each node
_, pred = model(data).max(dim=1)
# Count the correct predictions on the test set
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
# Compute the accuracy
acc = correct / int(data.test_mask.sum())
# Print the accuracy
print('GCN Accuracy: {:.4f}'.format(acc))
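The listing above imports MessagePassing, add_self_loops and degree but only uses the ready-made GCNConv. For readers who want to see what GCNConv does internally, a hand-rolled version in the style of the official PyG message-passing tutorial could look like the sketch below (ManualGCNConv is an illustrative name; it relies on PyG's built-in aggregation, so it also runs without torch_scatter):

class ManualGCNConv(MessagePassing):
    """Illustrative re-implementation of the GCN propagation rule."""
    def __init__(self, in_channels, out_channels):
        super(ManualGCNConv, self).__init__(aggr='add')  # sum aggregation
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        # Add self-loops so each node also aggregates its own features
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
        # Linearly transform the node features
        x = self.lin(x)
        # Symmetric normalization: 1 / sqrt(deg(i) * deg(j))
        row, col = edge_index
        deg = degree(col, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        # Propagate messages along the edges and sum them per target node
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # Scale each neighbor's features by its normalization coefficient
        return norm.view(-1, 1) * x_j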
GAT
Code
import torch
import torch.nn.functional as F
# Import the GATConv layer
from torch_geometric.nn import GATConv
from torch_geometric.datasets import Planetoid
# Import the NPU-related libraries
import torch_npu
from torch_npu.contrib import transfer_to_npu

class GAT_NET(torch.nn.Module):
    def __init__(self, features, hidden, classes, heads=4):
        super(GAT_NET, self).__init__()
        # Define the first GAT layer with multi-head attention
        self.gat1 = GATConv(features, hidden, heads=heads)
        # Multi-head attention concatenates the head outputs, so the input dimension is multiplied by the number of heads
        self.gat2 = GATConv(hidden * heads, classes)

    def forward(self, data):
        # Get the node features and edge index from the input data
        x, edge_index = data.x, data.edge_index
        # Pass the input through the first GAT layer
        x = self.gat1(x, edge_index)
        # Non-linear activation and dropout to reduce overfitting and improve generalization
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # Second GAT layer; its output is fed to the classifier
        x = self.gat2(x, edge_index)
        return F.log_softmax(x, dim=1)
dataset = Planetoid(root='/root/data', name='PubMed')
device = 'npu'
model = GAT_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)  # Instantiate the GAT model
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
# Evaluate the trained model on the test set and print its accuracy
model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GAT Accuracy: {:.4f}'.format(acc))
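As a side note (not part of the original script), GATConv also supports averaging the attention heads instead of concatenating them, which is handy if the output layer itself should be multi-head while keeping its dimension equal to the number of classes:

# Hypothetical variant of the output layer: multi-head attention with averaged heads
# (concat=False averages the head outputs, so the dimension stays `classes`)
self.gat2 = GATConv(hidden * heads, classes, heads=heads, concat=False)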