基于Pytorch Geometric在昇腾上实现GAT、GraphSAGE、GCN图神经网络

Posted by WWWzq on 2025/05/10 14:46:41
[Abstract] This article provides code examples for all three models, with GraphSAGE used as the main worked example.

Environment

GraphSAGE

Because Ascend does not yet support PyG's acceleration libraries such as torch_scatter and torch_sparse, mini-batch training of GNNs is not available; all of the examples here therefore use in-memory, full-graph training.
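Before running the examples it is worth confirming that the Ascend PyTorch plugin imports cleanly and that an NPU device is visible. The check below is a minimal sketch and is not part of the original article; it assumes that torch_npu is installed and that it exposes torch_npu.npu.is_available() and torch_npu.npu.device_count(), as in recent Ascend PyTorch releases.

import torch
import torch_npu  # Ascend NPU plugin for PyTorch

print(torch.__version__)
print(torch_npu.npu.is_available())   # True if at least one NPU is visible
print(torch_npu.npu.device_count())   # number of visible NPU devices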

  1. First, build a GraphSAGE network.
    The following code defines a two-layer GraphSAGE network built with the SAGEConv layer provided by PyG.
class GraphSAGE_NET(torch.nn.Module):

    def __init__(self, feature, hidden, classes):
        super(GraphSAGE_NET, self).__init__()
        self.sage1 = SAGEConv(feature, hidden)
        self.sage2 = SAGEConv(hidden, classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.sage1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.sage2(x, edge_index)

        return F.log_softmax(x, dim=1)
  2. Download the dataset. In some regions the download may time out; you can either change the download URL or download the files manually (a workaround is sketched right after this step).
dataset = Planetoid(root='/root/data', name='CiteSeer')
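One possible workaround, sketched below and not part of the original article: in current PyG versions Planetoid downloads its raw files from a class-level url attribute, so pointing that attribute at a mirror you can reach before constructing the dataset avoids the timeout. The mirror address below is a placeholder, not a real URL. Alternatively, download the raw CiteSeer files (ind.citeseer.*) by hand and place them under /root/data/CiteSeer/raw/ so that no download is attempted.

from torch_geometric.datasets import Planetoid

# Sketch: redirect the download source to a reachable mirror (placeholder URL).
Planetoid.url = 'https://<your-mirror>/planetoid/raw/master/data'
dataset = Planetoid(root='/root/data', name='CiteSeer')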
  3. Set the device to npu and move the model and data onto it (this relies on torch_npu and transfer_to_npu being imported, as shown in the full listing below).
device = 'npu'

model = GraphSAGE_NET(dataset.num_node_features, 16, dataset.num_classes).to(device) 
data = dataset[0].to(device)
  4. Train the model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
  5. Evaluate on the test set.
model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())

print('GraphSAGE Accuracy: {:.4f}'.format(acc))

Code

import torch
import torch.nn.functional as F

# Import the GraphSAGE convolution layer
from torch_geometric.nn import SAGEConv
from torch_geometric.datasets import Planetoid

# Import the NPU-related libraries
import torch_npu
from torch_npu.contrib import transfer_to_npu

class GraphSAGE_NET(torch.nn.Module):

    def __init__(self, feature, hidden, classes):
        super(GraphSAGE_NET, self).__init__()
        self.sage1 = SAGEConv(feature, hidden)
        self.sage2 = SAGEConv(hidden, classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.sage1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.sage2(x, edge_index)

        return F.log_softmax(x, dim=1)

dataset = Planetoid(root='/root/data', name='CiteSeer')


device = 'npu'

model = GraphSAGE_NET(dataset.num_node_features, 16, dataset.num_classes).to(device) 
data = dataset[0].to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())

print('GraphSAGE Accuracy: {:.4f}'.format(acc))
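The evaluation above only looks at the test mask. Planetoid data objects also carry a val_mask, so a small helper makes it easy to track validation accuracy as well. The function below is a sketch, not part of the original listing; the name accuracy is made up for illustration.

@torch.no_grad()
def accuracy(model, data, mask):
    # Switch to eval mode, take the arg-max class per node,
    # and compare predictions with labels on the given boolean mask.
    model.eval()
    pred = model(data).argmax(dim=1)
    return int((pred[mask] == data.y[mask]).sum()) / int(mask.sum())

print('GraphSAGE val accuracy: {:.4f}'.format(accuracy(model, data, data.val_mask)))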

GCN

Code

# Import torch and the related basic operator modules used to build the network
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing

from torch_geometric.utils import add_self_loops, degree

from torch_geometric.nn import GCNConv

class GCN_NET(torch.nn.Module):

    def __init__(self, features, hidden, classes):
        super(GCN_NET, self).__init__()
        # shape: (input node feature dim, hidden dim)
        self.conv1 = GCNConv(features, hidden)
        # shape: (hidden dim, number of classes)
        self.conv2 = GCNConv(hidden, classes)

    def forward(self, data):
        # Load the node features and the adjacency information (edge index)
        x, edge_index = data.x, data.edge_index
        # First convolution layer
        x = self.conv1(x, edge_index)
        # Activation function
        x = F.relu(x)
        # Dropout layer to reduce overfitting
        x = F.dropout(x, training=self.training)
        # Second convolution layer
        x = self.conv2(x, edge_index)
        # Feed the features from the two convolutions into log_softmax to get a class distribution
        return F.log_softmax(x, dim=1)
# Import the dataset
from torch_geometric.datasets import Planetoid

import numpy as np

dataset = Planetoid(root='/home/wzq/pyg_test/data', name='Cora')

# Import the NPU-related libraries
import torch_npu
from torch_npu.contrib import transfer_to_npu

# Set the device to npu
device = 'npu'
# device = 'cuda:0'

model = GCN_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
# Load the data
data = dataset[0].to(device)
# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    # Zero the gradients
    optimizer.zero_grad()
    # Forward pass
    out = model(data)
    # Compute the loss on the training nodes
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    # Backward pass to compute gradients
    loss.backward()
    # One optimization step
    optimizer.step()

# Evaluate the model
model.eval()
# Get the predicted class for each node
_, pred = model(data).max(dim=1)
# Count the correct predictions on the test nodes
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
# Compute the accuracy
acc = correct / int(data.test_mask.sum())
# Print the accuracy
print('GCN Accuracy: {:.4f}'.format(acc))
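This listing hard-codes device = 'npu'. If you want the same script to also run on machines without an NPU, a small fallback could look like the sketch below; it is not part of the original article and assumes torch_npu exposes torch_npu.npu.is_available(), as in recent Ascend PyTorch releases.

import torch

try:
    import torch_npu  # Ascend plugin; only importable on NPU machines
    device = 'npu' if torch_npu.npu.is_available() else 'cpu'
except ImportError:
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'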

GAT

Code

import torch
import torch.nn.functional as F

# Import the GATConv layer
from torch_geometric.nn import GATConv
from torch_geometric.datasets import Planetoid

# Import the NPU-related libraries
import torch_npu
from torch_npu.contrib import transfer_to_npu

class GAT_NET(torch.nn.Module):
    def __init__(self, features, hidden, classes, heads=4):
        super(GAT_NET, self).__init__()
        # Define the GAT layers, using multi-head attention
        self.gat1 = GATConv(features, hidden, heads=heads)
        # Multi-head attention concatenates the per-head outputs, so the input dimension is multiplied by the number of heads.
        self.gat2 = GATConv(hidden*heads, classes)

    def forward(self, data):
        # Get the node features and the edge index from the input data
        x, edge_index = data.x, data.edge_index
        # Pass the input through the first GAT layer
        x = self.gat1(x, edge_index)
        # Non-linear activation and dropout to reduce overfitting and improve generalization
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # Second GAT layer; its output is fed to the classifier
        x = self.gat2(x, edge_index)
        return F.log_softmax(x, dim=1)

dataset = Planetoid(root='/root/data', name='PubMed')

device = 'npu'

model = GAT_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)  # Instantiate the GAT model
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Evaluate the trained model and print the test accuracy.
model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GAT Accuracy: {:.4f}'.format(acc))
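As noted in the comments above, GATConv with the default concatenation multiplies the output dimension by the number of heads, which is why the second layer takes hidden*heads input features. A quick standalone shape check, sketched here with made-up toy tensors and not part of the original article:

import torch
from torch_geometric.nn import GATConv

conv = GATConv(in_channels=500, out_channels=16, heads=4)   # PubMed has 500 node features
x = torch.randn(8, 500)                                     # 8 toy nodes
edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]])     # a small toy edge list
print(conv(x, edge_index).shape)                            # torch.Size([8, 64]) = 16 * 4 heads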