Implementing GAT with DGL (with a simple application to a 6-node binary-classification graph)
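
Both files implement the same four-step GAT layer; the numbered comments in the code refer to the equations below (numbered as in the DGL GAT tutorial; note that in this code the layer itself applies no output nonlinearity, and an ELU is applied between the two layers instead):

\[
\begin{aligned}
z_i^{(l)} &= W^{(l)} h_i^{(l)} &&\text{(1)}\\
e_{ij}^{(l)} &= \operatorname{LeakyReLU}\!\left(\vec{a}^{(l)\,T}\left[z_i^{(l)} \,\|\, z_j^{(l)}\right]\right) &&\text{(2)}\\
\alpha_{ij}^{(l)} &= \frac{\exp\!\left(e_{ij}^{(l)}\right)}{\sum_{k \in \mathcal{N}(i)} \exp\!\left(e_{ik}^{(l)}\right)} &&\text{(3)}\\
h_i^{(l+1)} &= \sum_{j \in \mathcal{N}(i)} \alpha_{ij}^{(l)} z_j^{(l)} &&\text{(4)}
\end{aligned}
\]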

Formal file (application on the Cora dataset). Both scripts log to Visdom, so a Visdom server (started with python -m visdom.server) should be running first.

python
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset
import time
import numpy as np
from visdom import Visdom
import dgl
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class GATLayer(nn.Module):
    def __init__(self, g, in_dim, out_dim):
        super(GATLayer, self).__init__()
        self.g = g
        # Equation (1): shared linear projection
        self.fc = nn.Linear(in_dim, out_dim, bias=False)
        # Equation (2): attention score from concatenated node pairs
        self.attn_fc = nn.Linear(2 * out_dim, 1, bias=False)

    def edge_attention(self, edges):
        # User-defined function on edges, needed for Equation (2)
        z2 = torch.cat([edges.src['z'], edges.dst['z']], dim=1)
        a = self.attn_fc(z2)
        # LeakyReLU with negative slope 0.2, as in the GAT paper
        return {'e': F.leaky_relu(a, negative_slope=0.2)}

    def message_func(self, edges):
        # User-defined message function, needed for Equations (3) and (4):
        # pass the source node's projected feature 'z' and the edge score 'e'
        return {'z': edges.src['z'], 'e': edges.data['e']}

    def reduce_func(self, nodes):
        # User-defined reduce function, needed for Equations (3) and (4)
        # Equation (3): normalize the scores over each node's mailbox
        alpha = F.softmax(nodes.mailbox['e'], dim=1)
        # Equation (4): attention-weighted sum of neighbor features
        h = torch.sum(alpha * nodes.mailbox['z'], dim=1)
        return {'h': h}

    def forward(self, h):
        # Equation (1)
        z = self.fc(h)
        self.g.ndata['z'] = z
        # Equation (2): compute unnormalized scores on all edges
        self.g.apply_edges(self.edge_attention)
        # Equations (3) & (4)
        self.g.update_all(self.message_func, self.reduce_func)
        return self.g.ndata.pop('h')


class MultiHeadGATLayer(nn.Module):
    def __init__(self, g, in_dim, out_dim, num_heads, merge='cat'):
        super(MultiHeadGATLayer, self).__init__()
        self.heads = nn.ModuleList()
        for i in range(num_heads):
            self.heads.append(GATLayer(g, in_dim, out_dim))
        self.merge = merge

    def forward(self, h):
        head_outs = [attn_head(h) for attn_head in self.heads]
        if self.merge == 'cat':
            # Concatenate along the feature dimension (dim=1)
            return torch.cat(head_outs, dim=1)
        else:
            # Merge the heads by averaging over the head dimension
            return torch.mean(torch.stack(head_outs), dim=0)


class GAT(nn.Module):
    def __init__(self, g, in_dim, hidden_dim, out_dim, num_heads):
        super(GAT, self).__init__()
        self.layer1 = MultiHeadGATLayer(g, in_dim, hidden_dim, num_heads)
        # Note: the input dimension is hidden_dim * num_heads because the
        # multi-head outputs are concatenated. The output layer has a single head.
        self.layer2 = MultiHeadGATLayer(g, hidden_dim * num_heads, out_dim, 1)

    def forward(self, h):
        h = self.layer1(h)
        h = F.elu(h)
        h = self.layer2(h)
        return h


def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


# Load the dataset
dataset = CoraGraphDataset('./cora')
graph = dataset[0]
graph = dgl.remove_self_loop(graph)
graph = dgl.add_self_loop(graph)
# Move the graph to the GPU (if one is available)
graph = graph.to(device)

train_mask = graph.ndata['train_mask']
val_mask = graph.ndata['val_mask']
test_mask = graph.ndata['test_mask']
label = graph.ndata['label']
features = graph.ndata['feat']

in_feats = features.shape[1]
n_hidden = 8
n_classes = dataset.num_classes
num_heads = 3
feat_drop = 0.6   # declared but not used by the hand-rolled GATLayer
attn_drop = 0.5   # declared but not used by the hand-rolled GATLayer
lr = 0.02
weight_decay = 3e-4
num_epochs = 31

model = GAT(graph,
            in_dim=features.shape[1],
            hidden_dim=n_hidden,
            out_dim=n_classes,  # 7 classes for Cora
            num_heads=num_heads)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

dur = []

# Visualize training with Visdom (assumes a Visdom server is running)
vis = Visdom()

opt = {
    'title': 'GAT on Cora',
    'xlabel': 'epoch',
    'ylabel': 'loss / acc',
    'legend': ['loss', 'val_accuracy']
}

for epoch in range(num_epochs):
    if epoch >= 3:
        t0 = time.time()

    logits = model(features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], label[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    acc_val = evaluate(model, features, label, val_mask)

    x = epoch
    y_loss = loss.cpu().detach().numpy()
    y_acc = acc_val
    vis.line(
        X=[x],
        Y=[[y_loss, y_acc]],
        win='GAT',
        update='append',
        opts=opt
    )
    if epoch >= 3:
        dur.append(time.time() - t0)

    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | Accuracy {:.4f}".format(
        epoch, loss.item(), np.mean(dur), acc_val))

acc_test = evaluate(model, features, label, test_mask)
print("Test Accuracy {:.4f}".format(acc_test))

Test file (binary classification on a 6-node graph). The toy graph is fully connected over 6 nodes (self-loops included); nodes 1-3 are the training set, node 0 the validation set, and nodes 4-5 the test set.

python
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset
import time
import numpy as np
from visdom import Visdom
import dgl

print('----------test: DGL_GAT----------')

device = torch.device('cpu')


class GATLayer(nn.Module):
    def __init__(self, g, in_dim, out_dim):
        super(GATLayer, self).__init__()
        self.g = g
        # Equation (1): shared linear projection
        self.fc = nn.Linear(in_dim, out_dim, bias=False)
        # Equation (2): attention score from concatenated node pairs
        self.attn_fc = nn.Linear(2 * out_dim, 1, bias=False)

    def edge_attention(self, edges):
        # User-defined function on edges, needed for Equation (2)
        # From the GAT paper: the attention mechanism is a single-layer
        # feedforward network parameterized by a weight vector, with a LeakyReLU
        # nonlinearity (negative slope 0.2); fully expanded, the coefficients it
        # computes are those illustrated in Figure 1 (left) of the paper.
        z2 = torch.cat([edges.src['z'], edges.dst['z']], dim=1)
        a = self.attn_fc(z2)
        return {'e': F.leaky_relu(a, negative_slope=0.2)}

    def message_func(self, edges):
        # User-defined message function, needed for Equations (3) and (4):
        # 'z' is the source node's projected feature; 'e' is the edge's score
        return {'z': edges.src['z'], 'e': edges.data['e']}

    def reduce_func(self, nodes):
        # User-defined reduce function, needed for Equations (3) and (4)
        # Equation (3): normalize the scores over each node's mailbox
        alpha = F.softmax(nodes.mailbox['e'], dim=1)
        # Equation (4): attention-weighted sum of neighbor features
        h = torch.sum(alpha * nodes.mailbox['z'], dim=1)
        return {'h': h}

    def forward(self, h):
        # Equation (1)
        z = self.fc(h)
        self.g.ndata['z'] = z
        # Equation (2): compute unnormalized scores on all edges
        self.g.apply_edges(self.edge_attention)
        # Equations (3) & (4)
        self.g.update_all(self.message_func, self.reduce_func)
        return self.g.ndata.pop('h')


class MultiHeadGATLayer(nn.Module):
    def __init__(self, g, in_dim, out_dim, num_heads, merge='cat'):
        super(MultiHeadGATLayer, self).__init__()
        self.heads = nn.ModuleList()
        for i in range(num_heads):
            self.heads.append(GATLayer(g, in_dim, out_dim))
        self.merge = merge

    def forward(self, h):
        head_outs = [attn_head(h) for attn_head in self.heads]
        if self.merge == 'cat':
            # Concatenate along the feature dimension (dim=1)
            return torch.cat(head_outs, dim=1)
        else:
            # Merge the heads by averaging over the head dimension
            return torch.mean(torch.stack(head_outs), dim=0)


class GAT(nn.Module):
    def __init__(self, g, in_dim, hidden_dim, out_dim, num_heads):
        super(GAT, self).__init__()
        self.layer1 = MultiHeadGATLayer(g, in_dim, hidden_dim, num_heads)
        # Note: the input dimension is hidden_dim * num_heads because the
        # multi-head outputs are concatenated. The output layer has a single head.
        self.layer2 = MultiHeadGATLayer(g, hidden_dim * num_heads, out_dim, 1)

    def forward(self, h):
        # Features are created as float32 below, so no extra casts are needed
        h = self.layer1(h)
        h = F.elu(h)
        h = self.layer2(h)
        return h


def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


# Build a toy graph instead of loading Cora:
# dataset = CoraGraphDataset('./cora')
# graph = dataset[0]
# graph = dgl.remove_self_loop(graph)
# A fully connected graph over 6 nodes (self-loops included)
graph = dgl.graph((torch.tensor([0, 0, 0, 0, 0, 0,
                                 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2,
                                 3, 3, 3, 3, 3, 3,
                                 4, 4, 4, 4, 4, 4,
                                 5, 5, 5, 5, 5, 5]),
                   torch.tensor([0, 1, 2, 3, 4, 5,
                                 0, 1, 2, 3, 4, 5,
                                 0, 1, 2, 3, 4, 5,
                                 0, 1, 2, 3, 4, 5,
                                 0, 1, 2, 3, 4, 5,
                                 0, 1, 2, 3, 4, 5])))
# graph = dgl.add_self_loop(graph)  # not needed: self-loops already present
# Training nodes: 1, 2, 3
graph.ndata['train_mask'] = torch.tensor([False, True, True, True, False, False])
# Validation node: 0
graph.ndata['val_mask'] = torch.tensor([True, False, False, False, False, False])
# Test nodes: 4, 5
graph.ndata['test_mask'] = torch.tensor([False, False, False, False, True, True])

graph.ndata['feat'] = torch.tensor([[0, 0, 0, 0, 0, 0],
                                    [1, 1, 1, 1, 1, 1],
                                    [2, 2, 2, 2, 2, 2],
                                    [3, 3, 3, 3, 3, 3],
                                    [4, 4, 4, 4, 4, 4],
                                    [5, 5, 5, 5, 5, 5]], dtype=torch.float32)
# Binary labels in {0, 1}
graph.ndata['label'] = torch.tensor([0, 1, 0, 0, 1, 0])
# Move the graph to the target device (CPU here)
graph = graph.to(device)

train_mask = graph.ndata['train_mask']
print(graph.ndata['train_mask'])


# print('train_mask:',train_mask)
val_mask = graph.ndata['val_mask']
print(graph.ndata['val_mask'])

test_mask = graph.ndata['test_mask']
print(graph.ndata['test_mask'])


label = graph.ndata['label']

features = graph.ndata['feat']


in_feats = features.shape[1]
print('in_feats:',in_feats)
n_hidden = 8
n_classes = 2
num_heads = 3
feat_drop = 0.6   # declared but not used by the hand-rolled GATLayer
attn_drop = 0.5   # declared but not used by the hand-rolled GATLayer
lr = 0.02
weight_decay = 3e-4
num_epochs = 31

model = GAT(graph,
            in_dim=features.shape[1],
            hidden_dim=n_hidden,
            out_dim=n_classes,  # 2 classes for this toy task
            num_heads=num_heads)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

dur = []

# Visualize training with Visdom (assumes a Visdom server is running)
vis = Visdom()

opt = {
    'title': 'GAT on a 6-node toy graph',
    'xlabel': 'epoch',
    'ylabel': 'loss / acc',
    'legend': ['loss', 'val_accuracy']
}

for epoch in range(num_epochs):
    if epoch >= 3:
        t0 = time.time()

    logits = model(features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], label[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    acc_val = evaluate(model, features, label, val_mask)

    x = epoch
    y_loss = loss.cpu().detach().numpy()
    y_acc = acc_val
    vis.line(
        X=[x],
        Y=[[y_loss, y_acc]],
        win='GAT',
        update='append',
        opts=opt
    )
    if epoch >= 3:
        dur.append(time.time() - t0)

    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | Accuracy {:.4f}".format(
        epoch, loss.item(), np.mean(dur), acc_val))

acc_test = evaluate(model, features, label, test_mask)
print("Test Accuracy {:.4f}".format(acc_test))

# 'h' was popped inside GATLayer.forward, so only 'z' (and 'e' on the edges) remain:
print(graph.ndata['z'][1])
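
The hand-rolled layer also leaves its unnormalized attention scores 'e' on the edges (written last by the single-head output layer). A minimal sketch, assuming the script above has just run, that normalizes them with DGL's built-in edge_softmax and checks that the coefficients on each node's incoming edges sum to 1:

python
from dgl.nn.functional import edge_softmax

with torch.no_grad():
    # Softmax of the raw scores over the incoming edges of each destination node
    alpha = edge_softmax(graph, graph.edata['e'])
# Edge IDs of the edges pointing into node 1
in_eids = graph.in_edges(1, form='eid')
print(alpha[in_eids].sum())  # tensor(1.)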