一、问题定义(先统一接口)
给定一个无向网络 $G = (V, E)$:
- $V$:节点集合
- $E$:已有连边集合
- 目标:在 $T$ 时刻尚未连接的节点对中,预测哪些节点对将在未来产生连边
二、 MATLAB 源码(主程序 + 算法)
2.1 主程序:link_prediction_main.m
matlab
%% Link prediction driver script
% Loads a network, hides a fraction of its edges, scores every node pair with
% several predictors, prints the AUC of each predictor, and finally lists the
% K highest-scoring candidate links according to the Adamic-Adar score.
clear; clc; close all;

%% 1. Load the network (example: Zachary Karate Club)
load karate.mat;   % expected to define the variable `adjacency`
A = adjacency;     % adjacency matrix (0/1)
N = size(A, 1);    % number of nodes

%% 2. Split into training / test edges (90% training, 10% test)
ratio = 0.1;
[A_train, test_edges] = split_network(A, ratio);

%% 3. Compute link-prediction scores
fprintf('计算链路预测得分...\n');
% 3.1 Similarity-based predictors
S_CN = common_neighbors(A_train);
S_Jac = jaccard(A_train);
S_AA = adamic_adar(A_train);
S_RA = resource_allocation(A_train);
% 3.2 Degree-product predictor (preferential attachment)
S_PA = preferential_attachment(A_train);
% 3.3 Machine-learning predictor (similarity scores stacked as features)
features = cat(3, S_CN, S_Jac, S_AA);
S_ML = ml_link_prediction(A_train, features);

%% 4. Evaluate every predictor
% Named `method_names` rather than `methods` so the built-in `methods`
% function is not shadowed in the workspace.
method_names = {'CN','Jaccard','AA','RA','PA','ML'};
scores = {S_CN, S_Jac, S_AA, S_RA, S_PA, S_ML};
for i = 1:length(method_names)
    auc = evaluate_auc(A_train, scores{i}, test_edges);
    fprintf('方法: %-10s AUC = %.4f\n', method_names{i}, auc);
end

%% 5. Show the Top-K predicted links
K = 10;
% Rank only unordered pairs (upper triangle) that are not linked in A.
% Sorting the whole symmetric matrix would list each pair twice and let
% already-linked pairs use up ranking slots, so fewer than K links got printed.
cand_idx = find(triu(A == 0, 1));
[~, order] = sort(S_AA(cand_idx), 'descend');
K_eff = min(K, numel(cand_idx));          % the network may have fewer than K candidates
top_idx = cand_idx(order(1:K_eff));
[pred_i, pred_j] = ind2sub([N N], top_idx);
fprintf('\nTop %d 预测链路:\n', K);
for k = 1:K_eff
    fprintf('%2d: 节点 %2d --- 节点 %2d (得分 = %.4f)\n', ...
        k, pred_i(k), pred_j(k), S_AA(top_idx(k)));
end
2.2 网络划分:split_network.m
matlab
function [A_train, test_edges] = split_network(A, ratio)
% SPLIT_NETWORK  Hide a random fraction of edges to form a test set.
%
%   [A_train, test_edges] = split_network(A, ratio)
%
%   A          - symmetric adjacency matrix of an undirected graph.
%   ratio      - fraction of edges to remove; the resulting count is clamped
%                to the range [0, number of edges].
%
%   A_train    - symmetric training adjacency matrix in which every removed
%                edge is zero in BOTH triangles. Kept edges retain their
%                original weight. Diagonal entries (self-loops) are dropped.
%   test_edges - column vector of linear indices into A (upper-triangle
%                positions) identifying the removed edges.

% Work on the strict upper triangle so each undirected edge exists exactly once.
% Zeroing only the upper entry of the full matrix and then adding the transpose
% would leave the removed edge present (via its lower entry) and double every
% kept edge.
upper = triu(A, 1);
edges = find(upper);
N_edge = numel(edges);
N_remove = min(N_edge, max(0, round(N_edge * ratio)));

remove_idx = randperm(N_edge, N_remove);
test_edges = reshape(edges(remove_idx), [], 1);

upper(test_edges) = 0;
A_train = upper + upper';   % mirror to restore symmetry
end
2.3 相似度算法(核心)
(1)共同邻居 CN
matlab
function S = common_neighbors(A)
% COMMON_NEIGHBORS  Common-neighbour score for every node pair.
%   S(i,j) is the sum over k of A(i,k)*A(j,k) for i ~= j, i.e. the number of
%   shared neighbours when A is a 0/1 adjacency matrix. S is symmetric and
%   its diagonal is zero.
M = double(full(A));
S = M * M.';                      % entry (i,j) is the dot product of rows i and j
S(logical(eye(size(S)))) = 0;     % a node is not paired with itself
end
(2)Jaccard 系数
matlab
function S = jaccard(A)
% JACCARD  Jaccard similarity score for every node pair.
%   For i ~= j, S(i,j) = |N(i) ∩ N(j)| / (|N(i) ∪ N(j)| + eps), where the
%   union size is computed as rowsum(i) + rowsum(j) - intersection. The eps
%   term keeps pairs of isolated nodes at 0 instead of NaN. S is symmetric
%   with a zero diagonal.
M = double(full(A));
shared = M * M.';                                   % intersection sizes
row_total = sum(M, 2);
either = bsxfun(@plus, row_total, row_total.') - shared;   % union sizes
S = shared ./ (either + eps);
S(logical(eye(size(S)))) = 0;
end
(3)Adamic--Adar 指数
matlab
function S = adamic_adar(A)
% ADAMIC_ADAR  Adamic-Adar index for every node pair.
%   For i ~= j, S(i,j) sums 1/log(deg(k) + eps) over every node k for which
%   A(i,k)*A(j,k) is non-zero, where deg = sum(A). S is symmetric with a
%   zero diagonal.
n = size(A, 1);
weight = 1 ./ log(sum(A) + eps);   % per-node contribution when it is a shared neighbour
S = zeros(n);
for a = 1:n-1
    row_a = A(a, :);
    for b = a+1:n
        is_shared = (row_a .* A(b, :)) ~= 0;
        val = sum(weight(is_shared));
        S(a, b) = val;
        S(b, a) = val;             % mirror directly instead of adding the transpose afterwards
    end
end
end
(4)资源分配 RA
matlab
function S = resource_allocation(A)
% RESOURCE_ALLOCATION  Resource-allocation index for every node pair.
%   For i ~= j, S(i,j) sums 1/deg(k) over every node k for which
%   A(i,k)*A(j,k) is non-zero, where deg = sum(A). S is symmetric with a
%   zero diagonal.
n = size(A, 1);
share = 1 ./ sum(A);               % resource each node passes on per neighbour
S = zeros(n);
for a = 1:n-1
    row_a = A(a, :);
    for b = a+1:n
        via = (row_a .* A(b, :)) ~= 0;
        val = sum(share(via));
        S(a, b) = val;
        S(b, a) = val;
    end
end
end
(5)优先连接 PA
matlab
function S = preferential_attachment(A)
% PREFERENTIAL_ATTACHMENT  Degree-product score for every node pair.
%   S(i,j) = deg(i) * deg(j) for i ~= j, where deg(i) is the sum of row i of
%   A. S is an N-by-N symmetric matrix; the diagonal is set to zero so the
%   result has the same layout as the other score matrices in this file.

% Degrees must be a COLUMN vector: with a row vector, deg * deg' collapses
% to a scalar inner product instead of the N-by-N outer product.
deg = sum(double(full(A)), 2);
S = deg * deg.';
S(logical(eye(size(S)))) = 0;
end
2.4 机器学习方法(逻辑回归;也可替换为随机森林等其他分类器)
matlab
function S = ml_link_prediction(A, features)
% ML_LINK_PREDICTION  Score node pairs with a logistic-regression model.
%
%   S = ml_link_prediction(A, features)
%
%   A        - N-by-N adjacency matrix; non-zero upper-triangle entries are
%              positive samples, zero upper-triangle entries are candidate
%              negatives.
%   features - N-by-N-by-F tensor; features(i,j,:) is the feature vector of
%              the pair (i,j).
%
%   S        - N-by-N matrix of fitted link probabilities for every pair.
%
%   Negatives are sampled at random, at most as many as there are positives.
%   Requires fitglm / predict (Statistics and Machine Learning Toolbox).
[N, ~, F] = size(features);

% Build the training sample.
pos = find(triu(A, 1));
% Use A == 0 rather than 1 - A so a weighted edge is never counted as both
% a positive and a negative sample.
neg = find(triu(A == 0, 1));
% randperm(n, k) requires k <= n; dense graphs can have fewer non-edges than edges.
n_neg = min(numel(neg), numel(pos));
neg = neg(randperm(numel(neg), n_neg));
if isempty(pos) || isempty(neg)
    error('ml_link_prediction:noSamples', ...
        'Need at least one linked and one unlinked node pair to train.');
end

% Row r of `flat` holds the features of the pair with linear index r, so the
% sample rows can be picked directly with the linear indices from find.
flat = reshape(features, [], F);
X = flat([pos; neg], :);
Y = [ones(numel(pos), 1); zeros(numel(neg), 1)];

% Fit the logistic regression and score every pair.
mdl = fitglm(X, Y, 'Distribution', 'binomial');
S = reshape(predict(mdl, flat), [N N]);
end
2.5 评估函数(AUC)
matlab
function auc = evaluate_auc(A_train, S, test_edges)
% EVALUATE_AUC  AUC of a score matrix on held-out edges vs. sampled non-edges.
%
%   auc = evaluate_auc(A_train, S, test_edges)
%
%   A_train    - training adjacency matrix (held-out edges already removed).
%   S          - N-by-N score matrix.
%   test_edges - held-out edges, given either as a column vector of linear
%                indices into S (the format split_network returns) or as an
%                M-by-2 matrix of [i j] node pairs.
%
%   auc        - area under the ROC curve, or NaN when there are no test
%                edges or no non-edges left to sample from.
%
%   Requires perfcurve (Statistics and Machine Learning Toolbox).
auc = NaN;
if isempty(test_edges)
    return;
end

% Normalise the test edges to linear indices in the upper triangle.
if iscolumn(test_edges)
    pos_idx = test_edges;
elseif size(test_edges, 2) == 2
    lo = min(test_edges, [], 2);
    hi = max(test_edges, [], 2);
    pos_idx = sub2ind(size(S), lo, hi);
else
    error('evaluate_auc:badTestEdges', ...
        'test_edges must be a column of linear indices or an M-by-2 list of node pairs.');
end
pos_idx = pos_idx(:);

% Scores of the positive samples.
pos_scores = reshape(S(pos_idx), [], 1);

% Scores of the negative samples (random draw). The pool is every pair
% unlinked in A_train EXCEPT the held-out edges: those are zero in A_train
% too and would otherwise be sampled as negatives.
neg_pool = setdiff(find(triu(A_train == 0, 1)), pos_idx);
neg_pool = neg_pool(:);
n_neg = min(numel(neg_pool), numel(pos_idx));
if n_neg == 0
    return;
end
neg_idx = neg_pool(randperm(numel(neg_pool), n_neg));
neg_scores = reshape(S(neg_idx), [], 1);

% Compute the AUC. perfcurve returns [X, Y, T, AUC]; the area is the FOURTH
% output (the third is the vector of thresholds).
scores = [pos_scores; neg_scores];
labels = [ones(numel(pos_scores), 1); zeros(numel(neg_scores), 1)];
[~, ~, ~, auc] = perfcurve(labels, scores, 1);
end
参考代码 链路预测matlab源码 www.youwenfan.com/contentcsv/79333.html
三、示例网络数据(Karate Club)
matlab
% karate.mat
% Adjacency matrix of the example network, stored in the variable
% `adjacency`, which the main script reads after `load karate.mat`.
% Each row has 34 entries (0/1). Only the first two rows are written out
% here; the remaining rows are elided.
% NOTE(review): the stand-alone `...` line appears to act as a MATLAB line
% continuation, so this literal as written would evaluate to a 2-by-34
% matrix. Fill in all rows before saving the variable to karate.mat.
% NOTE(review): verify the listed rows against the published Zachary edge
% list (e.g. whether edges 1-32 and 2-31 should be present) - TODO confirm.
adjacency = [
0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0;
1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0;
...
];
四、结果解读
| 方法 | 特点 | 适用场景 |
|---|---|---|
| CN | 最简单,速度快 | 小规模网络 |
| AA / RA | 抑制枢纽节点,效果好 | 无标度网络 ✅ |
| Jaccard | 归一化,抗密度差异 | 社区结构明显 |
| PA | 偏向高 degree | 纯幂律网络 |
| ML | 融合多种相似度特征,精度通常较高(取决于特征质量与样本量) | 数据充足时 ✅ |