I. Core Algorithm
1. Data Preparation and Model Initialization
matlab
%% Data preparation
% Generate example data (binary classification): two 2-D Gaussian clusters
rng(1);                                   % fix the random seed for reproducibility
n_per_class = 50;
X = [randn(n_per_class,2) + 2;            % positive class centered at (2, 2)
     randn(n_per_class,2) - 2];           % negative class centered at (-2, -2)
y = [ones(n_per_class,1); zeros(n_per_class,1)];
X = [ones(size(X,1),1) X];                % add intercept term
%% Parameter settings
learning_rate = 0.1;                      % learning rate
max_iter = 1000;                          % maximum number of iterations
tol = 1e-4;                               % convergence tolerance
2. Sigmoid Function
matlab
function g = sigmoid(z)
% Element-wise logistic function: maps any real input into (0, 1)
g = 1.0 ./ (1.0 + exp(-z));
end
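Because exp and ./ operate element-wise, the same function handles scalars, vectors, and matrices. A quick sanity check (illustrative only, not part of the original tutorial):
matlab
sigmoid(0)              % returns 0.5
sigmoid([-10 0 10])     % approaches [0 0.5 1] element-wise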
3. Cost Function and Gradient
matlab
function [J, grad] = computeCost(X, y, theta)
% Cross-entropy cost and gradient for logistic regression
m = length(y);
h = sigmoid(X * theta);                       % predicted probabilities
J = (-1/m) * sum(y .* log(h) + (1-y) .* log(1-h));
grad = (1/m) * X' * (h - y);                  % gradient of J with respect to theta
end
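For reference, computeCost implements the standard cross-entropy cost and its gradient:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + (1-y^{(i)})\log\left(1-h_\theta(x^{(i)})\right)\right], \qquad \nabla_\theta J = \frac{1}{m}X^\top\left(h_\theta(X)-y\right)$$

where $h_\theta(x) = \mathrm{sigmoid}(\theta^\top x)$.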
II. Batch Gradient Descent
matlab
function theta = gradientDescent(X, y, alpha, max_iter, tol)
% Batch gradient descent: every update uses the full training set
[~, n] = size(X);
theta = zeros(n,1);
J_history = zeros(max_iter,1);
for iter = 1:max_iter
    [J, grad] = computeCost(X, y, theta);
    theta = theta - alpha * grad;         % gradient step
    J_history(iter) = J;
    % Convergence check: stop when the cost barely changes
    if iter > 1 && abs(J_history(iter) - J_history(iter-1)) < tol
        break;
    end
end
end
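The loop above performs the standard batch update $\theta \leftarrow \theta - \alpha\,\nabla_\theta J(\theta)$, stopping once the decrease in cost between consecutive iterations falls below tol.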
III. Stochastic Gradient Descent
matlab
function theta = stochasticGradientDescent(X, y, alpha, max_epoch)
% Stochastic gradient descent: one parameter update per training sample
[m, n] = size(X);
theta = zeros(n,1);
J_history = zeros(max_epoch,1);
for epoch = 1:max_epoch
    % Shuffle the samples at the start of each epoch
    idx = randperm(m);
    X = X(idx,:);
    y = y(idx);
    for i = 1:m
        xi = X(i,:)';                     % single sample as a column vector
        yi = y(i);
        hi = sigmoid(xi' * theta);        % prediction for this sample
        grad = (hi - yi) * xi;            % per-sample gradient
        theta = theta - alpha * grad;
    end
    % Record the full-dataset loss after each epoch
    J_history(epoch) = computeCost(X, y, theta);
end
end
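Each inner-loop step applies the per-sample update $\theta \leftarrow \theta - \alpha\,(h_\theta(x^{(i)}) - y^{(i)})\,x^{(i)}$; because these updates are noisy, SGD typically benefits from the learning-rate decay discussed in the optimization section below.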
IV. Newton's Method
matlab
function theta = newtonMethod(X, y, max_iter, tol)
% Newton's method: second-order updates using the Hessian of the cost
[m, n] = size(X);
theta = zeros(n,1);
for iter = 1:max_iter
    h = sigmoid(X * theta);
    grad = (1/m) * X' * (h - y);
    % Hessian matrix (with a small ridge term to avoid singularity)
    S = diag(h .* (1-h));
    H = (1/m) * X' * S * X + 1e-4 * eye(n);
    % Newton update
    theta = theta - H \ grad;
    % Convergence check on the gradient norm
    if norm(grad) < tol
        break;
    end
end
end
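The loop implements the standard Newton step for logistic regression:

$$H = \frac{1}{m}X^\top S X + \epsilon I, \quad S = \mathrm{diag}\big(h_i(1-h_i)\big), \qquad \theta \leftarrow \theta - H^{-1}\nabla_\theta J$$

with a small ridge term $\epsilon = 10^{-4}$ to keep $H$ invertible.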
V. Performance Comparison and Visualization
matlab
%% Training and comparison
theta_gd = gradientDescent(X, y, 0.1, 1000, 1e-4);
theta_sgd = stochasticGradientDescent(X, y, 0.1, 1000);
theta_newton = newtonMethod(X, y, 100, 1e-4);
%% Decision boundary plot
figure;
hold on;
scatter(X(y==1,2), X(y==1,3), 'r', 'filled');
scatter(X(y==0,2), X(y==0,3), 'b', 'filled');
% Plot the decision boundary found by each method
x1 = linspace(min(X(:,2)), max(X(:,2)), 100);
x2_gd = (-theta_gd(1) - theta_gd(2)*x1) / theta_gd(3);
x2_sgd = (-theta_sgd(1) - theta_sgd(2)*x1) / theta_sgd(3);
x2_newton = (-theta_newton(1) - theta_newton(2)*x1) / theta_newton(3);
plot(x1, x2_gd, 'g', 'LineWidth', 2);
plot(x1, x2_sgd, 'm', 'LineWidth', 2);
plot(x1, x2_newton, 'k', 'LineWidth', 2);
legend('Positive', 'Negative', 'GD', 'SGD', 'Newton');
title('Decision boundaries of the different optimization methods');
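To complement the visual comparison, training accuracy can be computed for each solution at a 0.5 decision threshold. A minimal sketch (the threshold and the accuracy helper are illustrative, not part of the original code):
matlab
% Training accuracy at a 0.5 decision threshold (illustrative helper)
accuracy = @(theta) mean((sigmoid(X * theta) >= 0.5) == y);
fprintf('GD: %.2f%%  SGD: %.2f%%  Newton: %.2f%%\n', ...
        100*accuracy(theta_gd), 100*accuracy(theta_sgd), 100*accuracy(theta_newton));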
VI. Optimizations
- Learning-rate tuning
  - Decay schedule: alpha = initial_alpha / (1 + decay_rate * iter)
  - Adaptive methods: combine with AdaGrad or RMSProp
- Regularization
  - L2 regularization: add lambda/2 * sum(theta(2:end).^2) to the cost, leaving the intercept unpenalized (see the sketch below)
  - L1 regularization: handle the non-differentiable penalty with a subgradient method to obtain sparse weights
- Numerical stability
  - Clip the sigmoid input, e.g. z = max(min(z, 30), -30), so that log(h) and log(1-h) never evaluate log(0) (see the sketch below)
  - Hessian regularization: add a small diagonal term to prevent a singular matrix, as done in newtonMethod above
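A minimal sketch of the clipped sigmoid and the L2-regularized cost, assuming a regularization weight lambda (a name not used in the original code):
matlab
function g = sigmoidStable(z)
% Numerically stable sigmoid: clip the input before exponentiating
z = max(min(z, 30), -30);
g = 1.0 ./ (1.0 + exp(-z));
end

function [J, grad] = computeCostReg(X, y, theta, lambda)
% Cross-entropy cost with an L2 penalty; the intercept theta(1) is not penalized
m = length(y);
h = sigmoidStable(X * theta);
J = (-1/m) * sum(y .* log(h) + (1-y) .* log(1-h)) ...
    + (lambda/2) * sum(theta(2:end).^2);
grad = (1/m) * X' * (h - y) + lambda * [0; theta(2:end)];
end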
VII. Extension: One-vs-All Multiclass Classification
matlab
%% Multiclass extension (One-vs-All)
function models = oneVsAll(X, y, num_classes, method)
% Train one binary classifier per class with the chosen optimizer
% (hyperparameters are fixed here for brevity)
models = cell(num_classes,1);
for c = 1:num_classes
    % Convert to a binary label: 1 for class c, 0 otherwise
    binary_y = double(y == c);
    % Train a single classifier
    switch method
        case 'gd'
            models{c} = gradientDescent(X, binary_y, 0.1, 1000, 1e-4);
        case 'sgd'
            models{c} = stochasticGradientDescent(X, binary_y, 0.1, 100);
        case 'newton'
            models{c} = newtonMethod(X, binary_y, 100, 1e-4);
    end
end
end
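To use the trained classifiers, each sample can be scored under every model and assigned to the class with the highest probability. A minimal sketch (predictOneVsAll is an assumed helper name, not defined in the original):
matlab
function pred = predictOneVsAll(models, X)
% Score every sample under each binary model and pick the most probable class
num_classes = numel(models);
scores = zeros(size(X,1), num_classes);
for c = 1:num_classes
    scores(:,c) = sigmoid(X * models{c});    % P(class = c | x)
end
[~, pred] = max(scores, [], 2);              % index of the highest score
end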