拟分3种实现方法:
1.纯numpy
2.sklearn中的MLPClassifier
3.pytorch
题目:
在 MNIST 数据集上训练 MLP 模型并比较不同的激活函数和优化算法
任务描述:
使用 MNIST 数据集中的前 20,000 个样本训练一个多层感知机 (MLP) 模型。你需要比较三种不同的激活函数(ReLU、Sigmoid、Tanh)和三种不同的优化算法(SGD、Momentum、Adam),以找到表现最好的组合。模型需要使用一层隐藏层,隐藏单元数量为 128。
要求:
加载并预处理数据,将每个 28x28 的灰度图像展平成 784 维的向量,并进行标准化(除以 255)。
使用 one-hot 编码将标签进行转换。
在训练过程中,分别使用以下激活函数和优化算法:
激活函数:ReLU、Sigmoid、Tanh
优化算法:SGD、Momentum、Adam
对每种激活函数和优化算法组合,训练模型 10000 个 epoch。
评估模型在验证集上的准确率,并输出最优的激活函数与优化算法组合。
输入:
训练数据:MNIST 数据集中的前 20,000 个样本。
每个样本是一个 28x28 的灰度图像,标签为 0-9 的分类。
输出:
输出最优激活函数与优化算法组合,以及在验证集上的准确率。
要求:
不同激活函数与优化算法的组合实现。
对模型的正确率进行评估,并选择最优方案。
提示:
你可以使用 OneHotEncoder 将标签进行 one-hot 编码。
在模型的反向传播过程中,根据不同的优化算法更新权重。
激活函数可以用 ReLU、Sigmoid 和 Tanh,确保在前向传播和反向传播时分别计算激活值及其导数。
python
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
np.random.seed(999)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader,TensorDataset
def load_data(path="mnist.npz"):
    """Load the MNIST dataset from a local ``.npz`` archive.

    Parameters
    ----------
    path : str
        Path to an npz file containing the arrays 'x_train',
        'y_train', 'x_test' and 'y_test'.

    Returns
    -------
    tuple
        ``((X_train, y_train), (X_test, y_test))`` as numpy arrays.
    """
    # Context manager guarantees the archive is closed even if one of
    # the array extractions raises (the original called f.close() only
    # on the success path).
    with np.load(path) as f:
        X_train, y_train = f['x_train'], f['y_train']
        X_test, y_test = f['x_test'], f['y_test']
    return (X_train, y_train), (X_test, y_test)
# Load the MNIST arrays and keep a small subset for fast experimentation.
# NOTE(review): the task statement asks for the first 20,000 samples but
# only 2,000 are taken here — confirm this reduction is intended.
(X_train, y_train), (X_test, y_test) = load_data()
X_train, y_train = X_train[:2000], y_train[:2000]
X_test, y_test = X_test[:1000], y_test[:1000]

# Flatten each 28x28 image into a 784-dim vector and scale pixels to [0, 1].
X_train = X_train.reshape((-1, 28 * 28)).astype('float32') / 255
X_test = X_test.reshape((-1, 28 * 28)).astype('float32') / 255
# 标签进行独热编码
def one_hot_encoding(labels, num_classes):
    """Return a one-hot matrix of shape ``(len(labels), num_classes)``.

    Parameters
    ----------
    labels : sequence of int
        Class indices, each in ``[0, num_classes)``.
    num_classes : int
        Number of columns in the output matrix.

    Returns
    -------
    numpy.ndarray
        Float matrix with a single 1 per row (empty input yields an
        empty ``(0, num_classes)`` matrix).
    """
    # intp cast keeps fancy indexing valid even for an empty label list.
    idx = np.asarray(labels, dtype=np.intp)
    encoded = np.zeros((len(idx), num_classes))
    # Vectorized fancy-index assignment replaces the per-row Python loop.
    encoded[np.arange(len(idx)), idx] = 1
    return encoded
# Convert integer class labels to one-hot vectors (10 classes, digits 0-9).
y_train = one_hot_encoding(y_train, 10)
y_test = one_hot_encoding(y_test, 10)
# MLP model definition
class MLP(nn.Module):
    """Single-hidden-layer perceptron: 784 -> 128 -> 10.

    The hidden nonlinearity is injected via the constructor so the same
    architecture can be trained with different activation functions.
    """

    def __init__(self, activation_function):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.activation = activation_function
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Hidden projection, nonlinearity, then the raw output logits.
        hidden = self.activation(self.fc1(x))
        return self.fc2(hidden)
# Candidate hidden-layer nonlinearities, keyed by display name.
activations = {
    'ReLU': nn.ReLU(),
    'Sigmoid': nn.Sigmoid(),
    'Tanh': nn.Tanh(),
}
# Candidate optimizer classes. "Momentum" reuses SGD; the momentum
# coefficient is supplied later, when the optimizer is instantiated.
optimizers = {
    'SGD': optim.SGD,
    'Momentum': optim.SGD,
    'Adam': optim.Adam,
}
# Train every (activation, optimizer) combination and record its
# validation accuracy so the best pairing can be reported.
results = {}

# The tensors never change between runs, so build the dataset and the
# loader once instead of once per combination (hoisted loop invariant).
train_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
X_val_tensor = torch.Tensor(X_test)
# True class indices recovered from the one-hot labels, for accuracy.
y_val_classes = y_test.argmax(axis=1)

for act_name, activation in activations.items():
    for opt_name, optimizer in optimizers.items():
        print(f"Training with {act_name} and {opt_name}")
        model = MLP(activation)
        # "Momentum" is plain SGD plus a momentum term; the other
        # optimizers only need a learning rate.
        if opt_name == 'Momentum':
            optimizer_instance = optimizer(model.parameters(), lr=0.001, momentum=0.9)
        else:
            optimizer_instance = optimizer(model.parameters(), lr=0.001)
        criterion = nn.BCEWithLogitsLoss()
        # Keep the epoch count small so the full grid runs quickly.
        for epoch in range(10):
            model.train()
            for data, target in train_loader:
                optimizer_instance.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer_instance.step()
        # Evaluate on the validation set.
        model.eval()
        with torch.no_grad():
            val_output = model(X_val_tensor)
            # BUG FIX: the original thresholded each sigmoid output at 0.5
            # and averaged element-wise matches against the whole one-hot
            # matrix, which inflates accuracy (an all-zero prediction
            # already scores ~90%). Multiclass accuracy must compare the
            # predicted argmax class against the true class.
            val_predictions = val_output.argmax(dim=1).numpy()
        val_accuracy = (val_predictions == y_val_classes).mean()
        results[(act_name, opt_name)] = val_accuracy
        print(f"Validation Accuracy: {val_accuracy:.4f}")

# Report the best-performing combination.
best_combination = max(results, key=results.get)
best_accuracy = results[best_combination]
print(f"Best Activation Function: {best_combination[0]}, Best Optimizer: {best_combination[1]}, Accuracy: {best_accuracy:.4f}")
运行结果:

附带一个AI编程软件Trae生成的程序(已在kaggle跑通),大家对比一下:
python
#pytorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time
# Device configuration: use the GPU when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Preprocessing: convert PIL images to tensors and normalize with the
# conventional MNIST mean/std (0.1307, 0.3081).
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
# Download (if needed) and load the MNIST train/test splits.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)
# MLP model definition
class MLP(nn.Module):
    """Single-hidden-layer MLP for MNIST: 784 -> 128 -> 10.

    Parameters
    ----------
    activation_fn : str
        One of ``'relu'``, ``'sigmoid'`` or ``'tanh'``; selects the
        hidden-layer nonlinearity.

    Raises
    ------
    ValueError
        If ``activation_fn`` is not a recognized name.
    """

    def __init__(self, activation_fn):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 10)
        # Select the hidden activation by name.
        if activation_fn == 'relu':
            self.activation = nn.ReLU()
        elif activation_fn == 'sigmoid':
            self.activation = nn.Sigmoid()
        elif activation_fn == 'tanh':
            self.activation = nn.Tanh()
        else:
            # BUG FIX: the original silently left self.activation unset
            # for unknown names, deferring the failure to forward() as a
            # confusing AttributeError. Fail fast with a clear message.
            raise ValueError(f"Unknown activation function: {activation_fn}")
        # Weight initialization
        self._init_weights()

    def _init_weights(self):
        # Xavier initialization keeps early activations well-scaled for
        # all three nonlinearities; biases start at zero.
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.zeros_(self.fc1.bias)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.zeros_(self.fc2.bias)

    def forward(self, x):
        x = x.view(-1, 784)  # flatten (N, 1, 28, 28) images to (N, 784)
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        # Log-probabilities; pair with F.nll_loss during training.
        return F.log_softmax(x, dim=1)
# Training and testing functions
def train(model, optimizer, train_loader, epoch):
    """Run one training epoch and return the mean per-batch loss."""
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        log_probs = model(inputs)
        batch_loss = F.nll_loss(log_probs, labels)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    return running_loss / len(train_loader)
def test(model, test_loader):
    """Evaluate on test_loader; return (mean loss, accuracy in percent)."""
    model.eval()
    total_loss = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            log_probs = model(inputs)
            # Sum (not average) per batch so dividing by the dataset size
            # below gives a true per-sample mean loss.
            total_loss += F.nll_loss(log_probs, labels, reduction='sum').item()
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += predicted.eq(labels.view_as(predicted)).sum().item()
    n = len(test_loader.dataset)
    return total_loss / n, 100. * hits / n
# Experiment grid: three activations crossed with three optimizers.
activation_fns = ['relu', 'sigmoid', 'tanh']


def _make_sgd(params):
    # Plain stochastic gradient descent.
    return optim.SGD(params, lr=0.01)


def _make_momentum(params):
    # SGD with a classical momentum term.
    return optim.SGD(params, lr=0.01, momentum=0.9)


def _make_adam(params):
    # Adam typically needs a smaller learning rate than SGD.
    return optim.Adam(params, lr=0.001)


# Each entry maps an optimizer name to a factory taking model parameters.
optimizers_config = {
    'sgd': _make_sgd,
    'momentum': _make_momentum,
    'adam': _make_adam
}
results = []
# Main experiment loop: train each activation/optimizer pairing for ten
# epochs, tracking losses, accuracies and wall-clock time.
for activation_name in activation_fns:
    for optimizer_name, make_optimizer in optimizers_config.items():
        print(f"\n=== 实验: {activation_name.upper()} + {optimizer_name.upper()} ===")
        # A fresh model and optimizer for every combination.
        model = MLP(activation_fn=activation_name).to(device)
        optimizer = make_optimizer(model.parameters())
        train_losses = []
        test_accuracies = []
        start_time = time.time()
        # Train for 10 epochs, evaluating after each one.
        for epoch in range(1, 11):
            epoch_loss = train(model, optimizer, train_loader, epoch)
            _, epoch_accuracy = test(model, test_loader)
            train_losses.append(epoch_loss)
            test_accuracies.append(epoch_accuracy)
            print(f'Epoch {epoch}: 训练损失={epoch_loss:.4f}, 测试准确率={epoch_accuracy:.2f}%')
        # Record the run summary for the final comparison table.
        elapsed = time.time() - start_time
        results.append({
            'activation': activation_name,
            'optimizer': optimizer_name,
            'final_accuracy': test_accuracies[-1],
            'training_time': elapsed,
            'train_losses': train_losses,
            'test_accuracies': test_accuracies
        })

# Print a markdown-style comparison table over all runs.
print("\n=== 最终性能比较 ===")
print("激活函数 | 优化器 | 测试准确率 | 训练时间(s)")
print("---|---|---|---")
for res in results:
    print(f"{res['activation']} | {res['optimizer']} | {res['final_accuracy']:.2f}% | {res['training_time']:.2f}")
运行结果:
