# Build the multilayer perceptron.
# nn.Sequential is an ordered container: each sub-module is applied in the
# order in which it is listed.
# NOTE(review): `nn`, `dropout1` and `dropout2` are only imported/defined
# further down in this file, so they must already be in scope when this
# statement runs.
net = nn.Sequential(
    # Flatten each sample into a vector; the next layer expects 784 features
    # (e.g. a 28x28 image).
    nn.Flatten(),

    # Hidden block 1: 784 -> 256, ReLU non-linearity, then dropout with
    # drop probability `dropout1` (only active in training mode) to reduce
    # overfitting.
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(dropout1),

    # Hidden block 2: 256 -> 256, ReLU, then dropout with drop probability
    # `dropout2`.
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Dropout(dropout2),

    # Output layer: 256 -> 10, one score per class. No softmax is applied
    # here; the raw scores are meant to be fed to a loss such as
    # nn.CrossEntropyLoss, which handles that internally.
    nn.Linear(256, 10),
)
# Weight-initialisation hook, intended to be passed to ``Module.apply``.
def init_weights(m):
    """Initialise the weights of a fully connected layer in place.

    Args:
        m: An ``nn.Module``. Modules that are not ``nn.Linear`` (or a
            subclass of it) are left untouched.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        # Draw the weights from a normal distribution with mean 0 and
        # standard deviation 0.01, so the initial weights are small.
        nn.init.normal_(m.weight, std=0.01)
        # The bias is deliberately left alone: it keeps the value assigned
        # when the layer was constructed (PyTorch's default for nn.Linear is
        # a small uniform distribution, not zeros). If zero biases are
        # wanted, call nn.init.zeros_(m.bias) here when m.bias is not None.
# Run the initialiser over the network. Module.apply calls the function on
# every sub-module recursively, and on the container itself.
net.apply(init_weights)

# Plain stochastic gradient descent over all parameters of the network,
# using the learning rate `lr`.
trainer = torch.optim.SGD(net.parameters(), lr=lr)

# Hand the model, the data iterators, the loss, the number of epochs and the
# optimizer to the d2l training helper.
# NOTE(review): `d2l`, `train_iter`, `test_iter` and `loss` are not defined
# in the visible part of this file (the loss is presumably an
# nn.CrossEntropyLoss instance - confirm against the surrounding context).
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# Stand-alone context for the model above: hyperparameters, data pipeline
# and loss function.

# Hyperparameters.
dropout1 = 0.2    # drop probability of the first Dropout layer
dropout2 = 0.5    # drop probability of the second Dropout layer
lr = 0.1          # learning rate
num_epochs = 10   # number of passes over the training set
batch_size = 256  # samples per mini-batch

# Fashion-MNIST preprocessing: convert images to tensors, then standardise
# them with the mean / standard deviation of the Fashion-MNIST training set
# (0.1307 / 0.3081 are the statistics of the digit MNIST dataset and do not
# apply here).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.2860,), (0.3530,)),
])

# Both splits are stored under ./data and downloaded on first use.
train_dataset = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Shuffle only the training data; evaluation order does not matter.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Loss function: cross-entropy computed on the raw class scores produced by
# the network.
loss_fn = nn.CrossEntropyLoss()
# Training routine.
def train_epoch(net, train_loader, loss_fn, optimizer, device):
    """Train ``net`` for one pass over ``train_loader``.

    Args:
        net: The model to train; it is switched to training mode.
        train_loader: Iterable yielding ``(data, target)`` batches. It does
            not need to support ``len()``.
        loss_fn: Callable mapping ``(output, target)`` to a scalar loss
            tensor.
        optimizer: Optimizer that updates the parameters of ``net``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(avg_loss, accuracy)``: the mean of the per-batch loss
        values and the percentage of samples whose highest-scoring class
        equals the target. Both are ``0.0`` when the loader yields no data.
    """
    net.train()  # enable training-mode behaviour (e.g. dropout)

    running_loss = 0.0
    num_batches = 0
    num_correct = 0
    num_samples = 0

    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        # Forward pass.
        output = net(data)
        loss = loss_fn(output, target)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics. Batches are counted here instead of calling
        # len(train_loader) so that loaders without __len__ work too.
        running_loss += loss.item()
        num_batches += 1
        predicted = output.argmax(dim=1)
        num_samples += target.size(0)
        num_correct += predicted.eq(target).sum().item()

    # Guard the divisions so an empty loader does not raise.
    avg_loss = running_loss / num_batches if num_batches else 0.0
    accuracy = 100.0 * num_correct / num_samples if num_samples else 0.0
    return avg_loss, accuracy
# 测试函数
def test(net, test_loader, loss_fn, device):
"""测试模型"""
net.eval() # 设置为评估模式
total_loss = 0
correct = 0
total = 0
with torch.no_grad(): # 不计算梯度
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = net(data)
loss = loss_fn(output, target)
total_loss += loss.item()
_, predicted = output.max(1)
total += target.size(0)
correct += predicted.eq(target).sum().item()
avg_loss = total_loss / len(test_loader)
accuracy = 100. * correct / total
return avg_loss, accuracy
# Main training loop.
def main():
    """Build the MLP, train it and evaluate it after every epoch.

    Relies on the module-level hyperparameters (``dropout1``, ``dropout2``,
    ``lr``, ``num_epochs``), the data loaders (``train_loader``,
    ``test_loader``) and ``loss_fn``. Prints one summary line per epoch.

    Returns:
        A tuple ``(net, train_losses, train_accs, test_losses, test_accs)``:
        the trained model followed by four lists holding one value per
        epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Build the network and move it to the selected device.
    net = nn.Sequential(
        nn.Flatten(),
        nn.Linear(784, 256),
        nn.ReLU(),
        nn.Dropout(dropout1),
        nn.Linear(256, 256),
        nn.ReLU(),
        nn.Dropout(dropout2),
        nn.Linear(256, 10),
    ).to(device)

    def init_weights(m):
        """Give fully connected layers small normally distributed weights."""
        # isinstance also covers subclasses of nn.Linear.
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, std=0.01)

    net.apply(init_weights)

    optimizer = optim.SGD(net.parameters(), lr=lr)

    # Per-epoch history.
    train_losses, train_accs = [], []
    test_losses, test_accs = [], []

    for epoch in range(1, num_epochs + 1):
        # One pass over the training data.
        train_loss, train_acc = train_epoch(
            net, train_loader, loss_fn, optimizer, device
        )
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        # Evaluate on the test data after the update.
        test_loss, test_acc = test(net, test_loader, loss_fn, device)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

        print(f'Epoch {epoch:2d}: '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | '
              f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

    return net, train_losses, train_accs, test_losses, test_accs
# Script entry point: train the model, then plot the recorded history.
if __name__ == "__main__":
    net, train_losses, train_accs, test_losses, test_accs = main()

    # Epochs are numbered from 1, matching the per-epoch lines printed
    # during training; plotting the bare lists would start the axis at 0.
    epochs = range(1, len(train_losses) + 1)

    # Two panels side by side: loss on the left, accuracy on the right.
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # Loss curves.
    axes[0].plot(epochs, train_losses, label='Train Loss')
    axes[0].plot(epochs, test_losses, label='Test Loss')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Training and Test Loss')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Accuracy curves.
    axes[1].plot(epochs, train_accs, label='Train Accuracy')
    axes[1].plot(epochs, test_accs, label='Test Accuracy')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Accuracy (%)')
    axes[1].set_title('Training and Test Accuracy')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()