大模型算法学习2026.6.13

这两天学完了 RNN、GRU 和 LSTM。下面给出一个用 PyTorch 实现的简单直观的小项目：分别用这三种循环神经网络完成 MNIST 手写数字识别，同时也可以作为一个深度学习代码模板来学习和记忆。

RNN:

复制代码

import os
# Intended to point torchvision dataset downloads at a mirror in mainland China.
# Must run before torchvision is imported if the variable is read at import time.
# NOTE(review): it is unclear whether torchvision honours this environment
# variable; the MNIST loader may take its URLs from `datasets.MNIST.mirrors`
# instead — TODO confirm against the installed torchvision version.
os.environ["TORCHVISION_DATASET_MIRROR"] = "https://mirrors.tuna.tsinghua.edu.cn/pytorch-datasets"

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# 1. Select the compute device: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")

# 2. Hyperparameters
input_size = 28        # features per time step (presumably one 28-pixel image row)
sequence_length = 28   # presumably the number of rows; not referenced elsewhere in this script
hidden_size = 128      # width of the recurrent hidden state
num_classes = 10       # number of output logits (digits 0-9)
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# 3. Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    # Drop dimension 0 when it has size 1 (presumably the single grayscale
    # channel), so each sample becomes a 2-D (rows, columns) tensor.
    transforms.Lambda(lambda x: x.squeeze(0))
])

# Once the dataset is already on disk, download=True can be changed to download=False
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset  = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Shuffle only the training data; keep the test order fixed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class SimpleLSTM(nn.Module):
    """Single-layer vanilla RNN classifier followed by a linear output layer.

    Despite its name, this class wraps ``nn.RNN`` (not ``nn.LSTM``). The class
    name and the ``lstm`` attribute name are kept unchanged so that existing
    callers and ``state_dict`` keys keep working.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        self.lstm = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (batch, num_classes).

        Args:
            x: Batched input of shape (batch, seq_len, input_size).
        """
        batch = x.size(0)
        # A vanilla RNN carries only a hidden state (there is no cell state).
        # new_zeros allocates directly with x's dtype and device, so the model
        # also works after .double()/.half() and avoids a CPU-to-device copy.
        h0 = x.new_zeros(1, batch, self.hidden_size)
        out, _ = self.lstm(x, h0)
        # Classify from the output at the last time step.
        out = out[:, -1, :]
        return self.fc(out)

# 4. Build the network and move its parameters to the selected device
model = SimpleLSTM(input_size, hidden_size, num_classes).to(device)

# 5. Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 6. Training loop: one pass over train_loader per epoch, recording the mean batch loss
print("开始训练...")
train_losses = []
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Clear stale gradients, then forward pass, backward pass, parameter update
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Average over the number of batches in this epoch
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# 7. Evaluate accuracy on the test set
model.eval()  # switch the module to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"\n测试集准确率: {100 * correct / total:.2f}%")

# 8. Visualize a single test image with its true and predicted label
# Fetch the sample once: each test_dataset[0] lookup re-runs the transform.
first_tensor, first_label = test_dataset[0]
first_image = first_tensor.cpu().numpy()
with torch.no_grad():
    # unsqueeze(0) adds the batch dimension the model expects
    pred = model(first_tensor.unsqueeze(0).to(device))
    pred_class = torch.argmax(pred, dim=1).item()

plt.imshow(first_image, cmap='gray')
# NOTE(review): the title contains CJK characters; with matplotlib's default
# font they may render as empty boxes — confirm a CJK-capable font is configured.
plt.title(f"真实标签: {first_label}, 预测结果: {pred_class}")
plt.show()

GRU:

复制代码

import os
# Intended to point torchvision dataset downloads at a mirror in mainland China.
# Must run before torchvision is imported if the variable is read at import time.
# NOTE(review): it is unclear whether torchvision honours this environment
# variable; the MNIST loader may take its URLs from `datasets.MNIST.mirrors`
# instead — TODO confirm against the installed torchvision version.
os.environ["TORCHVISION_DATASET_MIRROR"] = "https://mirrors.tuna.tsinghua.edu.cn/pytorch-datasets"

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# 1. Select the compute device: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")

# 2. Hyperparameters
input_size = 28        # features per time step (presumably one 28-pixel image row)
sequence_length = 28   # presumably the number of rows; not referenced elsewhere in this script
hidden_size = 128      # width of the recurrent hidden state
num_classes = 10       # number of output logits (digits 0-9)
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# 3. Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    # Drop dimension 0 when it has size 1 (presumably the single grayscale
    # channel), so each sample becomes a 2-D (rows, columns) tensor.
    transforms.Lambda(lambda x: x.squeeze(0))
])

# Once the dataset is already on disk, download=True can be changed to download=False
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset  = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Shuffle only the training data; keep the test order fixed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class SimpleLSTM(nn.Module):
    """Single-layer GRU classifier followed by a linear output layer.

    Despite its name, this class wraps ``nn.GRU`` (not ``nn.LSTM``). The class
    name and the ``lstm`` attribute name are kept unchanged so that existing
    callers and ``state_dict`` keys keep working.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        self.lstm = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (batch, num_classes).

        Args:
            x: Batched input of shape (batch, seq_len, input_size).
        """
        batch = x.size(0)
        # A GRU carries only a hidden state (there is no separate cell state).
        # new_zeros allocates directly with x's dtype and device, so the model
        # also works after .double()/.half() and avoids a CPU-to-device copy.
        h0 = x.new_zeros(1, batch, self.hidden_size)
        out, _ = self.lstm(x, h0)
        # Classify from the output at the last time step.
        out = out[:, -1, :]
        return self.fc(out)

# 4. Build the network and move its parameters to the selected device
model = SimpleLSTM(input_size, hidden_size, num_classes).to(device)

# 5. Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 6. Training loop: one pass over train_loader per epoch, recording the mean batch loss
print("开始训练...")
train_losses = []
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Clear stale gradients, then forward pass, backward pass, parameter update
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Average over the number of batches in this epoch
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# 7. Evaluate accuracy on the test set
model.eval()  # switch the module to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"\n测试集准确率: {100 * correct / total:.2f}%")

# 8. Visualize a single test image with its true and predicted label
# Fetch the sample once: each test_dataset[0] lookup re-runs the transform.
first_tensor, first_label = test_dataset[0]
first_image = first_tensor.cpu().numpy()
with torch.no_grad():
    # unsqueeze(0) adds the batch dimension the model expects
    pred = model(first_tensor.unsqueeze(0).to(device))
    pred_class = torch.argmax(pred, dim=1).item()

plt.imshow(first_image, cmap='gray')
# NOTE(review): the title contains CJK characters; with matplotlib's default
# font they may render as empty boxes — confirm a CJK-capable font is configured.
plt.title(f"真实标签: {first_label}, 预测结果: {pred_class}")
plt.show()

LSTM:

复制代码

import os
# Intended to point torchvision dataset downloads at a mirror in mainland China.
# Must run before torchvision is imported if the variable is read at import time.
# NOTE(review): it is unclear whether torchvision honours this environment
# variable; the MNIST loader may take its URLs from `datasets.MNIST.mirrors`
# instead — TODO confirm against the installed torchvision version.
os.environ["TORCHVISION_DATASET_MIRROR"] = "https://mirrors.tuna.tsinghua.edu.cn/pytorch-datasets"

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# 1. Select the compute device: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")

# 2. Hyperparameters
input_size = 28        # features per time step (presumably one 28-pixel image row)
sequence_length = 28   # presumably the number of rows; not referenced elsewhere in this script
hidden_size = 128      # width of the recurrent hidden state
num_classes = 10       # number of output logits (digits 0-9)
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# 3. Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    # Drop dimension 0 when it has size 1 (presumably the single grayscale
    # channel), so each sample becomes a 2-D (rows, columns) tensor.
    transforms.Lambda(lambda x: x.squeeze(0))
])

# Once the dataset is already on disk, download=True can be changed to download=False
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset  = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Shuffle only the training data; keep the test order fixed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 4. LSTM 模型（修复双状态问题）
class SimpleLSTM(nn.Module):
    """Single-layer LSTM classifier followed by a linear output layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden and cell states.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        # nn.LSTM takes no `nonlinearity` argument (its activations are fixed).
        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (batch, num_classes).

        Args:
            x: Batched input of shape (batch, seq_len, input_size).
        """
        batch = x.size(0)
        # An LSTM needs two initial states: hidden (h0) and cell (c0).
        # new_zeros allocates directly with x's dtype and device, so the model
        # also works after .double()/.half() and avoids a CPU-to-device copy.
        h0 = x.new_zeros(1, batch, self.hidden_size)
        c0 = x.new_zeros(1, batch, self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Classify from the output at the last time step.
        out = out[:, -1, :]
        return self.fc(out)

# Build the network and move its parameters to the selected device
model = SimpleLSTM(input_size, hidden_size, num_classes).to(device)

# 5. Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 6. Training loop: one pass over train_loader per epoch, recording the mean batch loss
print("开始训练...")
train_losses = []
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Clear stale gradients, then forward pass, backward pass, parameter update
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Average over the number of batches in this epoch
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# 7. Evaluate accuracy on the test set
model.eval()  # switch the module to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"\n测试集准确率: {100 * correct / total:.2f}%")

# 8. Visualize a single test image with its true and predicted label
# Fetch the sample once: each test_dataset[0] lookup re-runs the transform.
first_tensor, first_label = test_dataset[0]
first_image = first_tensor.cpu().numpy()
with torch.no_grad():
    # unsqueeze(0) adds the batch dimension the model expects
    pred = model(first_tensor.unsqueeze(0).to(device))
    pred_class = torch.argmax(pred, dim=1).item()

plt.imshow(first_image, cmap='gray')
# NOTE(review): the title contains CJK characters; with matplotlib's default
# font they may render as empty boxes — confirm a CJK-capable font is configured.
plt.title(f"真实标签: {first_label}, 预测结果: {pred_class}")
plt.show()