这两天学完了 RNN、GRU 和 LSTM。下面给出一个基于 PyTorch 的简单直观的小项目:分别用这三种循环神经网络实现 MNIST 手写数字识别,同时也可以作为一个深度学习代码模板,便于学习和记忆。
RNN:
import os
# 设置 PyTorch 数据集国内镜像
os.environ["TORCHVISION_DATASET_MIRROR"] = "https://mirrors.tuna.tsinghua.edu.cn/pytorch-datasets"
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# 1. Pick the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")

# 2. Hyperparameters.
# input_size is the feature count handed to the recurrent layer per time
# step.  sequence_length is informational only: it is not read by the code
# shown in this script.
input_size = 28
sequence_length = 28
hidden_size = 128
num_classes = 10
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# 3. Data preprocessing: convert each image to a tensor, then squeeze(0)
# drops the leading axis when it has size 1, leaving a 2-D tensor that the
# model consumes as a sequence of rows.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.squeeze(0)),
    ]
)

# Once the dataset is on disk, download=True may be switched to
# download=False.
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root='./data', train=False, transform=transform, download=True
)

# Training batches are reshuffled every epoch; test batches keep their order.
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)
class SimpleLSTM(nn.Module):
    """Single-layer recurrent classifier followed by a linear head.

    Despite its name, the recurrent layer used here is a vanilla ``nn.RNN``.
    The class name and the ``lstm`` attribute are kept unchanged so that
    existing callers and state-dict keys keep working.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.hidden_size = hidden_size
        # NOTE: this attribute holds an nn.RNN, not an LSTM (see class docstring).
        self.lstm = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Input in batch-first layout, ``(batch, seq_len, input_size)``.
        """
        batch = x.size(0)
        # A vanilla RNN carries only a hidden state (there is no cell state).
        # Deriving the initial state from ``x`` makes it follow the input's
        # device *and* dtype, so the model also works after ``.double()`` or
        # ``.half()`` instead of being pinned to the default float32.
        h0 = x.new_zeros(1, batch, self.hidden_size)
        out, _ = self.lstm(x, h0)
        # Classify from the output of the last time step only.
        return self.fc(out[:, -1, :])
model = SimpleLSTM(input_size, hidden_size, num_classes).to(device)

# 5. Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 6. Training loop: one pass over train_loader per epoch; the mean batch
# loss of every epoch is appended to train_losses.
print("开始训练...")
train_losses = []
num_batches = len(train_loader)
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        loss = criterion(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / num_batches
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# 7. Evaluation on the test set (no gradients are tracked).
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        # The index of the largest logit is the predicted class.
        predicted = model(batch_images).max(dim=1).indices
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()
accuracy = 100 * correct / total
print(f"\n测试集准确率: {accuracy:.2f}%")

# 8. Visualize the first test image with its true label and the prediction.
sample, first_label = test_dataset[0]
first_image = sample.cpu().numpy()
model.eval()
with torch.no_grad():
    # unsqueeze(0) adds a batch axis of size 1 in front of the sample.
    pred = model(sample.unsqueeze(0).to(device))
pred_class = pred.argmax(dim=1).item()
plt.imshow(first_image, cmap='gray')
plt.title(f"真实标签: {first_label}, 预测结果: {pred_class}")
plt.show()
GRU:
import os
# 设置 PyTorch 数据集国内镜像
os.environ["TORCHVISION_DATASET_MIRROR"] = "https://mirrors.tuna.tsinghua.edu.cn/pytorch-datasets"
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# 1. Pick the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")

# 2. Hyperparameters.
# input_size is the feature count handed to the recurrent layer per time
# step.  sequence_length is informational only: it is not read by the code
# shown in this script.
input_size = 28
sequence_length = 28
hidden_size = 128
num_classes = 10
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# 3. Data preprocessing: convert each image to a tensor, then squeeze(0)
# drops the leading axis when it has size 1, leaving a 2-D tensor that the
# model consumes as a sequence of rows.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.squeeze(0)),
    ]
)

# Once the dataset is on disk, download=True may be switched to
# download=False.
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root='./data', train=False, transform=transform, download=True
)

# Training batches are reshuffled every epoch; test batches keep their order.
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)
class SimpleLSTM(nn.Module):
    """Single-layer recurrent classifier followed by a linear head.

    Despite its name, the recurrent layer used here is an ``nn.GRU``.  The
    class name and the ``lstm`` attribute are kept unchanged so that
    existing callers and state-dict keys keep working.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.hidden_size = hidden_size
        # NOTE: this attribute holds an nn.GRU, not an LSTM (see class docstring).
        self.lstm = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Input in batch-first layout, ``(batch, seq_len, input_size)``.
        """
        batch = x.size(0)
        # A GRU carries only a hidden state (there is no separate cell state).
        # Deriving the initial state from ``x`` makes it follow the input's
        # device *and* dtype, so the model also works after ``.double()`` or
        # ``.half()`` instead of being pinned to the default float32.
        h0 = x.new_zeros(1, batch, self.hidden_size)
        out, _ = self.lstm(x, h0)
        # Classify from the output of the last time step only.
        return self.fc(out[:, -1, :])
model = SimpleLSTM(input_size, hidden_size, num_classes).to(device)

# 5. Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 6. Training loop: one pass over train_loader per epoch; the mean batch
# loss of every epoch is appended to train_losses.
print("开始训练...")
train_losses = []
num_batches = len(train_loader)
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        loss = criterion(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / num_batches
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# 7. Evaluation on the test set (no gradients are tracked).
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        # The index of the largest logit is the predicted class.
        predicted = model(batch_images).max(dim=1).indices
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()
accuracy = 100 * correct / total
print(f"\n测试集准确率: {accuracy:.2f}%")

# 8. Visualize the first test image with its true label and the prediction.
sample, first_label = test_dataset[0]
first_image = sample.cpu().numpy()
model.eval()
with torch.no_grad():
    # unsqueeze(0) adds a batch axis of size 1 in front of the sample.
    pred = model(sample.unsqueeze(0).to(device))
pred_class = pred.argmax(dim=1).item()
plt.imshow(first_image, cmap='gray')
plt.title(f"真实标签: {first_label}, 预测结果: {pred_class}")
plt.show()
LSTM:
import os
# 设置 PyTorch 数据集国内镜像
os.environ["TORCHVISION_DATASET_MIRROR"] = "https://mirrors.tuna.tsinghua.edu.cn/pytorch-datasets"
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# 1. Pick the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")

# 2. Hyperparameters.
# input_size is the feature count handed to the recurrent layer per time
# step.  sequence_length is informational only: it is not read by the code
# shown in this script.
input_size = 28
sequence_length = 28
hidden_size = 128
num_classes = 10
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# 3. Data preprocessing: convert each image to a tensor, then squeeze(0)
# drops the leading axis when it has size 1, leaving a 2-D tensor that the
# model consumes as a sequence of rows.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.squeeze(0)),
    ]
)

# Once the dataset is on disk, download=True may be switched to
# download=False.
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root='./data', train=False, transform=transform, download=True
)

# Training batches are reshuffled every epoch; test batches keep their order.
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)
# 4. LSTM model (passes both initial states, h0 and c0, to the layer).
class SimpleLSTM(nn.Module):
    """Single-layer LSTM classifier followed by a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden and cell states.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.hidden_size = hidden_size
        # nn.LSTM takes no ``nonlinearity`` argument (unlike nn.RNN); its
        # internal activations are fixed.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Input in batch-first layout, ``(batch, seq_len, input_size)``.
        """
        batch = x.size(0)
        # An LSTM needs two initial states: hidden (h0) and cell (c0).
        # Deriving them from ``x`` makes them follow the input's device *and*
        # dtype, so the model also works after ``.double()`` or ``.half()``
        # instead of being pinned to the default float32.
        h0 = x.new_zeros(1, batch, self.hidden_size)
        c0 = x.new_zeros(1, batch, self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Classify from the output of the last time step only.
        return self.fc(out[:, -1, :])
model = SimpleLSTM(input_size, hidden_size, num_classes).to(device)

# 5. Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 6. Training loop: one pass over train_loader per epoch; the mean batch
# loss of every epoch is appended to train_losses.
print("开始训练...")
train_losses = []
num_batches = len(train_loader)
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        loss = criterion(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / num_batches
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# 7. Evaluation on the test set (no gradients are tracked).
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        # The index of the largest logit is the predicted class.
        predicted = model(batch_images).max(dim=1).indices
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()
accuracy = 100 * correct / total
print(f"\n测试集准确率: {accuracy:.2f}%")

# 8. Visualize the first test image with its true label and the prediction.
sample, first_label = test_dataset[0]
first_image = sample.cpu().numpy()
model.eval()
with torch.no_grad():
    # unsqueeze(0) adds a batch axis of size 1 in front of the sample.
    pred = model(sample.unsqueeze(0).to(device))
pred_class = pred.argmax(dim=1).item()
plt.imshow(first_image, cmap='gray')
plt.title(f"真实标签: {first_label}, 预测结果: {pred_class}")
plt.show()