利用 PyTorch 在 MNIST 数据集上对加噪堆叠自编码器进行训练和验证

实现背景:

最近在复现关于使用深度学习提取特征来进行聚类的论文,其中使用到了加噪堆叠自编码器,具体实现细节请参考论文:Improved Deep Embedded Clustering with Local Structure Preservation

其中加噪堆叠自编码器涉及两个过程:

预训练:预训练过程对原始数据加噪,贪婪式地对每一层encoder和decoder进行训练,其中训练新的AE时冻结前面训练好的AE。详见:堆栈自编码器 Stacked AutoEncoder-CSDN博客

微调:在预训练完成之后使用所有AE和原始数据对整体模型进行微调。

我在网上找到了一个SAE的示范样例:python-pytorch 利用pytorch对堆叠自编码器进行训练和验证_pytoch把训练和验证写一起的代码-CSDN博客

但是这篇博客使用的数据集很小,如果直接应用到 MNIST 数据集,显存很容易溢出。因此我在原始代码的基础上进行了改进,直接上代码:

初始化数据集:

复制代码
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, random_split
# Preprocessing: convert each image to a tensor, then standardize it with
# mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the MNIST training split from ./data. download=True fetches the files
# only when they are missing; with download=False a fresh checkout fails
# because the dataset cannot be found.
original_dataset = datasets.MNIST(root='./data', train=True,
                                  download=True, transform=transform)


class NoLabelDataset(Dataset):
    """Dataset view that yields only the first element of each wrapped sample.

    Every item of ``original_dataset`` must unpack into exactly two values;
    the second one is discarded.
    """

    def __init__(self, original_dataset):
        # Only a reference is stored; nothing is copied or pre-loaded.
        self.original_dataset = original_dataset

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.original_dataset)

    def __getitem__(self, index):
        """Return the sample at ``index`` without its second element."""
        sample, _discarded = self.original_dataset[index]
        return sample


# Wrap the labelled dataset so that iteration yields images only
no_label_dataset = NoLabelDataset(original_dataset)

# 80/20 train/validation split. The explicitly seeded generator makes the
# split identical on every run, so validation losses are comparable between
# runs and the validation set never mixes with earlier runs' training data.
train_size = int(0.8 * len(no_label_dataset))
val_size = len(no_label_dataset) - train_size
train_dataset, val_dataset = random_split(
    no_label_dataset, [train_size, val_size],
    generator=torch.Generator().manual_seed(42))

# Data loaders: only the training loader is shuffled
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)

print(f"训练集样本数量: {len(train_dataset)}")
print(f"验证集样本数量: {len(val_dataset)}")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

定义模型和训练函数:

复制代码
import torch.nn as nn

class Autoencoder(nn.Module):
    """One convolutional autoencoder stage: conv + ReLU, then transposed conv + ReLU.

    Args:
        input_size: Number of channels of the input (and of the reconstruction).
        hidden_size: Number of channels of the encoded feature map.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # A 3x3 kernel with stride 1 and padding 1 leaves height and width
        # unchanged in both directions; only the channel count changes.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
        encode_conv = nn.Conv2d(input_size, hidden_size, **conv_kwargs)
        decode_conv = nn.ConvTranspose2d(hidden_size, input_size, **conv_kwargs)
        self.encoder = nn.Sequential(encode_conv, nn.ReLU())
        self.decoder = nn.Sequential(decode_conv, nn.ReLU())

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))

def _model_device(model):
    """Return the device of ``model``'s first parameter.

    Falls back to "cuda if available, else cpu" for a model without parameters.
    """
    first_param = next(model.parameters(), None)
    if first_param is not None:
        return first_param.device
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _frozen_encode(frozen_models, batch):
    """Pass ``batch`` through the encoders of ``frozen_models`` without gradients."""
    with torch.no_grad():
        for frozen in frozen_models:
            batch = frozen.encoder(batch)
    return batch


def _reconstruction_loss(models, batch, criterion, noise_factor, finetune, device):
    """Compute the reconstruction loss of ``models[-1]`` on one batch."""
    # The target is the clean batch as seen by the model being trained, i.e.
    # after all earlier (frozen) encoders have been applied.
    target = _frozen_encode(models[:-1], batch.to(device))
    if finetune:
        inputs = target
    else:
        # Denoising objective: corrupt the input, reconstruct the clean target.
        # NOTE(review): the clamp to [0, 1] is kept from the original code; it
        # also truncates any clean values outside that range -- confirm that
        # this is intended for the data being fed in.
        noisy = target + noise_factor * torch.randn_like(target)
        inputs = torch.clamp(noisy, 0., 1.)
    return criterion(models[-1](inputs), target)


def _run_epoch(models, loader, criterion, noise_factor, finetune, device, optimizer=None):
    """Run one pass over ``loader`` and return the mean batch loss.

    Weights are updated only when ``optimizer`` is given. Returns NaN when the
    loader yields no batches.
    """
    total_loss = 0.0
    num_batches = 0
    for batch in loader:
        if optimizer is not None:
            optimizer.zero_grad()
        loss = _reconstruction_loss(models, batch, criterion, noise_factor, finetune, device)
        if optimizer is not None:
            loss.backward()
            optimizer.step()
        total_loss += loss.item()
        # Counting batches here (instead of len(loader)) also supports loaders
        # without a length and avoids dividing by zero for empty ones.
        num_batches += 1
    return total_loss / num_batches if num_batches else float("nan")


def train_ae(models, train_loader, val_loader, num_epochs, criterion, optimizer, noise_factor, finetune):
    """Train ``models[-1]`` as a (denoising) autoencoder and print losses per epoch.

    Args:
        models: List of modules. Only ``models[-1]`` is trained; every earlier
            entry must expose an ``encoder`` attribute, which is applied
            without gradients to produce the input of ``models[-1]``.
        train_loader: Iterable of training batches (tensors).
        val_loader: Iterable of validation batches (tensors).
        num_epochs: Number of passes over ``train_loader``.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per training batch.
        noise_factor: Scale of the Gaussian noise added to the input when
            ``finetune`` is false.
        finetune: If true, the clean input is fed to the model; otherwise a
            noisy, clamped copy is fed and the clean input is the target.

    Returns:
        None. The mean training and validation loss are printed every epoch.
    """
    # Run on whatever device the trained model actually lives on, so a CPU
    # model still works on a machine that happens to have CUDA.
    device = _model_device(models[-1])
    for epoch in range(num_epochs):
        # Training
        models[-1].train()
        train_loss = _run_epoch(models, train_loader, criterion, noise_factor,
                                finetune, device, optimizer=optimizer)
        print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}")

        # Validation (same objective as training, no weight updates)
        models[-1].eval()
        with torch.no_grad():
            val_loss = _run_epoch(models, val_loader, criterion, noise_factor,
                                  finetune, device)
        print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}")

模型训练以及微调:

复制代码
# Hyper-parameters. NOTE: ``batch_size`` is not referenced by any call in
# this section.
batch_size = 16
noise_factor = 0.4

# One reconstruction loss shared by all pre-training stages
criterion = nn.MSELoss()

# Stage 1 (1 -> 16 channels): trained directly on the loader batches
ae1 = Autoencoder(input_size=1, hidden_size=16).to(device)
optimizer = torch.optim.Adam(ae1.parameters(), lr=1e-3)
train_ae(models=[ae1], train_loader=train_loader, val_loader=val_loader,
         num_epochs=10, criterion=criterion, optimizer=optimizer,
         noise_factor=noise_factor, finetune=False)

# Stage 2 (16 -> 64 channels): trained on the output of ae1's encoder;
# the optimizer only holds ae2's parameters
ae2 = Autoencoder(input_size=16, hidden_size=64).to(device)
optimizer = torch.optim.Adam(ae2.parameters(), lr=1e-3)
train_ae(models=[ae1, ae2], train_loader=train_loader, val_loader=val_loader,
         num_epochs=10, criterion=criterion, optimizer=optimizer,
         noise_factor=noise_factor, finetune=False)

# Stage 3 (64 -> 128 channels): trained on the output of ae1's and ae2's
# encoders; the optimizer only holds ae3's parameters
ae3 = Autoencoder(input_size=64, hidden_size=128).to(device)
optimizer = torch.optim.Adam(ae3.parameters(), lr=1e-3)
train_ae(models=[ae1, ae2, ae3], train_loader=train_loader, val_loader=val_loader,
         num_epochs=10, criterion=criterion, optimizer=optimizer,
         noise_factor=noise_factor, finetune=False)

class StackedAutoencoder(nn.Module):
    """Chain three autoencoders into one end-to-end model.

    The encoders run in the order ae1, ae2, ae3 and the decoders in the
    reverse order. The sub-modules are reused, not copied, so training this
    model updates the modules of the autoencoders passed in.

    Args:
        ae1, ae2, ae3: Objects exposing ``encoder`` and ``decoder`` modules.
    """

    def __init__(self, ae1, ae2, ae3):
        super().__init__()
        stages = (ae1, ae2, ae3)
        self.encoder = nn.Sequential(*[stage.encoder for stage in stages])
        self.decoder = nn.Sequential(*[stage.decoder for stage in reversed(stages)])

    def forward(self, x):
        """Return the reconstruction of ``x`` after the full encode/decode chain."""
        return self.decoder(self.encoder(x))

# Assemble the pre-trained stages into one end-to-end model
sae = StackedAutoencoder(ae1, ae2, ae3)

# Fine-tuning must update every stage, so the optimizer is built from the
# stacked model's parameters. Passing only ae1.parameters() would leave ae2
# and ae3 at their pre-trained weights.
optimizer = torch.optim.Adam(sae.parameters(), lr=0.001)
criterion = nn.MSELoss()
train_ae([sae], train_loader, val_loader, 10, criterion, optimizer, noise_factor, finetune=True)

结果可视化:

复制代码
import matplotlib.pyplot as plt
import numpy as np
# Take the element at index 1 of the first validation batch as the sample
# to visualize, and move it to the training device.
dataiter = iter(val_loader)
image = next(dataiter)[1]
print(image.shape)
image = image.to(device)


# Forward pass through the stacked autoencoder (no gradients needed)
with torch.no_grad():
    output = sae(image)

# Build a noisy copy of the sample the same way the pre-training step does.
# NOTE(review): ``output`` above was computed from the clean ``image``, not
# from this noisy copy -- the noisy image is only displayed.
noise_factor = 0.4
noisy_image = image + noise_factor * torch.randn_like(image)
noisy_image = torch.clamp(noisy_image, 0., 1.).cpu().numpy()

# Convert the tensors to numpy arrays for visualization
image = image.cpu().numpy()
output = output.cpu().numpy()

# Helper that draws one image on the current matplotlib axes
def imshow(img):
    """Undo the 0.1307 / 0.3081 normalization and show ``img`` in grayscale."""
    restored = 0.1307 + 0.3081 * img  # inverse of the normalization
    # squeeze() drops the size-1 dimensions before plotting
    plt.imshow(restored.squeeze(), cmap='gray')

# Show the input, the noisy copy and the model output side by side
plt.figure(figsize=(10, 5))

# (title, array) for each panel, in left-to-right order
_panels = (
    ('Input Image', image),
    ('Noisy Image', noisy_image),
    ('Output Image', output),
)
for _slot, (_caption, _pixels) in enumerate(_panels, start=1):
    plt.subplot(1, 3, _slot)
    imshow(torch.from_numpy(_pixels))
    plt.title(_caption)
    plt.axis('off')

# Write the figure to disk as SVG
plt.savefig("预训练图.svg", dpi=300, format="svg")

下面附上训练完成后的可视化结果图:

相关推荐
X.AI6668 分钟前
YouTube评论情感分析项目84%正确率:基于BERT的实战复现与原理解析
人工智能·深度学习·bert
Python×CATIA工业智造11 分钟前
Pycatia二次开发基础代码解析:组件识别、选择反转与链接创建技术解析
python·pycharm
艾莉丝努力练剑14 分钟前
【C++:继承】面向对象编程精要:C++继承机制深度解析与最佳实践
开发语言·c++·人工智能·继承·c++进阶
小宁爱Python29 分钟前
从零搭建 RAG 智能问答系统 6:Text2SQL 与工作流实现数据库查询
数据库·人工智能·python·django
Hard_Liquor30 分钟前
Datawhale秋训营-“大运河杯”数据开发应用创新大赛
人工智能·深度学习·算法
m0_7482412336 分钟前
Java注解与反射实现日志与校验
java·开发语言·python
运维行者_44 分钟前
AWS云服务故障复盘——从故障中汲取的 IT 运维经验
大数据·linux·运维·服务器·人工智能·云计算·aws
Saniffer_SH1 小时前
搭载高性能GPU的英伟达Nvidia DGX Spark桌面性能小怪兽国内首台开箱视频!
人工智能·深度学习·神经网络·ubuntu·机器学习·语言模型·边缘计算
数字化脑洞实验室1 小时前
AI决策vs人工决策:效率的底层逻辑与选择边界
人工智能
可触的未来,发芽的智生1 小时前
追根索源:换不同的词嵌入(词向量生成方式不同,但词与词关系接近),会出现什么结果?
javascript·人工智能·python·神经网络·自然语言处理