【深度学习】transformer的encoder部分,多特征多变量,双头,一头回归,一头分类的代码实现,并且分开embedding的

python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

class CustomEmbedding(nn.Module):
    """A minimal lookup-table embedding backed by a raw learnable parameter.

    Behaves like the lookup part of ``nn.Embedding``: indexing the weight
    table with an integer tensor of any shape returns the matching rows,
    producing an output of shape ``(*index_shape, embedding_dim)``.
    """

    def __init__(self, num_embeddings, embedding_dim):
        super(CustomEmbedding, self).__init__()
        # Learnable (num_embeddings, embedding_dim) table, N(0, 1) init.
        table = torch.randn(num_embeddings, embedding_dim)
        self.embeddings = nn.Parameter(table)

    def forward(self, input):
        # Advanced indexing performs the row lookup for arbitrary
        # index-tensor shapes.
        return self.embeddings[input]

class TimeSeriesTransformer(nn.Module):
    """Transformer-encoder model for multivariate integer time series.

    Each input variable gets its own embedding table; the per-feature
    embeddings are concatenated to form the model dimension, a sinusoidal
    positional encoding is added, and the encoded sequence is average-pooled
    into one vector that feeds two heads:

    * a classification head: ``num_classes`` logits for each of
      ``output_steps`` future steps, and
    * a regression head: one scalar for each of ``output_steps`` steps.

    Args:
        input_dims: embedding dimension per input feature; their sum must
            equal ``d_model``.
        d_model: transformer model dimension (== sum(input_dims)).
        n_heads: number of attention heads.
        n_layers: number of encoder layers.
        seq_len: maximum sequence length for the positional encoding.
        num_classes: number of classes for the classification head.
        output_steps: number of future steps predicted by both heads.
    """

    def __init__(self, input_dims, d_model, n_heads, n_layers, seq_len, num_classes, output_steps):
        super(TimeSeriesTransformer, self).__init__()

        self.input_dims = input_dims  # List of input dimensions for each feature
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.seq_len = seq_len
        # Bug fix: forward() previously read same-named module-level globals
        # for the final reshape; store them on the module instead.
        self.num_classes = num_classes
        self.output_steps = output_steps

        # One embedding table per input feature (standard nn.Embedding
        # instead of the hand-rolled lookup).
        # NOTE(review): vocabulary sizes (100 for feature 0, 50 for the rest)
        # are hard-coded to match the synthetic data below — make them
        # configurable for real data (original @todo).
        self.embeddings = nn.ModuleList([
            nn.Embedding(num_embeddings=100 if i == 0 else 50, embedding_dim=dim)
            for i, dim in enumerate(input_dims)
        ])

        # Registered as a buffer (not a Parameter) so it moves with the
        # module on .to(device)/.cuda() but is not trained.
        self.register_buffer(
            "positional_encoding", self.get_positional_encoding(seq_len, d_model)
        )

        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads),
            num_layers=n_layers,
        )

        # Output heads: flat (steps * classes) / (steps * 1) projections,
        # reshaped into per-step predictions in forward().
        self.fc_classification = nn.Linear(d_model, num_classes * output_steps)
        self.fc_regression = nn.Linear(d_model, 1 * output_steps)

    def get_positional_encoding(self, seq_len, d_model):
        """Return the standard sinusoidal positional encoding.

        Shape (1, seq_len, d_model) so it broadcasts over the batch axis.
        """
        position = np.arange(seq_len)[:, np.newaxis]
        div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))

        pos_enc = np.zeros((seq_len, d_model))
        pos_enc[:, 0::2] = np.sin(position * div_term)  # even dims: sine
        pos_enc[:, 1::2] = np.cos(position * div_term)  # odd dims: cosine

        return torch.FloatTensor(pos_enc).unsqueeze(0)

    def forward(self, x):
        """Run the model.

        Args:
            x: integer index tensor of shape (batch, seq_len, n_features)
               with n_features == len(input_dims).

        Returns:
            Tuple of
            * class logits, shape (batch, output_steps, num_classes)
            * regression output, shape (batch, output_steps, 1)

        Raises:
            ValueError: if the concatenated embedding width != d_model.
        """
        # Embed each feature column separately, then concatenate along the
        # channel axis -> (batch, seq_len, sum(input_dims)).
        embeddings = [embed(x[:, :, i]) for i, embed in enumerate(self.embeddings)]
        x = torch.cat(embeddings, dim=-1)

        # Explicit exception instead of `assert` (asserts vanish under -O).
        if x.shape[-1] != self.d_model:
            raise ValueError("Concatenated embeddings do not match d_model.")

        # Broadcast-add the positional encoding over the batch dimension.
        x = x + self.positional_encoding[:, :x.size(1), :]
        x = x.permute(1, 0, 2)  # (seq_len, batch, d_model) as the encoder expects
        x = self.transformer_encoder(x)  # shape-preserving
        x = x.mean(dim=0)  # average-pool over time -> (batch, d_model)

        class_output = self.fc_classification(x)  # (batch, num_classes * output_steps)
        reg_output = self.fc_regression(x)  # (batch, output_steps)

        # Bug fix: reshape with the stored hyper-parameters rather than
        # module-level globals of the same names.
        return (
            class_output.reshape(-1, self.output_steps, self.num_classes),
            reg_output.reshape(-1, self.output_steps, 1),
        )

# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
input_dims = [32, 16]  # Embedding dim per input feature; their sum is d_model.
d_model = sum(input_dims)  # Keep d_model consistent with the embeddings.
n_heads = 4
n_layers = 2
seq_len = 10
batch_size = 32
num_classes = 3
output_steps = 5
num_samples = 1000
num_epochs = 100
learning_rate = 0.001

# Synthetic integer time series. torch.randint's upper bound is EXCLUSIVE,
# so sample up to the full vocabulary sizes (100 and 50) declared by the
# model's embedding tables; the original (0, 99)/(0, 49) bounds never
# produced the last valid index of either table.
x1 = torch.randint(0, 100, (num_samples, seq_len, 1))
x2 = torch.randint(0, 50, (num_samples, seq_len, 1))
X = torch.cat([x1, x2], dim=-1)  # (num_samples, seq_len, 2)
y_class = torch.randint(0, num_classes, (num_samples, output_steps))
y_reg = torch.randn(num_samples, output_steps, 1)

# TensorDataset zips the tensors sample-wise; the loader shuffles and
# batches them.
dataset = TensorDataset(X, y_class, y_reg)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model, one loss per head, and the optimizer.
model = TimeSeriesTransformer(input_dims, d_model, n_heads, n_layers, seq_len, num_classes, output_steps)

criterion_class = nn.CrossEntropyLoss(ignore_index=-1)  # labels are never -1 here, so ignore_index is inert
criterion_reg = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: jointly optimize both heads by summing their losses.
for epoch in range(num_epochs):
    model.train()
    epoch_class_loss = 0.0
    epoch_reg_loss = 0.0

    for batch_x, batch_y_class, batch_y_reg in data_loader:
        optimizer.zero_grad()
        pred_class, pred_reg = model(batch_x)

        # Flatten (batch, steps, classes) -> (batch*steps, classes) for
        # cross-entropy, and (batch, steps, 1) -> (batch*steps, 1) for MSE.
        loss_class = criterion_class(pred_class.view(-1, num_classes), batch_y_class.view(-1))
        loss_reg = criterion_reg(pred_reg.view(-1, 1), batch_y_reg.view(-1, 1))

        (loss_class + loss_reg).backward()
        optimizer.step()

        epoch_class_loss += loss_class.item()
        epoch_reg_loss += loss_reg.item()

    print(f'Epoch [{epoch + 1}/{num_epochs}], '
          f'Classification Loss: {epoch_class_loss / len(data_loader):.4f}, '
          f'Regression Loss: {epoch_reg_loss / len(data_loader):.4f}')

# Evaluation: per-step classification accuracy.
# NOTE(review): this evaluates on the same `data_loader` used for training —
# there is no held-out split, so the number is an optimistic training-set
# metric. (Also removed a duplicated "# Evaluation" comment line.)
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels_class, labels_reg in data_loader:
        outputs_class, outputs_reg = model(inputs)

        # Argmax over the class axis, then flatten so each of the
        # batch * output_steps predictions counts individually.
        predicted = torch.argmax(outputs_class, dim=2).view(-1)
        labels_flat = labels_class.view(-1)

        total += labels_flat.size(0)
        correct += (predicted == labels_flat).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
相关推荐
Icomi_1 小时前
【外文原版书阅读】《机器学习前置知识》1.线性代数的重要性,初识向量以及向量加法
c语言·c++·人工智能·深度学习·神经网络·机器学习·计算机视觉
IT古董1 小时前
【深度学习】常见模型-生成对抗网络(Generative Adversarial Network, GAN)
人工智能·深度学习·生成对抗网络
Jackilina_Stone1 小时前
【论文阅读笔记】“万字”关于深度学习的图像和视频阴影检测、去除和生成的综述笔记 | 2024.9.3
论文阅读·人工智能·笔记·深度学习·ai
梦云澜1 小时前
论文阅读(三):微阵列数据的图形模型和多变量分析
论文阅读·深度学习
梦云澜1 小时前
论文阅读(二):理解概率图模型的两个要点:关于推理和学习的知识
论文阅读·深度学习·学习
羊小猪~~2 小时前
深度学习项目--基于LSTM的糖尿病预测探究(pytorch实现)
人工智能·pytorch·rnn·深度学习·神经网络·机器学习·lstm
陌北v12 小时前
PyTorch广告点击率预测(CTR)利用深度学习提升广告效果
人工智能·pytorch·python·深度学习·ctr
算法黑哥6 小时前
损失函数曲面变平坦的方法
深度学习·对抗攻击
点云SLAM13 小时前
CVPR 2024 无人机/遥感/卫星图像方向总汇(航空图像和交叉视角定位)
深度学习·计算机视觉·cvpr·遥感·卫星图像·交叉视觉定位
白白糖14 小时前
深度学习 Pytorch 单层神经网络
人工智能·pytorch·深度学习·神经网络