【Deep Learning】A Transformer encoder for multi-feature, multi-variable input with a dual head (one regression head, one classification head) and a separate embedding per feature: code implementation

python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

class CustomEmbedding(nn.Module):
    def __init__(self, num_embeddings, embedding_dim):
        super(CustomEmbedding, self).__init__()
        self.embeddings = nn.Parameter(torch.randn(num_embeddings, embedding_dim))

    def forward(self, input):
        # Plain index lookup: behaves like nn.Embedding, mapping integer ids to learned vectors
        return self.embeddings[input]

class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dims, d_model, n_heads, n_layers, seq_len, num_classes, output_steps):
        super(TimeSeriesTransformer, self).__init__()
        
        self.input_dims = input_dims  # List of embedding dimensions, one per input feature
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.seq_len = seq_len
        self.num_classes = num_classes
        self.output_steps = output_steps
        
        # Custom embedding layer for each input feature, with its own embedding dimension.
        # @todo: the vocabulary sizes (100 and 50 below) should be derived from the value range of each input feature rather than hard-coded; see the sketch after this ModuleList.
        self.embeddings = nn.ModuleList([
            CustomEmbedding(num_embeddings=100 if i == 0 else 50, embedding_dim=dim)
            for i, dim in enumerate(input_dims)
        ])
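        # Sketch for the @todo above (an assumption, not part of the original code):
        # the per-feature vocabulary sizes could be passed in instead of hard-coded, e.g.
        #   vocab_sizes = [100, 50]   # hypothetical extra constructor argument
        #   self.embeddings = nn.ModuleList([
        #       CustomEmbedding(num_embeddings=v, embedding_dim=dim)
        #       for v, dim in zip(vocab_sizes, input_dims)
        #   ])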
        
        # Register as a buffer so the encoding follows the model (e.g. under .to(device)) but is not trained
        self.register_buffer('positional_encoding', self.get_positional_encoding(seq_len, d_model))

        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads), 
            num_layers=n_layers
        )
        
        # Output layers
        self.fc_classification = nn.Linear(d_model, num_classes * output_steps)  # Output for classification (multi-step)
        self.fc_regression = nn.Linear(d_model, 1 * output_steps)  # Output for regression (multi-step)

    def get_positional_encoding(self, seq_len, d_model):
        position = np.arange(seq_len)[:, np.newaxis]
        div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
        
        pos_enc = np.zeros((seq_len, d_model))  # Create an array of shape (seq_len, d_model)
        pos_enc[:, 0::2] = np.sin(position * div_term)
        pos_enc[:, 1::2] = np.cos(position * div_term)

        return torch.FloatTensor(pos_enc).unsqueeze(0)  # Shape (1, seq_len, d_model)
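    # For reference, the code above implements the standard sinusoidal encoding:
    #   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
    #   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
    # (the exp/log combination computes 10000^(-2i/d_model) in a numerically stable way)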

    def forward(self, x):
        # Look up embeddings feature by feature: x is (batch, seq_len, num_features), e.g. (32, 10, 2);
        # each slice x[:, :, i] of shape (32, 10) goes through its own embedding table
        embeddings = [embed(x[:, :, i]) for i, embed in enumerate(self.embeddings)]  # e.g. (32, 10, 32) and (32, 10, 16)
        x = torch.cat(embeddings, dim=-1)  # Concatenate along the last dimension -> (32, 10, 48)
        
        # Ensure correct shape before adding positional encoding
        # print(f"Shape after embeddings: {x.shape}")  # Should be (batch_size, seq_len, sum(input_dims))

        # Check if input dimensions match d_model
        assert x.shape[-1] == self.d_model, "Concatenated embeddings do not match d_model."
        # x now has shape (batch, seq_len, d_model), e.g. (32, 10, 48); x.size(1) = 10

        x = x + self.positional_encoding[:, :x.size(1), :]  # self.positional_encoding is (1, seq_len, d_model), e.g. (1, 10, 48), broadcast over the batch so every position and dimension gets its encoding added
        x = x.permute(1, 0, 2)  # Change to (seq_len, batch_size, d_model)
        # The encoder preserves the shape: input and output are both (seq_len, batch_size, d_model)
        x = self.transformer_encoder(x)  # (seq_len, batch_size, d_model)
        x = x.mean(dim=0)  # Global average pooling over the sequence: average across dim=0 (the time steps), leaving one d_model-dimensional vector per sample, shape (batch_size, d_model)
        
        class_output = self.fc_classification(x)  # (batch_size, num_classes * output_steps)
        reg_output = self.fc_regression(x)  # (batch_size, 1 * output_steps)
        
        return class_output.reshape(-1, self.output_steps, self.num_classes), reg_output.reshape(-1, self.output_steps, 1)

# Parameters
# Adjust total dimensions to match d_model
input_dims = [32, 16]  # Embedding dimension for each input feature; their sum (48 here) is used as d_model
d_model = sum(input_dims)  # Ensure this matches the total of input dimensions
n_heads = 4
n_layers = 2
seq_len = 10
batch_size = 32
num_classes = 3
output_steps = 5
num_samples = 1000
num_epochs = 100
learning_rate = 0.001

# Generate synthetic time series data. The commented-out version below sampled both features from a single 0..998 range,
# which would only make sense if both embedding tables had ~1000 entries; instead each feature is sampled within its own vocabulary.
# X = torch.randint(0, 999, (num_samples, seq_len, len(input_dims)))  # Input indices
x1 = torch.randint(0, 99, (num_samples, seq_len, 1))
x2 = torch.randint(0, 49, (num_samples, seq_len, 1))
X = torch.cat([x1, x2], dim=-1)  # Concatenate along the last dimension
y_class = torch.randint(0, num_classes, (num_samples, output_steps)) 
y_reg = torch.randn(num_samples, output_steps, 1)

# Create dataset and loader
dataset = TensorDataset(X, y_class, y_reg)  # pairs the tensors up sample-by-sample, like zip
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)  # splits the dataset into shuffled mini-batches of batch_size and collates each batch

# Instantiate the model
model = TimeSeriesTransformer(input_dims, d_model, n_heads, n_layers, seq_len, num_classes, output_steps)

# Define loss functions and optimizer
criterion_class = nn.CrossEntropyLoss(ignore_index=-1)  # ignore_index=-1 only matters if some label steps were padded with -1; the synthetic labels here never are
criterion_reg = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
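# A common multi-task variant (a sketch, not part of the original script): weight the two losses,
#   total_loss = w_class * loss_class + w_reg * loss_reg
# with hypothetical weights w_class and w_reg chosen so that neither head dominates training.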

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss_class = 0.0
    running_loss_reg = 0.0
    
    for inputs, labels_class, labels_reg in data_loader:
        optimizer.zero_grad()
        outputs_class, outputs_reg = model(inputs)
        
        loss_class = criterion_class(outputs_class.view(-1, num_classes), labels_class.view(-1))  # logits (32, 5, 3) -> (160, 3); labels (32, 5) -> (160,)
        loss_reg = criterion_reg(outputs_reg.view(-1, 1), labels_reg.view(-1, 1))  # (32, 5, 1) -> (160, 1)
        
        total_loss = loss_class + loss_reg
        total_loss.backward()
        optimizer.step()
        
        running_loss_class += loss_class.item()
        running_loss_reg += loss_reg.item()
        
    print(f'Epoch [{epoch + 1}/{num_epochs}], '
          f'Classification Loss: {running_loss_class / len(data_loader):.4f}, '
          f'Regression Loss: {running_loss_reg / len(data_loader):.4f}')

# Evaluation
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels_class, labels_reg in data_loader:
        outputs_class, outputs_reg = model(inputs)
        
        # Take the arg-max class per step and flatten for comparison with the labels
        predicted = torch.argmax(outputs_class, dim=2).view(-1)  # Size (batch_size * output_steps)
        labels_flat = labels_class.view(-1)  # Size should be (batch_size * output_steps)

        # Count correct predictions
        total += labels_flat.size(0)  # Total number of samples
        correct += (predicted == labels_flat).sum().item()  # Count correct predictions

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
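
After training, the same model can be used for multi-step prediction on new sequences. Below is a minimal inference sketch, assuming the model and hyperparameters defined above; the names new_x1, new_x2 and new_x are only illustrative:

python
# Build one new input sequence with the same per-feature index ranges as the training data
new_x1 = torch.randint(0, 99, (1, seq_len, 1))  # feature 1: indices within the 100-entry vocabulary
new_x2 = torch.randint(0, 49, (1, seq_len, 1))  # feature 2: indices within the 50-entry vocabulary
new_x = torch.cat([new_x1, new_x2], dim=-1)     # shape (1, seq_len, 2)

model.eval()
with torch.no_grad():
    class_logits, reg_values = model(new_x)            # (1, output_steps, num_classes), (1, output_steps, 1)
    class_probs = torch.softmax(class_logits, dim=-1)  # per-step class probabilities
    class_pred = torch.argmax(class_logits, dim=-1)    # predicted class for each future step

print('Predicted classes per step:', class_pred.squeeze(0).tolist())
print('Predicted regression values per step:', reg_values.squeeze(0).squeeze(-1).tolist())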