自定义数据在深度学习中的应用方法
我将通过几个实际案例来展示如何将非标准数据应用到深度学习中。这些案例涵盖了传感器信号、时间序列、图数据等常见类型。
案例1:传感器信号分类(工业振动数据)
python
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
class VibrationSensorDataset(Dataset):
    """Synthetic vibration-sensor dataset for machinery fault classification.

    Each sample is a multi-channel vibration signal stored as a
    (seq_length, n_channels) array with one of four condition labels
    (0 = normal, 1 = bearing wear, 2 = unbalance, 3 = misalignment).
    Labels are assigned round-robin, so the classes are balanced.
    """

    def __init__(self, n_samples=1000, seq_length=256, n_channels=3):
        self.seq_length = seq_length
        self.n_channels = n_channels
        self.class_names = ['正常', '轴承磨损', '不平衡', '不对中']
        self.n_classes = len(self.class_names)

        # One generator per fault class, indexed by label
        # (replaces the original if/elif dispatch chain).
        generators = (
            self._generate_normal_vibration,        # 0: normal
            self._generate_bearing_wear_vibration,  # 1: bearing wear
            self._generate_unbalance_vibration,     # 2: unbalance
            self._generate_misalignment_vibration,  # 3: misalignment
        )

        self.data = []
        self.labels = []
        for i in range(n_samples):
            label = i % self.n_classes
            self.labels.append(label)
            self.data.append(generators[label]())
        self.data = np.array(self.data)
        self.labels = np.array(self.labels)

    def _generate_normal_vibration(self):
        """Normal running condition: low harmonics, little noise."""
        t = np.linspace(0, 1, self.seq_length)
        signals = []
        for ch in range(self.n_channels):
            base_freq = 50  # 50 Hz fundamental
            signal = 0.5 * np.sin(2 * np.pi * base_freq * t)
            signal += 0.2 * np.sin(2 * np.pi * 2 * base_freq * t)  # 2nd harmonic
            signal += 0.1 * np.sin(2 * np.pi * 3 * base_freq * t)  # 3rd harmonic
            signal += 0.05 * np.random.randn(self.seq_length)      # light noise
            signals.append(signal)
        return np.array(signals).T  # shape: (seq_length, n_channels)

    def _generate_bearing_wear_vibration(self):
        """Bearing wear: characteristic fault frequency, heavy noise, impacts."""
        t = np.linspace(0, 1, self.seq_length)
        signals = []
        for ch in range(self.n_channels):
            base_freq = 50
            signal = 0.3 * np.sin(2 * np.pi * base_freq * t)
            # Example bearing defect frequency.
            bearing_freq = 4.2 * base_freq
            signal += 0.4 * np.sin(2 * np.pi * bearing_freq * t)
            signal += 0.3 * np.random.randn(self.seq_length)  # elevated noise
            # Periodic impact bursts with exponential decay. The slice is
            # clamped so a burst near the end cannot overrun the signal
            # (the original added a fixed 10-sample burst into a possibly
            # shorter slice, which raises a broadcast error for short
            # seq_length values).
            burst = 1.0 * np.exp(-np.arange(10) / 2.0)
            for k in range(5):
                start = int(0.1 * k * self.seq_length)
                if start < self.seq_length:
                    end = min(start + 10, self.seq_length)
                    signal[start:end] += burst[:end - start]
            signals.append(signal)
        return np.array(signals).T

    def _generate_unbalance_vibration(self):
        """Rotor unbalance: dominant 1x running-speed component."""
        t = np.linspace(0, 1, self.seq_length)
        signals = []
        for ch in range(self.n_channels):
            base_freq = 50
            signal = 1.0 * np.sin(2 * np.pi * base_freq * t)
            # Phase varies with the sensor channel.
            phase_shift = ch * np.pi / 3
            signal += 0.3 * np.sin(2 * np.pi * base_freq * t + phase_shift)
            signal += 0.1 * np.random.randn(self.seq_length)
            signals.append(signal)
        return np.array(signals).T

    def _generate_misalignment_vibration(self):
        """Shaft misalignment: pronounced 2x and 3x harmonics."""
        t = np.linspace(0, 1, self.seq_length)
        signals = []
        for ch in range(self.n_channels):
            base_freq = 50
            signal = 0.4 * np.sin(2 * np.pi * base_freq * t)
            signal += 0.6 * np.sin(2 * np.pi * 2 * base_freq * t)  # strong 2x
            signal += 0.4 * np.sin(2 * np.pi * 3 * base_freq * t)  # strong 3x
            signal += 0.2 * np.random.randn(self.seq_length)
            signals.append(signal)
        return np.array(signals).T

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Layout expected by 1D CNNs: (n_channels, seq_length).
        data_tensor = torch.tensor(self.data[idx], dtype=torch.float32).transpose(0, 1)
        # 0-dim long tensor so DataLoader collates labels to shape (batch,).
        label_tensor = torch.tensor(int(self.labels[idx]), dtype=torch.long)
        return data_tensor, label_tensor
class VibrationCNN1D(nn.Module):
    """1D CNN for multi-channel vibration-signal classification.

    Args:
        n_channels: number of input sensor channels.
        n_classes: number of output classes.
        seq_length: expected input sequence length. The classifier-head
            size is derived from it instead of being hard-coded for 256
            as in the original (which broke for any other length).
    """

    def __init__(self, n_channels=3, n_classes=4, seq_length=256):
        super(VibrationCNN1D, self).__init__()
        self.conv_layers = nn.Sequential(
            # Conv stage 1
            nn.Conv1d(n_channels, 64, kernel_size=7, padding=3),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
            # Conv stage 2
            nn.Conv1d(64, 128, kernel_size=5, padding=2),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2),
            # Conv stage 3
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )
        # 'same'-padded convs keep the length; three stride-2 pools shrink
        # it by a factor of 8 (with flooring), e.g. 256 -> 32.
        flat_features = 256 * (seq_length // 8)
        self.fc_layers = nn.Sequential(
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, n_classes),
        )

    def forward(self, x):
        """x: (batch, n_channels, seq_length) -> logits (batch, n_classes)."""
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # flatten
        x = self.fc_layers(x)
        return x
# Usage example
def train_vibration_classifier():
    """Build the dataset, loaders and model for vibration classification.

    Returns:
        (model, train_loader, val_loader, test_loader) so callers can run
        an actual training loop. The original built all of these and then
        silently discarded them, returning None.
    """
    dataset = VibrationSensorDataset(n_samples=2000, seq_length=256, n_channels=3)

    # 70 / 15 / 15 split; test takes the remainder so sizes always sum up.
    train_size = int(0.7 * len(dataset))
    val_size = int(0.15 * len(dataset))
    test_size = len(dataset) - train_size - val_size
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size]
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    model = VibrationCNN1D(n_channels=3, n_classes=4)
    # Training loop omitted for brevity (same pattern as earlier examples).
    return model, train_loader, val_loader, test_loader
# Visualize vibration signals
def visualize_vibration_signals():
    """Plot one example multi-channel signal for each of the four classes."""
    dataset = VibrationSensorDataset(n_samples=4, seq_length=256, n_channels=3)
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    for sample_idx, ax in enumerate(axes.flatten()):
        signal, label = dataset[sample_idx]
        # signal has shape (3, 256); transpose to (256, 3) for plotting.
        channels = signal.numpy().T
        for ch in range(3):
            ax.plot(channels[:, ch], label=f'通道{ch+1}', alpha=0.7)
        ax.set_title(f'类别: {dataset.class_names[label.item()]}')
        ax.legend()
        ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
案例2:时间序列预测(股票价格/销售量)
python
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for multi-step time-series forecasting.

    Args:
        data: array of shape (n_timesteps, n_features).
        lookback: number of past time steps used as model input.
        forecast_horizon: number of future steps of feature 0 to predict.
    """

    def __init__(self, data, lookback=60, forecast_horizon=30):
        self.lookback = lookback
        self.forecast_horizon = forecast_horizon

        self.samples = []
        self.targets = []
        # Fix: the last valid window start is len(data) - forecast_horizon
        # inclusive; the original range stopped one position early and
        # silently dropped the final sample.
        for i in range(lookback, len(data) - forecast_horizon + 1):
            sample = data[i - lookback:i]             # past `lookback` steps
            target = data[i:i + forecast_horizon, 0]  # future steps, feature 0
            self.samples.append(sample)
            self.targets.append(target)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = torch.tensor(self.samples[idx], dtype=torch.float32)
        target = torch.tensor(self.targets[idx], dtype=torch.float32)
        return sample, target
class TimeSeriesLSTM(nn.Module):
    """LSTM-based regressor for time-series forecasting.

    Consumes a window of shape (batch, lookback, input_dim) and maps the
    hidden state of the final time step to `output_dim` predicted values.
    """

    def __init__(self, input_dim, hidden_dim=128, output_dim=1, num_layers=2):
        super(TimeSeriesLSTM, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=0.2,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, output_dim),
        )

    def forward(self, x):
        """x: (batch, lookback, input_dim) -> (batch, output_dim)."""
        sequence_out, _ = self.lstm(x)
        # Only the last time step feeds the regression head.
        final_step = sequence_out[:, -1, :]
        return self.fc(final_step)
# Generate synthetic time-series data
def generate_synthetic_time_series(n_timesteps=1000, n_features=5):
    """Return a (n_timesteps, n_features) array of correlated noisy series."""
    t = np.arange(n_timesteps) / 100.0
    data = np.zeros((n_timesteps, n_features))
    # Feature 0: linear trend + sinusoidal seasonality + Gaussian noise.
    data[:, 0] = 10 + 0.5 * t + 3 * np.sin(2 * np.pi * t / 50.0)
    data[:, 0] += 2 * np.random.randn(n_timesteps)
    # Remaining features: noisy copies correlated with feature 0.
    for feature_idx in range(1, n_features):
        data[:, feature_idx] = 0.7 * data[:, 0] + np.random.randn(n_timesteps) * 2
    return data
案例3:图数据分类(社交网络/分子结构)
python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, global_mean_pool
import numpy as np
class GraphDataset(Dataset):
    """Synthetic graph-classification dataset of molecule-like graphs."""

    def __init__(self, n_graphs=500):
        self.n_graphs = n_graphs
        self.class_names = ['正常分子', '有毒分子', '药物分子', '催化剂']
        self.graphs = []
        self.labels = []
        for idx in range(n_graphs):
            label = idx % 4
            self.labels.append(label)
            # Random graph standing in for a molecular structure.
            self.graphs.append(self._generate_molecule_like_graph(label))

    def _generate_molecule_like_graph(self, label):
        """Build a random torch_geometric `Data` object resembling a molecule."""
        n_nodes = np.random.randint(10, 50)          # random atom count
        node_features = np.random.randn(n_nodes, 8)  # random "atom-type" features

        # Random bonds: every node links to 1-4 sampled nodes;
        # self-loops are dropped.
        edges = []
        for src in range(n_nodes):
            n_edges = np.random.randint(1, 5)
            neighbors = np.random.choice(n_nodes, n_edges, replace=False)
            edges.extend([src, dst] for dst in neighbors if src != dst)

        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        x = torch.tensor(node_features, dtype=torch.float)
        y = torch.tensor([label], dtype=torch.long)
        return Data(x=x, edge_index=edge_index, y=y)

    def __len__(self):
        return len(self.graphs)

    def __getitem__(self, idx):
        return self.graphs[idx]
class GNNModel(nn.Module):
    """Three-layer GCN with global mean pooling for graph classification."""

    def __init__(self, node_features=8, hidden_dim=64, num_classes=4):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(node_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim * 2)
        self.conv3 = GCNConv(hidden_dim * 2, hidden_dim)
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, num_classes),
        )

    def forward(self, data, batch=None):
        """Return per-graph log-probabilities, shape (n_graphs, num_classes)."""
        x, edge_index = data.x, data.edge_index
        # Message passing: dropout between the first two layers only.
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.3, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=0.3, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        if batch is None:
            # Single graph: average all node embeddings into one vector.
            pooled = torch.mean(x, dim=0, keepdim=True)
        else:
            pooled = global_mean_pool(x, batch)
        logits = self.fc(pooled)
        return F.log_softmax(logits, dim=1)
案例4:多模态数据(图像+文本)
python
class MultimodalDataset(Dataset):
    """Synthetic multimodal (image + text) product-classification dataset.

    __getitem__ yields ((image, text_embedding), label), where image is a
    random (3, 224, 224) tensor and text_embedding a random 768-dim vector
    standing in for a BERT sentence embedding.
    """

    def __init__(self, n_samples=1000):
        self.n_samples = n_samples
        self.class_names = ['电子产品', '服装', '食品', '书籍']
        self.image_data = []
        self.text_data = []
        self.labels = []
        for i in range(n_samples):
            label = i % 4
            self.labels.append(label)
            self.image_data.append(self._generate_synthetic_image(label))
            self.text_data.append(self._generate_synthetic_text(label))

    def _generate_synthetic_image(self, label):
        """Return a random tensor standing in for an RGB image.

        A real implementation would load and transform an actual image here.
        """
        return torch.randn(3, 224, 224)  # simulated RGB image

    def _generate_synthetic_text(self, label):
        """Return a random vector standing in for a text embedding.

        A real implementation would pick a class-specific product
        description and run it through a tokenizer plus a text encoder
        such as BERT. The original sampled a description string and then
        discarded it; that dead lookup has been removed.
        """
        return torch.randn(768)  # simulated BERT embedding

    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        image = self.image_data[idx]
        text = self.text_data[idx]
        # 0-dim long tensor so DataLoader collates labels to shape (batch,).
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return (image, text), label
class MultimodalClassifier(nn.Module):
    """Two-branch classifier that fuses image and text features.

    NOTE: the image branch flattens raw pixels straight into a Linear
    layer — a lightweight stand-in for a real CNN backbone.
    """

    def __init__(self, image_features=512, text_features=768, num_classes=4):
        super(MultimodalClassifier, self).__init__()
        # Image branch: flattened pixels -> image_features.
        self.image_encoder = nn.Sequential(
            nn.Linear(3 * 224 * 224, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, image_features),
        )
        # Text branch: text_features -> text_features // 2.
        self.text_encoder = nn.Sequential(
            nn.Linear(text_features, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, text_features // 2),
        )
        # Fusion head over the concatenated branch outputs.
        self.fusion = nn.Sequential(
            nn.Linear(image_features + text_features // 2, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """x = (image, text); returns logits of shape (batch, num_classes)."""
        image, text = x
        # Flatten pixels per sample before the image branch.
        flattened = image.view(image.shape[0], -1)
        image_vec = self.image_encoder(flattened)
        text_vec = self.text_encoder(text)
        fused = torch.cat([image_vec, text_vec], dim=1)
        return self.fusion(fused)
案例5:音频信号处理(语音命令识别)
python
class AudioDataset(Dataset):
    """Synthetic spoken-command dataset: 1 s of 16 kHz audio per sample."""

    def __init__(self, n_samples=1000, audio_length=16000):
        self.audio_length = audio_length
        self.class_names = ['打开', '关闭', '前进', '后退', '停止']
        self.audio_data = []
        self.labels = []
        for idx in range(n_samples):
            label = idx % len(self.class_names)
            self.labels.append(label)
            self.audio_data.append(self._generate_synthetic_audio(label))

    def _generate_synthetic_audio(self, label):
        """Synthesize a tone whose carrier frequency encodes the command."""
        t = np.linspace(0, 1, self.audio_length)
        # Command label -> carrier frequency (Hz); anything unmapped
        # falls back to 600, matching the original else branch.
        carrier = {0: 200, 1: 300, 2: 400, 3: 500}.get(label, 600)
        waveform = np.sin(2 * np.pi * carrier * t)
        # Amplitude-modulated envelope plus additive Gaussian noise.
        waveform = waveform * (1 + 0.5 * np.sin(2 * np.pi * 5 * t))
        waveform += 0.1 * np.random.randn(self.audio_length)
        return torch.tensor(waveform, dtype=torch.float32)

    def __len__(self):
        return len(self.audio_data)

    def __getitem__(self, idx):
        audio = self.audio_data[idx]
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return audio, label
class AudioClassifier(nn.Module):
    """1D CNN that classifies raw audio waveforms.

    Args:
        input_length: number of audio samples per clip.
        num_classes: number of command classes.
    """

    def __init__(self, input_length=16000, num_classes=5):
        super(AudioClassifier, self).__init__()
        # Convolutional front end applied directly to the raw waveform.
        self.conv_layers = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=80, stride=4),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(4),
            nn.Conv1d(64, 128, kernel_size=3),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(4),
            nn.Conv1d(128, 256, kernel_size=3),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(4),
        )
        # Derive the flattened feature size from the layers themselves.
        conv_output_length = self._get_conv_output_length(input_length)
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * conv_output_length, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes),
        )

    def _get_conv_output_length(self, input_length):
        """Compute the temporal length after `conv_layers`.

        Walks the actual Conv1d/MaxPool1d modules and applies the standard
        output-size formula, so the result stays correct if the stack above
        is edited (the original hand-copied each kernel size and stride
        into this method, which silently breaks on any architecture change).
        """
        length = input_length
        for layer in self.conv_layers:
            if isinstance(layer, nn.Conv1d):
                k, s = layer.kernel_size[0], layer.stride[0]
                p, d = layer.padding[0], layer.dilation[0]
                length = (length + 2 * p - d * (k - 1) - 1) // s + 1
            elif isinstance(layer, nn.MaxPool1d):
                k = layer.kernel_size
                s = layer.stride if layer.stride is not None else k
                p, d = layer.padding, layer.dilation
                length = (length + 2 * p - d * (k - 1) - 1) // s + 1
        return length

    def forward(self, x):
        """x: (batch, audio_length) -> logits (batch, num_classes)."""
        x = x.unsqueeze(1)  # add channel dim: (batch, 1, audio_length)
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x
数据处理通用模式总结
python
class GenericDataProcessor:
    """Template describing a generic preprocessing pipeline for custom data.

    NOTE: this is a skeleton. `_normalize`, `_split_data` and the
    per-type feature extractors (`_extract_time_series_features`, etc.)
    are expected to be supplied by a subclass; calling
    `process_custom_data` on this base class as-is raises AttributeError
    for the missing hooks.
    """

    def process_custom_data(self, raw_data, data_type='time_series'):
        """Run the clean -> featurize -> normalize -> split -> wrap pipeline.

        Args:
            raw_data: the raw input data.
            data_type: one of 'time_series', 'sensor', 'graph', 'audio',
                'image_text'.
        """
        # 1. Clean the raw data.
        cleaned_data = self._clean_data(raw_data)
        # 2. Feature extraction / engineering.
        features = self._extract_features(cleaned_data, data_type)
        # 3. Normalization / standardization.
        normalized_data = self._normalize(features)
        # 4. Train/val/test split.
        train_data, val_data, test_data = self._split_data(normalized_data)
        # 5. Wrap the splits in a Dataset object.
        dataset = self._create_dataset(train_data, val_data, test_data, data_type)
        return dataset

    def _clean_data(self, data):
        """Clean the data (outlier removal, missing values, denoising).

        The base implementation is a deliberate no-op placeholder.
        """
        return data

    def _extract_features(self, data, data_type):
        """Dispatch to a type-specific feature extractor; passthrough otherwise."""
        if data_type == 'time_series':
            # Statistical features: mean, variance, peaks, ...
            features = self._extract_time_series_features(data)
        elif data_type == 'sensor':
            # Time-domain and frequency-domain features.
            features = self._extract_sensor_features(data)
        elif data_type == 'audio':
            # MFCC / spectral features.
            features = self._extract_audio_features(data)
        else:
            features = data
        return features

    def _create_dataset(self, train_data, val_data, test_data, data_type):
        """Build the Dataset object matching `data_type`.

        Raises:
            ValueError: if `data_type` has no registered Dataset class.
        """
        if data_type == 'time_series':
            return TimeSeriesDataset(train_data)
        elif data_type == 'sensor':
            return VibrationSensorDataset()
        # ... other data types
        # Typo fixed in the original message ("数数类型" -> "数据类型").
        raise ValueError(f"不支持的数据类型: {data_type}")
# Utility helpers
class DataAugmentation:
    """Static data-augmentation helpers for time-series / sensor data."""

    @staticmethod
    def augment_time_series(data, methods=('jitter', 'scaling', 'rotation')):
        """Return an augmented floating-point copy of `data`.

        Args:
            data: array-like, shape (timesteps,) or (timesteps, features).
            methods: which augmentations to apply. The default is an
                immutable tuple — the original used a mutable list default,
                a classic Python pitfall.
        """
        # Work on a float copy: integer input would otherwise fail on the
        # in-place float noise/scaling below; the input is never mutated.
        augmented = np.array(data, dtype=float)
        if 'jitter' in methods:
            # Additive Gaussian noise.
            augmented += np.random.normal(0, 0.05, augmented.shape)
        if 'scaling' in methods:
            # Uniform random rescaling.
            augmented *= np.random.uniform(0.8, 1.2)
        if 'rotation' in methods:
            # Rotate the first two channels of a multivariate series.
            if augmented.ndim > 1 and augmented.shape[1] > 1:
                angle = np.random.uniform(-np.pi / 12, np.pi / 12)
                rotation_matrix = np.array([
                    [np.cos(angle), -np.sin(angle)],
                    [np.sin(angle), np.cos(angle)],
                ])
                augmented[:, :2] = np.dot(augmented[:, :2], rotation_matrix)
        return augmented

    @staticmethod
    def augment_sensor_data(data, methods=('time_warp', 'magnitude_warp')):
        """Placeholder for sensor-specific augmentations.

        TODO: implement time-warping and magnitude-warping.
        """
        pass
关键要点总结
- 数据理解:首先理解数据的物理意义和结构
- 特征工程:根据数据特点设计合适的特征
- 模型选择:
- 时间序列:LSTM, GRU, 1D-CNN, Transformer
- 传感器数据:1D-CNN + LSTM, WaveNet
- 图数据:GCN, GAT, GraphSAGE
- 多模态:多分支网络,特征融合
- 评估指标:根据任务选择合适指标(准确率、F1、MAE、RMSE等)
- 部署考虑:模型大小、推理速度、资源限制
这种模块化的设计可以让您轻松地将自己的数据应用到深度学习中,无论是工业传感器数据、医疗信号还是金融时间序列。