A Summary of Commonly Used PyTorch Methods
🔹 Construction / Creation
torch.xxx: torch.tensor(), torch.from_numpy(), torch.as_tensor(), torch.arange(), torch.linspace(), torch.logspace(), torch.zeros(), torch.ones(), torch.full(), torch.eye(), torch.empty(), torch.rand(), torch.randint(), torch.randn(), torch.normal(), torch.zeros_like(), torch.ones_like(), torch.empty_like(), torch.rand_like()
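A quick sketch of a few of these creation calls (the values are arbitrary):

```python
import torch
import numpy as np

t = torch.tensor([[1.0, 2.0], [3.0, 4.0]])  # copy data into a new tensor
a = torch.arange(0, 10, 2)                  # tensor([0, 2, 4, 6, 8])
z = torch.zeros(2, 3)                       # 2x3 tensor of zeros
r = torch.rand(2, 3)                        # uniform samples in [0, 1)
n = torch.from_numpy(np.array([1, 2, 3]))   # shares memory with the NumPy array
zl = torch.zeros_like(t)                    # zeros with t's shape and dtype
```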
🔹 Random Seeds
torch.xxx: torch.manual_seed(), torch.initial_seed(), torch.get_rng_state(), torch.set_rng_state()
torch.cuda.xxx: torch.cuda.manual_seed(), torch.cuda.manual_seed_all()
Classes: torch.Generator
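A minimal reproducibility sketch (the seed values are arbitrary):

```python
import torch

torch.manual_seed(42)                  # seed the global RNG (CPU and CUDA)
g = torch.Generator().manual_seed(0)   # an independent, local RNG
x = torch.rand(3, generator=g)         # draw from the local generator
state = torch.get_rng_state()          # snapshot the global RNG state...
torch.set_rng_state(state)             # ...and restore it later
```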
🔹 Tensor Operations / Shape Manipulation
tensor.xxx: tensor.reshape(), tensor.view(), tensor.squeeze(), tensor.unsqueeze(), tensor.transpose(), tensor.permute(), tensor.repeat(), tensor.expand(), tensor.flatten(), tensor.unflatten(), tensor.flip(), tensor.roll(), tensor.narrow(), tensor.select()
torch.xxx: torch.cat(), torch.stack(), torch.chunk(), torch.split(), torch.rot90()
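A short sketch of the most common shape operations:

```python
import torch

x = torch.arange(6)               # shape [6]
m = x.reshape(2, 3)               # shape [2, 3]
m.unsqueeze(0).shape              # [1, 2, 3]: insert a new dim
m.transpose(0, 1).shape           # [3, 2]: swap two dims
m.permute(1, 0).shape             # [3, 2]: reorder all dims
torch.cat([m, m], dim=0).shape    # [4, 3]: join along an existing dim
torch.stack([m, m], dim=0).shape  # [2, 2, 3]: join along a new dim
```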
🔹 Math Operations
tensor.xxx: tensor.add(), tensor.sub(), tensor.mul(), tensor.div(), tensor.pow(), tensor.exp(), tensor.log(), tensor.log10(), tensor.log2(), tensor.sqrt(), tensor.sin(), tensor.cos(), tensor.tan(), tensor.abs(), tensor.ceil(), tensor.floor(), tensor.round(), tensor.sum(), tensor.prod(), tensor.cumsum(), tensor.cumprod(), tensor.clamp(), tensor.clip(), tensor.sigmoid(), tensor.tanh()
torch.xxx: torch.add(), torch.sub(), torch.mul(), torch.div(), torch.pow(), torch.exp(), torch.log(), torch.sqrt(), torch.sin(), torch.cos(), torch.tan(), torch.abs(), torch.sum(), torch.prod(), torch.cumsum(), torch.cumprod(), torch.clamp(), torch.sigmoid(), torch.tanh()
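Most of these come in both method and function form; a brief sketch:

```python
import torch

x = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
y = x + 1                    # same as torch.add(x, 1) or x.add(1)
x.sum()                      # tensor(10.)
x.sum(dim=0)                 # column sums: tensor([4., 6.])
x.clamp(min=2.0, max=3.0)    # values clipped to [2, 3]
x.exp().log()                # round-trips back to x (up to float error)
```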
🔹 Statistics
tensor.xxx: tensor.mean(), tensor.median(), tensor.mode(), tensor.std(), tensor.var(), tensor.min(), tensor.max(), tensor.argmin(), tensor.argmax(), tensor.unique(), tensor.topk(), tensor.sort(), tensor.argsort(), tensor.quantile(), tensor.kthvalue()
torch.xxx: torch.mean(), torch.median(), torch.mode(), torch.std(), torch.var(), torch.min(), torch.max(), torch.argmin(), torch.argmax(), torch.unique(), torch.topk(), torch.sort(), torch.argsort(), torch.quantile(), torch.kthvalue()
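A small sketch of the reduction and ranking calls:

```python
import torch

x = torch.tensor([3.0, 1.0, 4.0, 1.0, 5.0])
x.mean(), x.std()            # tensor(2.8000), tensor(1.7889)
x.max(), x.argmax()          # tensor(5.), tensor(4)
vals, idx = x.topk(2)        # the two largest values and their indices
sorted_x, order = x.sort()   # ascending values plus the sorting permutation
```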
🔹 Indexing / Conditionals
tensor.xxx: tensor.nonzero(), tensor.masked_select(), tensor.masked_fill(), tensor.any(), tensor.all()
torch.xxx: torch.where(), torch.nonzero(), torch.index_select(), torch.masked_select(), torch.gather(), torch.scatter(), torch.take(), torch.masked_fill(), torch.any(), torch.all()
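A sketch of conditional selection and gathering:

```python
import torch

x = torch.tensor([[1, -2], [-3, 4]])
torch.where(x > 0, x, torch.zeros_like(x))  # keep positives, zero out the rest
x.masked_select(x > 0)                      # tensor([1, 4]), always 1-D
x.nonzero()                                 # indices of the non-zero entries
idx = torch.tensor([[0, 0], [1, 0]])
x.gather(1, idx)                            # per-row column picks: [[1, 1], [4, -3]]
```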
🔹 Comparison Operations
tensor.xxx: tensor.eq(), tensor.ne(), tensor.lt(), tensor.le(), tensor.gt(), tensor.ge(), tensor.equal(), tensor.isnan(), tensor.isinf(), tensor.isfinite()
torch.xxx: torch.eq(), torch.ne(), torch.lt(), torch.le(), torch.gt(), torch.ge(), torch.equal(), torch.allclose(), torch.isnan(), torch.isinf(), torch.isfinite()
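A sketch of elementwise comparisons and float checks:

```python
import torch

a = torch.tensor([1.0, float('nan'), float('inf')])
b = torch.tensor([1.0, 2.0, 3.0])
a.isnan()                    # tensor([False,  True, False])
a.isfinite()                 # tensor([ True, False, False])
torch.eq(a, b)               # elementwise ==, returns a bool tensor
torch.allclose(b, b + 1e-9)  # True: closeness within a tolerance
```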
🔹 Linear Algebra
tensor.xxx: tensor.matmul(), tensor.mm(), tensor.bmm(), tensor.dot(), tensor.cross(), tensor.norm()
torch.xxx: torch.matmul(), torch.mm(), torch.bmm(), torch.dot(), torch.cross(), torch.norm()
torch.linalg.xxx: torch.linalg.det(), torch.linalg.inv(), torch.linalg.pinv(), torch.linalg.matrix_rank(), torch.linalg.solve(), torch.linalg.eig(), torch.linalg.svd(), torch.linalg.qr(), torch.linalg.cholesky(), torch.linalg.lstsq(), torch.linalg.norm()
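A sketch with a small 2x2 system (values arbitrary):

```python
import torch

A = torch.tensor([[2.0, 0.0], [0.0, 4.0]])
b = torch.tensor([2.0, 8.0])
A @ A                      # same as torch.matmul(A, A)
torch.linalg.det(A)        # tensor(8.)
torch.linalg.inv(A)        # diag(0.5, 0.25)
torch.linalg.solve(A, b)   # tensor([1., 2.]); prefer solve() over inv() @ b
torch.linalg.norm(b)       # Euclidean norm, about 8.246
```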
🔹 Automatic Differentiation (autograd)
Tensor attributes: tensor.requires_grad, tensor.grad
Tensor methods: tensor.backward(), tensor.detach()
torch.xxx: torch.no_grad(), torch.enable_grad(), torch.set_grad_enabled()
Classes: torch.autograd.Function (for custom autograd operations)
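A minimal autograd sketch:

```python
import torch

x = torch.tensor([2.0, 3.0], requires_grad=True)
y = (x ** 2).sum()      # y = x0^2 + x1^2
y.backward()            # populate x.grad with dy/dx
print(x.grad)           # tensor([4., 6.])

with torch.no_grad():   # no graph is built, e.g. during inference
    z = x * 2
d = x.detach()          # same data, no gradient tracking
```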
🔹 Neural Network Modules (torch.nn)
Basic Layers
Classes: nn.Linear, nn.Bilinear, nn.Identity
Convolution Layers
Classes: nn.Conv1d, nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.Unfold, nn.Fold
Pooling Layers
Classes: nn.MaxPool2d, nn.AvgPool2d, nn.AdaptiveMaxPool2d, nn.AdaptiveAvgPool2d (for global pooling, use an adaptive variant with output size 1)
Recurrent Layers
Classes: nn.RNN, nn.LSTM, nn.GRU, nn.RNNCell, nn.LSTMCell, nn.GRUCell
Normalization / Regularization Layers
Classes: nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.LayerNorm, nn.GroupNorm, nn.InstanceNorm2d, nn.Dropout, nn.Dropout2d, nn.Dropout3d
Activation Functions
Classes: nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.ELU, nn.SELU, nn.Sigmoid, nn.Tanh, nn.Softmax, nn.LogSoftmax, nn.Softmin, nn.GELU, nn.SiLU (Swish), nn.Mish
Activation Functions (torch.nn.functional)
F.xxx: F.relu(), F.leaky_relu(), F.elu(), F.selu(), F.sigmoid(), F.tanh(), F.softmax(), F.log_softmax(), F.softmin(), F.gelu(), F.silu(), F.mish()
Loss Functions
Classes: nn.MSELoss, nn.L1Loss, nn.CrossEntropyLoss, nn.NLLLoss, nn.BCELoss, nn.BCEWithLogitsLoss, nn.KLDivLoss, nn.SmoothL1Loss, nn.PoissonNLLLoss, nn.CosineEmbeddingLoss, nn.MarginRankingLoss
Loss Functions (torch.nn.functional)
F.xxx: F.mse_loss(), F.l1_loss(), F.cross_entropy(), F.nll_loss(), F.binary_cross_entropy(), F.binary_cross_entropy_with_logits(), F.kl_div(), F.smooth_l1_loss(), F.cosine_embedding_loss()
Containers
Classes: nn.Sequential, nn.ModuleList, nn.ModuleDict, nn.ParameterList, nn.ParameterDict
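For simple feed-forward stacks, a container can replace a hand-written Module; a sketch (layer sizes arbitrary):

```python
import torch
import torch.nn as nn

mlp = nn.Sequential(        # layers are applied in order
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(256, 10),
)
out = mlp(torch.randn(32, 784))   # shape [32, 10]
```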
Other Common Functions (torch.nn.functional)
F.xxx: F.conv2d(), F.max_pool2d(), F.avg_pool2d(), F.dropout(), F.batch_norm(), F.layer_norm(), F.interpolate(), F.pad(), F.normalize(), F.embedding(), F.one_hot()
🔹 Optimizers (torch.optim)
Classes: optim.SGD, optim.Adam, optim.AdamW, optim.RMSprop, optim.Adagrad, optim.Adadelta, optim.Adamax, optim.NAdam, optim.RAdam, optim.LBFGS
Optimizer Methods
optimizer.xxx: optimizer.step(), optimizer.zero_grad(), optimizer.state_dict(), optimizer.load_state_dict()
Common Optimizer Parameters
Parameters: lr, momentum, weight_decay, betas, eps, amsgrad
🔹 Learning-Rate Schedulers (torch.optim.lr_scheduler)
Classes: lr_scheduler.StepLR, lr_scheduler.MultiStepLR, lr_scheduler.ExponentialLR, lr_scheduler.CosineAnnealingLR, lr_scheduler.ReduceLROnPlateau, lr_scheduler.CyclicLR, lr_scheduler.OneCycleLR, lr_scheduler.LambdaLR, lr_scheduler.CosineAnnealingWarmRestarts
Scheduler Methods
scheduler.xxx: scheduler.step(), scheduler.get_last_lr(), scheduler.state_dict(), scheduler.load_state_dict()
Common Scheduler Parameters
Parameters: step_size, gamma, milestones, T_max, eta_min, factor, patience, base_lr, max_lr
🔹 Data Handling (torch.utils.data)
Dataset Classes
Classes: data.Dataset, data.TensorDataset, data.ConcatDataset, data.Subset
Data Loader
Classes: data.DataLoader
Parameters: batch_size, shuffle, num_workers, pin_memory, drop_last, sampler
Samplers
Classes: data.Sampler, data.RandomSampler, data.SequentialSampler, data.WeightedRandomSampler, data.SubsetRandomSampler
Dataset Splitting
Functions: data.random_split()
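A sketch tying the dataset, split, and loader pieces together (sizes arbitrary):

```python
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

X = torch.randn(1000, 784)
y = torch.randint(0, 10, (1000,))
dataset = TensorDataset(X, y)
train_set, val_set = random_split(dataset, [800, 200])   # 80/20 split
train_loader = DataLoader(train_set, batch_size=64, shuffle=True,
                          num_workers=2, pin_memory=True, drop_last=True)
for data, target in train_loader:   # data: [64, 784], target: [64]
    break
```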
🔹 Data Transforms (torchvision.transforms)
Classes: transforms.Compose, transforms.ToTensor, transforms.Normalize, transforms.Resize, transforms.CenterCrop, transforms.RandomCrop, transforms.RandomHorizontalFlip, transforms.RandomVerticalFlip, transforms.RandomRotation, transforms.ColorJitter, transforms.RandomResizedCrop, transforms.Pad, transforms.Lambda
Common Transform Parameters
Parameters: size, mean, std, scale, ratio, degrees, brightness, contrast, saturation, hue
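A typical augmentation pipeline sketch; the mean/std here are the common ImageNet statistics, shown as an example rather than a requirement:

```python
from torchvision import transforms

train_tf = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),                  # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```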
🔹 CUDA / Devices
Device Queries and Management
torch.xxx: torch.device(), torch.cuda.is_available(), torch.cuda.device_count(), torch.cuda.get_device_name(), torch.cuda.current_device(), torch.cuda.empty_cache(), torch.cuda.memory_allocated(), torch.cuda.memory_reserved()
Tensor Device / dtype Operations
tensor.xxx: tensor.to(), tensor.cpu(), tensor.cuda(), tensor.half(), tensor.float(), tensor.double(), tensor.int(), tensor.long(), tensor.bool()
Device Parameters
Parameters: device='cuda', device='cpu', non_blocking=True, memory_format
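A device-handling sketch:

```python
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.randn(2, 3)
x = x.to(device, non_blocking=True)  # async copy (needs pinned host memory)
x = x.float()                        # dtype casts: .half(), .double(), .long(), ...
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0), torch.cuda.memory_allocated())
```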
🔹 Saving / Loading
torch.xxx: torch.save(), torch.load(), torch.jit.save(), torch.jit.load(), torch.jit.script(), torch.jit.trace()
Model State
model.xxx: model.state_dict(), model.load_state_dict(), model.parameters(), model.named_parameters(), model.modules(), model.named_modules()
Loading Parameters
Parameters: map_location, strict=False, weights_only=True
🔹 Distributed Training
Classes: nn.DataParallel, nn.parallel.DistributedDataParallel
Functions: torch.distributed.init_process_group(), torch.distributed.barrier(), torch.distributed.all_reduce(), torch.distributed.broadcast()
🔹 Model Mode Control
model.xxx: model.train(), model.eval(), model.requires_grad_()
Parameters: mode=True/False for model.train(); the current mode is readable via the model.training attribute
🔹 Initialization Methods (torch.nn.init)
init.xxx: init.xavier_uniform_(), init.xavier_normal_(), init.kaiming_uniform_(), init.kaiming_normal_(), init.normal_(), init.uniform_(), init.constant_(), init.zeros_(), init.ones_()
🔹 Utility Functions
torch.xxx: torch.__version__, torch.set_printoptions(), torch.get_default_dtype(), torch.set_default_dtype(), torch.set_num_threads(), torch.get_num_threads()
Print Option Parameters
Parameters: precision, threshold, edgeitems, linewidth, profile, sci_mode
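A quick sketch of the print options:

```python
import torch

print(torch.__version__)
torch.set_printoptions(precision=3, sci_mode=False, linewidth=120)
print(torch.rand(2, 3))      # printed with 3 decimal places
torch.set_num_threads(4)     # cap intra-op CPU parallelism
```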
📋 Standard Model-Building Template

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class StandardModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout=0.1):
        super().__init__()
        # Layer definitions
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        # Regularization
        self.dropout = nn.Dropout(dropout)
        self.batch_norm = nn.BatchNorm1d(hidden_size)

    def forward(self, x):
        # x.size() -> [batch_size, input_size]
        x = F.relu(self.fc1(x))    # functional activation via F.relu()
        x = self.batch_norm(x)     # batch normalization
        x = self.dropout(x)        # dropout regularization
        x = F.relu(self.fc2(x))    # second layer + activation
        x = self.dropout(x)        # dropout
        x = self.fc3(x)            # output layer (no activation)
        return x


# Instantiate the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = StandardModel(input_size=784, hidden_size=256, output_size=10).to(device)


# Weight initialization
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)  # weight initialization
        if m.bias is not None:
            m.bias.data.fill_(0.01)              # bias initialization

model.apply(init_weights)

# Inspect the model
print(f"Total parameters: {sum(p.numel() for p in model.parameters())}")
print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
```
🚀 Standard Training Template

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR


def train_model(model, train_loader, val_loader, epochs=100, device='cpu'):
    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=0.001,
                           weight_decay=1e-4,
                           betas=(0.9, 0.999))
    scheduler = StepLR(optimizer,          # learning-rate scheduler
                       step_size=30,
                       gamma=0.1)

    best_val_loss = float('inf')
    for epoch in range(epochs):
        # ============ Training phase ============
        model.train()                      # switch to training mode
        train_loss = 0.0
        train_correct = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)  # move data to device
            optimizer.zero_grad()          # clear accumulated gradients
            output = model(data)           # forward pass
            loss = criterion(output, target)  # compute the loss
            loss.backward()                # backward pass
            optimizer.step()               # update parameters
            # Statistics
            train_loss += loss.item()
            pred = output.argmax(dim=1)    # predicted class
            train_correct += pred.eq(target).sum().item()

        # ============ Validation phase ============
        model.eval()                       # switch to evaluation mode
        val_loss = 0.0
        val_correct = 0
        with torch.no_grad():              # disable gradient tracking
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()
                pred = output.argmax(dim=1)
                val_correct += pred.eq(target).sum().item()

        # ============ Metrics ============
        train_acc = 100. * train_correct / len(train_loader.dataset)
        val_acc = 100. * val_correct / len(val_loader.dataset)
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print(f'Learning Rate: {scheduler.get_last_lr()[0]:.6f}')

        # ============ Save the best model ============
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),          # model weights
                'optimizer_state_dict': optimizer.state_dict(),  # optimizer state
                'scheduler_state_dict': scheduler.state_dict(),  # scheduler state
                'loss': val_loss,
                'accuracy': val_acc,
            }, 'best_model.pth')

        scheduler.step()                   # advance the learning rate
        print('-' * 50)


# ============ Checkpoint restoration ============
def load_checkpoint(model, optimizer, scheduler, filename):
    """Load a training checkpoint."""
    checkpoint = torch.load(filename, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    if scheduler:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    return checkpoint['epoch'], checkpoint['loss']


# ============ Prediction ============
def predict(model, data_loader, device):
    """Run inference and return predicted classes and probabilities."""
    model.eval()                           # evaluation mode
    predictions = []
    probabilities = []
    with torch.no_grad():                  # disable gradient tracking
        for data, _ in data_loader:
            data = data.to(device)
            output = model(data)           # forward pass
            prob = F.softmax(output, dim=1)  # convert logits to probabilities
            pred = output.argmax(dim=1)    # predicted class
            predictions.extend(pred.cpu().numpy())
            probabilities.extend(prob.cpu().numpy())
    return predictions, probabilities


# ============ Evaluation ============
def evaluate_model(model, test_loader, device):
    """Evaluate model performance on a test set."""
    model.eval()
    test_loss = 0
    correct = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
    test_loss /= len(test_loader)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'Test Loss: {test_loss:.4f}')
    print(f'Test Accuracy: {accuracy:.2f}%')
    return test_loss, accuracy
```