PyTorch
张量(Tensor)基础
张量创建与初始化

| 方法 | 作用 | 示例 |
| --- | --- | --- |
| torch.tensor() | 从数据创建 | torch.tensor([1,2,3]) |
| torch.zeros() | 全0张量 | torch.zeros((3,3)) |
| torch.ones() | 全1张量 | torch.ones((2,4)) |
| torch.rand() | 均匀分布随机 | torch.rand((3,3)) |
| torch.randn() | 标准正态分布 | torch.randn((100,)) |
| torch.arange() | 等差张量 | torch.arange(0, 10, 2) |
| torch.linspace() | 等间隔张量 | torch.linspace(0, 1, 5) |
| torch.eye() | 单位矩阵 | torch.eye(3) |
| torch.full() | 填充张量 | torch.full((3,3), 5) |
| torch.empty() | 未初始化 | torch.empty((3,3)) |
| torch.from_numpy() | 从NumPy创建 | torch.from_numpy(np_array) |
```python
import torch
import numpy as np
print("=== 张量创建与初始化 ===")
# 1.1 基础创建
x = torch.tensor([1, 2, 3, 4, 5])
print(f"从列表创建: {x}, dtype={x.dtype}")
# 1.2 特殊张量
zeros = torch.zeros(3, 3) # 3x3全0
ones = torch.ones(2, 4) # 2x4全1
rand_tensor = torch.rand(3, 3) # 3x3随机数[0,1)
randn_tensor = torch.randn(100) # 100个标准正态分布
eye_matrix = torch.eye(3) # 3x3单位矩阵
full_tensor = torch.full((3, 3), 255.0) # 3x3全255
print(f"\n全0张量:\n{zeros}")
print(f"\n随机张量:\n{rand_tensor}")
# 1.3 序列张量
range_tensor = torch.arange(0, 10, 2) # [0, 2, 4, 6, 8]
linspace_tensor = torch.linspace(0, 1, 5) # [0., 0.25, 0.5, 0.75, 1.]
print(f"\n等差张量: {range_tensor}")
print(f"等间隔张量: {linspace_tensor}")
# 1.4 与NumPy互转
np_array = np.array([1, 2, 3, 4, 5])
tensor_from_np = torch.from_numpy(np_array) # NumPy -> Tensor
tensor_to_np = tensor_from_np.numpy() # Tensor -> NumPy
print(f"\nNumPy数组: {np_array}")
print(f"转为Tensor: {tensor_from_np}")
print(f"转回NumPy: {tensor_to_np}")
# 1.5 数据类型和设备
x_float = torch.tensor([1, 2, 3], dtype=torch.float32)
x_double = torch.tensor([1, 2, 3], dtype=torch.float64)
x_int = torch.tensor([1, 2, 3], dtype=torch.int32)
x_long = torch.tensor([1, 2, 3], dtype=torch.int64)
print(f"\n数据类型:")
print(f"float32: {x_float.dtype}")
print(f"float64: {x_double.dtype}")
print(f"int32: {x_int.dtype}")
print(f"int64: {x_long.dtype}")
# 1.6 设备(CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n当前设备: {device}")
if torch.cuda.is_available():
    x_gpu = torch.tensor([1, 2, 3]).cuda() # 移动到GPU
    print(f"GPU张量: {x_gpu.device}")
```
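除了 .cuda(),更通用的做法是用 .to() 在设备和数据类型之间转换。下面是一个简要示意,沿用上面定义的 device 变量:

```python
# .to() 返回新张量,可分别或同时指定设备和dtype
x = torch.tensor([1, 2, 3])
x_dev = x.to(device)                         # 移动到当前设备(CPU或GPU)
x_f16 = x.to(dtype=torch.float16)            # 只转换数据类型
x_both = x.to(device, dtype=torch.float32)   # 同时指定设备与dtype
print(x_dev.device, x_f16.dtype, x_both.dtype)
```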
张量属性与操作

| 属性/方法 | 作用 | 示例 |
| --- | --- | --- |
| .shape / .size() | 形状 | tensor.shape |
| .dtype | 数据类型 | tensor.dtype |
| .device | 设备 | tensor.device |
| .ndim | 维度数 | tensor.ndim |
| .numel() | 元素总数 | tensor.numel() |
| .reshape() | 重塑形状 | tensor.reshape((3,4)) |
| .view() | 视图重塑 | tensor.view(-1, 16) |
| .squeeze() | 压缩维度 | tensor.squeeze() |
| .unsqueeze() | 增加维度 | tensor.unsqueeze(0) |
| .permute() | 维度重排 | tensor.permute(2,0,1) |
| .transpose() | 转置 | tensor.transpose(0,1) |
| .t() | 矩阵转置 | tensor.t() |
| .contiguous() | 连续内存 | tensor.contiguous() |
```python
print("\n=== 张量属性与操作 ===")
# 创建测试张量
tensor = torch.randn(3, 4, 5) # 3x4x5张量
# 1.1 基本属性
print(f"形状: {tensor.shape}")
print(f"数据类型: {tensor.dtype}")
print(f"设备: {tensor.device}")
print(f"维度数: {tensor.ndim}")
print(f"元素总数: {tensor.numel()}")
# 1.2 形状操作
# 重塑
reshaped = tensor.reshape(12, 5) # 改为12x5
print(f"\n重塑为(12,5): {reshaped.shape}")
# 视图(共享内存)
viewed = tensor.view(-1, 20) # -1表示自动计算
print(f"视图为(-1,20): {viewed.shape}")
# 压缩/增加维度
# 假设有一个维度为1的张量
tensor_1d = torch.randn(1, 3, 1, 5)
squeezed = tensor_1d.squeeze() # 压缩所有维度为1的
unsqueezed = squeezed.unsqueeze(0) # 在第0维增加维度
print(f"\n原始形状: {tensor_1d.shape}")
print(f"压缩后: {squeezed.shape}")
print(f"增加维度后: {unsqueezed.shape}")
# 1.3 维度重排
# 图像数据通常需要调整维度顺序
image_tensor = torch.randn(3, 224, 224) # (通道, 高, 宽)
# 转为PyTorch期望的(N, C, H, W)格式
batch_tensor = image_tensor.unsqueeze(0) # (1, 3, 224, 224)
print(f"\n图像张量形状: {image_tensor.shape}")
print(f"批次格式: {batch_tensor.shape}")
# 维度重排(通道最后 -> 通道最先)
hwc_tensor = torch.randn(224, 224, 3) # 高度, 宽度, 通道
chw_tensor = hwc_tensor.permute(2, 0, 1) # 通道, 高度, 宽度
print(f"\nHWC格式: {hwc_tensor.shape}")
print(f"CHW格式: {chw_tensor.shape}")
# 1.4 转置
matrix = torch.randn(3, 4)
transposed = matrix.transpose(0, 1) # 或 matrix.T
print(f"\n矩阵形状: {matrix.shape}")
print(f"转置形状: {transposed.shape}")
# 1.5 内存连续性
# 某些操作后张量可能不连续
non_contiguous = transposed # transpose操作产生不连续张量
is_contiguous = non_contiguous.is_contiguous()
print(f"\n张量是否连续: {is_contiguous}")
# 转为连续内存
contiguous_tensor = non_contiguous.contiguous()
print(f"转为连续后: {contiguous_tensor.is_contiguous()}")
```
张量运算
基本数学运算

| 运算 | 作用 | 示例 |
| --- | --- | --- |
| +, -, *, / | 基本运算 | a + b, a * b |
| @, torch.matmul() | 矩阵乘法 | a @ b |
| torch.mm() | 矩阵乘法(2D) | torch.mm(a, b) |
| torch.add() | 加法 | torch.add(a, b) |
| torch.mul() | 乘法 | torch.mul(a, b) |
| torch.div() | 除法 | torch.div(a, b) |
| torch.pow() | 幂运算 | torch.pow(a, 2) |
| torch.sqrt() | 平方根 | torch.sqrt(a) |
| torch.exp() | 指数 | torch.exp(a) |
| torch.log() | 对数 | torch.log(a) |
| torch.abs() | 绝对值 | torch.abs(a) |
| torch.clamp() | 限幅 | torch.clamp(a, min, max) |
```python
print("=== 张量基本运算 ===")
# 创建测试张量
a = torch.tensor([1.0, 2.0, 3.0, 4.0])
b = torch.tensor([5.0, 6.0, 7.0, 8.0])
# 2.1 基础运算
print("基础运算:")
print(f"a + b = {a + b}")
print(f"a - b = {a - b}")
print(f"a * b = {a * b}")
print(f"a / b = {a / b}")
print(f"a ** 2 = {a ** 2}")
# 2.2 函数运算
print("\n函数运算:")
print(f"平方根 sqrt(a): {torch.sqrt(a)}")
print(f"指数 exp(a): {torch.exp(a)}")
print(f"自然对数 log(a): {torch.log(a)}")
print(f"绝对值 abs(-a): {torch.abs(-a)}")
# 2.3 限幅操作(常用于图像处理)
values = torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0])
clamped = torch.clamp(values, min=-1.0, max=1.0)
print(f"\n限幅 [-1, 1]: {clamped}")
# 2.4 矩阵运算
matrix_a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
matrix_b = torch.tensor([[5.0, 6.0], [7.0, 8.0]])
print(f"\n矩阵乘法:")
print(f"a @ b = \n{a @ b}") # 向量点积
print(f"\nmatrix_a @ matrix_b = \n{matrix_a @ matrix_b}")
print(f"\ntorch.matmul(matrix_a, matrix_b) = \n{torch.matmul(matrix_a, matrix_b)}")
# 2.5 广播机制
vector = torch.tensor([1, 2, 3])
matrix = torch.tensor([[1, 2, 3], [4, 5, 6]])
result = matrix + vector # vector广播到每行
print(f"\n广播运算:")
print(f"matrix:\n{matrix}")
print(f"vector: {vector}")
print(f"matrix + vector:\n{result}")
```
统计运算

| 函数 | 作用 | 示例 |
| --- | --- | --- |
| torch.sum() | 求和 | torch.sum(tensor) |
| torch.mean() | 平均值 | torch.mean(tensor) |
| torch.std() | 标准差 | torch.std(tensor) |
| torch.var() | 方差 | torch.var(tensor) |
| torch.min() | 最小值 | torch.min(tensor) |
| torch.max() | 最大值 | torch.max(tensor) |
| torch.argmin() | 最小值索引 | torch.argmin(tensor) |
| torch.argmax() | 最大值索引 | torch.argmax(tensor) |
| torch.median() | 中位数 | torch.median(tensor) |
| torch.sort() | 排序 | torch.sort(tensor) |
| torch.topk() | 前k个值 | torch.topk(tensor, k) |
| torch.unique() | 唯一值 | torch.unique(tensor) |
```python
print("\n=== 统计运算 ===")
# 创建测试数据
data = torch.randn(100) # 100个正态分布随机数
matrix = torch.randn(3, 4)
print("统计运算:")
print(f"数据形状: {data.shape}")
print(f"总和: {torch.sum(data):.3f}")
print(f"平均值: {torch.mean(data):.3f}")
print(f"标准差: {torch.std(data):.3f}")
print(f"方差: {torch.var(data):.3f}")
print(f"最小值: {torch.min(data):.3f}")
print(f"最大值: {torch.max(data):.3f}")
# 维度统计
print(f"\n矩阵统计:")
print(f"原始矩阵:\n{matrix}")
print(f"按行求和: {torch.sum(matrix, dim=1)}")
print(f"按列求平均: {torch.mean(matrix, dim=0)}")
# 索引统计
print(f"\n索引统计:")
values, indices = torch.max(matrix, dim=1) # 每行最大值和索引
print(f"每行最大值: {values}")
print(f"每行最大值索引: {indices}")
# 排序
print(f"\n排序:")
sorted_values, sorted_indices = torch.sort(matrix, dim=1, descending=True)
print(f"每行降序排序:\n{sorted_values}")
print(f"排序索引:\n{sorted_indices}")
# Top-k
print(f"\nTop-k:")
top_values, top_indices = torch.topk(matrix, k=2, dim=1)
print(f"每行前2个值:\n{top_values}")
print(f"每行前2个索引:\n{top_indices}")
# 唯一值
tensor_with_dups = torch.tensor([1, 2, 2, 3, 3, 3, 4])
unique_values = torch.unique(tensor_with_dups)
print(f"\n唯一值: {unique_values}")
```
张量索引与切片

| 操作 | 语法 | 说明 |
| --- | --- | --- |
| 基本索引 | tensor[index] | 按下标取元素/子张量 |
| 切片 | tensor[start:end:step] | 按区间切片 |
| 布尔索引 | tensor[mask] | 按布尔掩码筛选 |
| 花式索引 | tensor[[indices]] | 按索引列表取值 |
| : | tensor[:] | 选取该维所有元素 |
| ... | tensor[..., 0] | 省略号,代表中间所有维度 |
| torch.masked_select() | torch.masked_select(tensor, mask) | 掩码选择,返回一维张量 |
| torch.gather() | torch.gather(tensor, dim, index) | 沿指定维度按索引聚集 |
| torch.index_select() | torch.index_select(tensor, dim, index) | 沿指定维度按索引选择 |
```python
print("\n=== 张量索引与切片 ===")
# 创建测试张量
tensor = torch.arange(24).reshape(4, 6)
print(f"原始张量:\n{tensor}")
# 2.1 基本索引
print(f"\n基本索引:")
print(f"第一行: {tensor[0]}")
print(f"第二列: {tensor[:, 1]}")
print(f"元素[2,3]: {tensor[2, 3]}")
# 2.2 切片
print(f"\n切片:")
print(f"行1-2, 列2-4:\n{tensor[1:3, 2:5]}")
print(f"每隔一行:\n{tensor[::2]}")
# 2.3 布尔索引
mask = tensor > 10
print(f"\n布尔索引 (tensor > 10):\n{tensor[mask]}")
# 2.4 花式索引
indices = torch.tensor([0, 2, 3])
print(f"\n花式索引 [0,2,3]:\n{tensor[indices]}")
# 2.5 高级索引函数
print(f"\n高级索引函数:")
# masked_select
mask = tensor > 15
selected = torch.masked_select(tensor, mask)
print(f"masked_select (>15): {selected}")
# index_select
selected_rows = torch.index_select(tensor, dim=0, index=torch.tensor([0, 2]))
print(f"index_select 行[0,2]:\n{selected_rows}")
# gather (复杂索引)
# 假设我们要从每行选择不同列的元素
index = torch.tensor([[0, 1, 2], [2, 3, 4], [1, 2, 3], [3, 4, 5]])
gathered = torch.gather(tensor, dim=1, index=index)
print(f"gather操作:\n{gathered}")
# 2.6 省略号索引
tensor_3d = torch.arange(60).reshape(3, 4, 5)
print(f"\n省略号索引:")
print(f"张量形状: {tensor_3d.shape}")
print(f"tensor_3d[..., 0] 形状: {tensor_3d[..., 0].shape}")
```
自动求导(Autograd)
梯度计算基础

| 概念/函数 | 作用 | 说明 |
| --- | --- | --- |
| requires_grad=True | 启用梯度 | 跟踪计算历史 |
| .backward() | 反向传播 | 计算梯度 |
| .grad | 梯度值 | 参数的梯度 |
| .grad_fn | 梯度函数 | 创建该张量的函数 |
| torch.no_grad() | 禁用梯度 | 推理时使用 |
| .detach() | 分离张量 | 从计算图分离 |
| torch.autograd.grad() | 计算梯度 | 手动计算梯度 |
| .retain_grad() | 保留梯度 | 非叶子节点保留梯度 |
```python
print("=== 自动求导基础 ===")
# 3.1 基本梯度计算
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)
# 定义计算
z = x**2 + y**3 + 10
# 反向传播
z.backward()
print(f"x = {x.item()}, y = {y.item()}")
print(f"z = x² + y³ + 10 = {z.item()}")
print(f"∂z/∂x = {x.grad.item()}") # 2x = 4
print(f"∂z/∂y = {y.grad.item()}") # 3y² = 27
# 3.2 多次反向传播需要清零梯度
x.grad.zero_()
y.grad.zero_()
# 重新计算
w = x*y + torch.sin(x)
w.backward()
print(f"\n清零后重新计算:")
print(f"w = x*y + sin(x) = {w.item()}")
print(f"∂w/∂x = {x.grad.item()}") # y + cos(x) = 3 + cos(2)
print(f"∂w/∂y = {y.grad.item()}") # x = 2
# 3.3 非标量张量的梯度
print(f"\n非标量梯度:")
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x**2
# 需要指定梯度权重
v = torch.tensor([0.1, 1.0, 0.001])
y.backward(v)
print(f"x = {x}")
print(f"y = x² = {y}")
print(f"梯度权重 v = {v}")
print(f"梯度 ∂y/∂x = {x.grad}") # 2x * v
# 3.4 禁用梯度上下文
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
with torch.no_grad():
    y = x * 2 # 不会跟踪梯度
print(f"\n在no_grad上下文中的操作:")
print(f"y.requires_grad = {y.requires_grad}")
# 3.5 分离张量
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x**2
z = y.detach() # 分离y,不再跟踪梯度
print(f"\n张量分离:")
print(f"y.requires_grad = {y.requires_grad}")
print(f"z.requires_grad = {z.requires_grad}")
# 3.6 梯度累加
print(f"\n梯度累加:")
x = torch.tensor(2.0, requires_grad=True)
for i in range(3):
    y = x * i if i > 0 else x
    y.backward(retain_graph=True if i < 2 else False)
    print(f"第{i+1}次反向传播,梯度: {x.grad.item()}")
# 注意:最后一次不需要retain_graph
```
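上表列出的 torch.autograd.grad() 和 .retain_grad() 在上面的代码中没有出现,这里补一个简短示意(变量名仅作演示):

```python
# torch.autograd.grad: 直接返回梯度,不写入.grad属性
x = torch.tensor(2.0, requires_grad=True)
y = x ** 3
(dy_dx,) = torch.autograd.grad(y, x)  # dy/dx = 3x² = 12
print(f"autograd.grad计算的梯度: {dy_dx.item()}")

# .retain_grad: 让非叶子节点也保留.grad
a = torch.tensor(2.0, requires_grad=True)
b = a * 3          # b是非叶子节点,默认反向传播后不保留梯度
b.retain_grad()
c = b ** 2
c.backward()
print(f"非叶子节点b的梯度: {b.grad.item()}")  # dc/db = 2b = 12
```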
自定义自动求导函数
```python
print("\n=== 自定义自动求导函数 ===")
# 3.1 使用 torch.autograd.Function
class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        # 前向传播
        ctx.save_for_backward(input) # 保存输入用于反向传播
        return input.clamp(min=0)
    @staticmethod
    def backward(ctx, grad_output):
        # 反向传播
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input
# 使用自定义函数
x = torch.randn(5, requires_grad=True)
print(f"输入: {x}")
my_relu = MyReLU.apply
y = my_relu(x)
print(f"ReLU输出: {y}")
# 计算梯度
y.sum().backward()
print(f"梯度: {x.grad}")
# 3.2 对比内置ReLU
x.grad.zero_()
y_builtin = torch.relu(x)
y_builtin.sum().backward()
print(f"内置ReLU梯度: {x.grad}")
```
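实现自定义 Function 后,通常可以用 torch.autograd.gradcheck 做数值梯度校验。下面是一个简要示意(输入需为双精度;ReLU 在 0 处不可导,随机输入几乎不会恰好取到 0):

```python
# 用数值差分校验MyReLU的反向传播是否正确
check_input = torch.randn(6, dtype=torch.double, requires_grad=True)
ok = torch.autograd.gradcheck(MyReLU.apply, (check_input,), eps=1e-6, atol=1e-4)
print(f"gradcheck通过: {ok}")
```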
神经网络模块(torch.nn)
层定义与使用

| 层类型 | 类名 | 主要参数 |
| --- | --- | --- |
| 线性层 | nn.Linear() | in_features, out_features, bias |
| 卷积层 | nn.Conv2d() | in_channels, out_channels, kernel_size, stride, padding |
| 池化层 | nn.MaxPool2d() | kernel_size, stride, padding |
| 激活函数 | nn.ReLU() | inplace |
| 归一化 | nn.BatchNorm2d() | num_features |
| Dropout | nn.Dropout() | p |
| 循环层 | nn.LSTM() | input_size, hidden_size, num_layers |
| Embedding | nn.Embedding() | num_embeddings, embedding_dim |
```python
import torch.nn as nn
print("=== 神经网络层 ===")
# 4.1 线性层
linear = nn.Linear(in_features=10, out_features=5, bias=True)
print(f"线性层: {linear}")
print(f"权重形状: {linear.weight.shape}") # (5, 10)
print(f"偏置形状: {linear.bias.shape}") # (5,)
# 前向传播
x = torch.randn(3, 10) # 批大小=3, 特征=10
y = linear(x)
print(f"输入形状: {x.shape}")
print(f"输出形状: {y.shape}")
# 4.2 卷积层(CV核心)
conv = nn.Conv2d(
in_channels=3, # 输入通道数(RGB图像为3)
out_channels=16, # 输出通道数(滤波器数量)
kernel_size=3, # 卷积核大小
stride=1, # 步长
padding=1, # 填充
bias=True
)
print(f"\n卷积层: {conv}")
print(f"权重形状: {conv.weight.shape}") # (16, 3, 3, 3)
# 卷积前向传播
image = torch.randn(4, 3, 32, 32) # 批大小=4, 通道=3, 高=32, 宽=32
output = conv(image)
print(f"输入形状: {image.shape}")
print(f"输出形状: {output.shape}") # (4, 16, 32, 32) 由于padding=1,尺寸不变
# 4.3 池化层
pool = nn.MaxPool2d(kernel_size=2, stride=2)
pooled = pool(output)
print(f"\n最大池化后形状: {pooled.shape}") # (4, 16, 16, 16)
# 4.4 批量归一化
batch_norm = nn.BatchNorm2d(num_features=16)
normed = batch_norm(pooled)
print(f"批量归一化后形状: {normed.shape}")
# 4.5 Dropout
dropout = nn.Dropout(p=0.5)
dropped = dropout(normed)
print(f"Dropout后形状: {dropped.shape}")
# 4.6 激活函数
relu = nn.ReLU(inplace=True) # inplace=True节省内存
activated = relu(dropped)
print(f"ReLU激活后形状: {activated.shape}")
# 其他激活函数
sigmoid = nn.Sigmoid()
tanh = nn.Tanh()
leaky_relu = nn.LeakyReLU(negative_slope=0.01)
softmax = nn.Softmax(dim=1) # 沿哪个维度做softmax
```
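上表中的 nn.LSTM 和 nn.Embedding 在上面没有演示,这里给一个简要示意(词表大小、维度等参数均为随意取值):

```python
# Embedding: 把整数索引映射为稠密向量
embedding = nn.Embedding(num_embeddings=1000, embedding_dim=64)  # 词表1000,向量维度64
token_ids = torch.randint(0, 1000, (4, 10))   # (batch=4, 序列长度=10)
embedded = embedding(token_ids)               # (4, 10, 64)

# LSTM: batch_first=True时输入为(batch, seq, feature)
lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True)
output, (h_n, c_n) = lstm(embedded)
print(f"Embedding输出: {embedded.shape}")             # (4, 10, 64)
print(f"LSTM输出: {output.shape}, h_n: {h_n.shape}")  # (4, 10, 128), (2, 4, 128)
```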
损失函数

| 损失函数 | 类名 | 适用场景 |
| --- | --- | --- |
| 均方误差 | nn.MSELoss() | 回归问题 |
| 交叉熵 | nn.CrossEntropyLoss() | 多分类 |
| 二元交叉熵 | nn.BCELoss() | 二分类 |
| BCE with logits | nn.BCEWithLogitsLoss() | 二分类(数值稳定) |
| L1损失 | nn.L1Loss() | 回归,稀疏性 |
| Huber损失 | nn.SmoothL1Loss() | 回归,对异常值鲁棒 |
| KL散度 | nn.KLDivLoss() | 概率分布 |
| Triplet损失 | nn.TripletMarginLoss() | 度量学习 |
| Contrastive损失 | PyTorch无内置类,需自定义实现 | 对比学习 |
```python
print("\n=== 损失函数 ===")
# 4.1 回归损失
mse_loss = nn.MSELoss()
l1_loss = nn.L1Loss()
smooth_l1 = nn.SmoothL1Loss()
# 示例:图像重建
pred = torch.randn(4, 3, 32, 32)
target = torch.randn(4, 3, 32, 32)
mse = mse_loss(pred, target)
l1 = l1_loss(pred, target)
huber = smooth_l1(pred, target)
print(f"MSE损失: {mse.item():.4f}")
print(f"L1损失: {l1.item():.4f}")
print(f"Huber损失: {huber.item():.4f}")
# 4.2 分类损失
ce_loss = nn.CrossEntropyLoss()
bce_loss = nn.BCELoss()
bce_logits_loss = nn.BCEWithLogitsLoss()
# 多分类示例
logits = torch.randn(4, 10) # 4个样本,10个类别
labels = torch.randint(0, 10, (4,)) # 真实标签
ce = ce_loss(logits, labels)
print(f"\n交叉熵损失: {ce.item():.4f}")
# 二分类示例
sigmoid_output = torch.rand(4, 1) # 经过sigmoid的输出
binary_labels = torch.randint(0, 2, (4, 1)).float()
bce = bce_loss(sigmoid_output, binary_labels)
print(f"二元交叉熵损失: {bce.item():.4f}")
# 使用BCEWithLogitsLoss(更稳定)
logits = torch.randn(4, 1) # 未经过sigmoid
bce_logits = bce_logits_loss(logits, binary_labels)
print(f"BCEWithLogits损失: {bce_logits.item():.4f}")
# 4.3 特定任务损失
# Triplet损失(人脸识别、度量学习)
triplet_loss = nn.TripletMarginLoss(margin=1.0)
anchor = torch.randn(4, 128) # 锚点样本
positive = torch.randn(4, 128) # 正样本
negative = torch.randn(4, 128) # 负样本
triplet = triplet_loss(anchor, positive, negative)
print(f"\nTriplet损失: {triplet.item():.4f}")
```
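上表中的 nn.KLDivLoss 这里没有演示。它要求输入是对数概率、目标是概率分布,下面是一个简要示意(用随机logits模拟蒸馏场景):

```python
# KL散度: input为log概率(log_softmax),target为概率(softmax)
kl_loss = nn.KLDivLoss(reduction='batchmean')
student_logits = torch.randn(4, 10)
teacher_logits = torch.randn(4, 10)
kl = kl_loss(
    torch.log_softmax(student_logits, dim=1),  # 对数概率
    torch.softmax(teacher_logits, dim=1)       # 概率
)
print(f"KL散度损失: {kl.item():.4f}")
```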
优化器

| 优化器 | 类名 | 主要参数 |
| --- | --- | --- |
| SGD | torch.optim.SGD() | params, lr, momentum, weight_decay |
| Adam | torch.optim.Adam() | params, lr, betas, eps, weight_decay |
| AdamW | torch.optim.AdamW() | params, lr, weight_decay(解耦权重衰减) |
| RMSprop | torch.optim.RMSprop() | params, lr, alpha, weight_decay |
| Adagrad | torch.optim.Adagrad() | params, lr, lr_decay |
| 学习率调度 | torch.optim.lr_scheduler 中的各种scheduler | 按策略调整学习率 |
```python
import torch.optim as optim
print("\n=== 优化器 ===")
# 创建简单模型
model = nn.Sequential(
nn.Linear(10, 20),
nn.ReLU(),
nn.Linear(20, 2)
)
# 4.1 SGD优化器
optimizer_sgd = optim.SGD(
model.parameters(),
lr=0.01,
momentum=0.9,
weight_decay=1e-4 # L2正则化
)
print(f"SGD优化器: {optimizer_sgd}")
# 4.2 Adam优化器
optimizer_adam = optim.Adam(
model.parameters(),
lr=0.001,
betas=(0.9, 0.999), # 一阶和二阶矩估计的衰减率
eps=1e-8, # 数值稳定性
weight_decay=0
)
print(f"Adam优化器: {optimizer_adam}")
# 4.3 AdamW优化器(推荐)
optimizer_adamw = optim.AdamW(
model.parameters(),
lr=0.001,
weight_decay=0.01 # 解耦权重衰减
)
print(f"AdamW优化器: {optimizer_adamw}")
# 4.4 优化器使用示例
criterion = nn.CrossEntropyLoss()
# 模拟训练步骤
for epoch in range(3):
    # 模拟数据
    inputs = torch.randn(4, 10)
    targets = torch.randint(0, 2, (4,))
    # 前向传播
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    # 反向传播
    optimizer_adamw.zero_grad() # 清零梯度
    loss.backward() # 计算梯度
    # 梯度裁剪(防止梯度爆炸)
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    # 参数更新
    optimizer_adamw.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")
# 4.5 学习率调度器
print(f"\n学习率调度器:")
# 创建优化器
optimizer = optim.Adam(model.parameters(), lr=0.01)
# StepLR: 每隔step_size个epoch,lr乘以gamma
scheduler_step = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# MultiStepLR: 在指定epochs调整学习率
scheduler_multi = optim.lr_scheduler.MultiStepLR(
optimizer, milestones=[30, 80], gamma=0.1
)
# CosineAnnealingLR: 余弦退火
scheduler_cosine = optim.lr_scheduler.CosineAnnealingLR(
optimizer, T_max=50, eta_min=0
)
# ReduceLROnPlateau: 当指标停止改善时降低学习率
scheduler_plateau = optim.lr_scheduler.ReduceLROnPlateau(
optimizer, mode='min', factor=0.1, patience=5
)
print(f"初始学习率: {optimizer.param_groups[0]['lr']}")
# 模拟调度器使用
for epoch in range(5):
    # 模拟训练
    loss = torch.tensor(0.5 - epoch*0.1)
    # StepLR更新
    scheduler_step.step()
    # Plateau更新(需要监控指标)
    scheduler_plateau.step(loss)
    print(f"Epoch {epoch+1}, LR: {optimizer.param_groups[0]['lr']:.6f}")
```
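优化器还支持按参数组设置不同的超参数(例如给不同层不同的学习率)。下面是一个简要示意,沿用上面的 model(nn.Sequential),分组方式仅作演示:

```python
# 参数组: 各组可以覆盖默认学习率
optimizer_groups = optim.SGD([
    {'params': model[0].parameters(), 'lr': 1e-4},  # 第一个Linear层
    {'params': model[2].parameters(), 'lr': 1e-2},  # 最后一个Linear层
], lr=1e-3, momentum=0.9)
for i, group in enumerate(optimizer_groups.param_groups):
    print(f"参数组{i}: lr={group['lr']}")
```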
数据加载与处理
Dataset和DataLoader

| 类/函数 | 作用 | 主要参数 |
| --- | --- | --- |
| Dataset | 数据集基类 | 需要实现__len__和__getitem__ |
| DataLoader | 数据加载器 | dataset, batch_size, shuffle, num_workers |
| TensorDataset | 张量数据集 | *tensors |
| Subset | 子集 | dataset, indices |
| random_split() | 随机分割 | dataset, lengths |
```python
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import torchvision.transforms as transforms
print("=== 数据加载与处理 ===")
# 5.1 自定义Dataset
class ImageDataset(Dataset):
    """自定义图像数据集"""
    def __init__(self, data, labels, transform=None):
        """
        参数:
            data: 图像数据,形状为(N, H, W, C)或(N, C, H, W)
            labels: 标签
            transform: 数据增强变换
        """
        self.data = data
        self.labels = labels
        self.transform = transform
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        image = self.data[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label
# 创建模拟数据
num_samples = 100
image_data = torch.randn(num_samples, 3, 32, 32) # 100张32x32 RGB图像
image_labels = torch.randint(0, 10, (num_samples,)) # 10个类别
# 5.2 创建Dataset实例
transform = transforms.Compose([
transforms.ToPILImage(), # 转为PIL图像
transforms.RandomHorizontalFlip(), # 随机水平翻转
transforms.RandomRotation(10), # 随机旋转±10度
transforms.ToTensor(), # 转为Tensor
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) # 归一化
])
dataset = ImageDataset(image_data, image_labels, transform=transform)
print(f"数据集大小: {len(dataset)}")
print(f"单个样本形状: {dataset[0][0].shape}") # (C, H, W)
# 5.3 创建DataLoader
dataloader = DataLoader(
dataset=dataset,
batch_size=16,
shuffle=True,
num_workers=2, # 多进程加载
pin_memory=True # 使用锁页内存,加速GPU传输
)
print(f"\nDataLoader信息:")
print(f"批次大小: {dataloader.batch_size}")
print(f"总批次数: {len(dataloader)}")
# 5.4 使用DataLoader
for batch_idx, (images, labels) in enumerate(dataloader):
    print(f"\n批次 {batch_idx}:")
    print(f" 图像形状: {images.shape}") # (16, 3, 32, 32)
    print(f" 标签形状: {labels.shape}") # (16,)
    if batch_idx == 2: # 只查看前3个批次
        break
# 5.5 TensorDataset(简单情况)
tensor_dataset = TensorDataset(image_data, image_labels)
tensor_dataloader = DataLoader(tensor_dataset, batch_size=8, shuffle=True)
print(f"\nTensorDataset批次:")
for images, labels in tensor_dataloader:
    print(f" 形状: {images.shape}, {labels.shape}")
    break
# 5.6 数据集分割
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
print(f"\n数据集分割:")
print(f"训练集: {len(train_dataset)} 样本")
print(f"验证集: {len(val_dataset)} 样本")
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
```
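上表中的 Subset 在上面没有出现,它按给定索引取出数据集的一个子集。简要示意如下:

```python
from torch.utils.data import Subset

# 取数据集中前10个样本构成子集(索引列表可任意指定)
subset = Subset(dataset, indices=list(range(10)))
subset_loader = DataLoader(subset, batch_size=4, shuffle=False)
print(f"子集大小: {len(subset)}")
```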

| 变换 | 类名 | 作用 |
| --- | --- | --- |
| 裁剪 | transforms.RandomCrop() | 随机裁剪 |
| 翻转 | transforms.RandomHorizontalFlip() | 随机水平翻转 |
| 旋转 | transforms.RandomRotation() | 随机旋转 |
| 颜色变换 | transforms.ColorJitter() | 调整亮度、对比度等 |
| 归一化 | transforms.Normalize() | 标准化 |
| 大小调整 | transforms.Resize() | 调整大小 |
| 转为Tensor | transforms.ToTensor() | PIL/NumPy转为Tensor |
| 随机擦除 | transforms.RandomErasing() | Cutout数据增强 |
| 组合变换 | transforms.Compose() | 组合多个变换 |
| 自定义变换 | transforms.Lambda() | 自定义变换 |
```python
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
print("\n=== 数据增强变换 ===")
# 5.1 创建模拟图像(PIL格式)
np_image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
pil_image = Image.fromarray(np_image)
print(f"原始图像: {pil_image.size}, 模式: {pil_image.mode}")
# 5.2 定义增强管道
train_transform = transforms.Compose([
transforms.Resize((128, 128)), # 调整大小
transforms.RandomCrop(112), # 随机裁剪到112x112
transforms.RandomHorizontalFlip(p=0.5), # 50%概率水平翻转
transforms.RandomRotation(degrees=15), # 随机旋转±15度
transforms.ColorJitter(
brightness=0.2, # 亮度调整
contrast=0.2, # 对比度调整
saturation=0.2, # 饱和度调整
hue=0.1 # 色调调整
),
transforms.ToTensor(), # 转为Tensor
transforms.Normalize(
mean=[0.485, 0.456, 0.406], # ImageNet均值
std=[0.229, 0.224, 0.225] # ImageNet标准差
),
transforms.RandomErasing( # Cutout
p=0.5, # 应用概率
scale=(0.02, 0.1), # 擦除面积比例
ratio=(0.3, 3.3), # 宽高比范围
value=0 # 填充值
)
])
# 5.3 验证集变换(通常更简单)
val_transform = transforms.Compose([
transforms.Resize((128, 128)),
transforms.CenterCrop(112),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
])
# 5.4 应用变换
augmented_image = train_transform(pil_image)
print(f"\n增强后图像形状: {augmented_image.shape}") # (3, 112, 112)
print(f"像素值范围: [{augmented_image.min():.3f}, {augmented_image.max():.3f}]")
# 5.5 自定义变换
def custom_normalize(tensor):
    """自定义归一化,将值缩放到[-1, 1]"""
    return tensor * 2 - 1
custom_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Lambda(custom_normalize)
])
custom_image = custom_transform(pil_image)
print(f"\n自定义归一化范围: [{custom_image.min():.3f}, {custom_image.max():.3f}]")
# 5.6 多裁剪(常用于测试时增强)
ten_crop = transforms.TenCrop(size=112) # 4个角+中心,再水平翻转
image_tensor = transforms.ToTensor()(pil_image.resize((224, 224)))
crops = ten_crop(image_tensor)
print(f"\nTenCrop输出数量: {len(crops)}")
print(f"每个裁剪形状: {crops[0].shape}")
```
模型构建
模型定义方式

| 方式 | 类/方法 | 适用场景 |
| --- | --- | --- |
| Sequential | nn.Sequential() | 简单线性堆叠 |
| Module子类 | class Model(nn.Module) | 复杂模型 |
| ModuleList | nn.ModuleList() | 动态层列表 |
| ModuleDict | nn.ModuleDict() | 字典形式组织层 |
```python
print("=== 模型构建 ===")
# 6.1 Sequential方式(简单模型)
model_seq = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(inplace=True),
nn.MaxPool2d(2),
nn.Conv2d(16, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.MaxPool2d(2),
nn.Flatten(),
nn.Linear(32 * 8 * 8, 128), # 假设输入为32x32,经过两次池化后为8x8
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(128, 10) # 10个类别
)
print("Sequential模型:")
print(model_seq)
# 测试前向传播
x = torch.randn(4, 3, 32, 32)
output = model_seq(x)
print(f"\n输入形状: {x.shape}")
print(f"输出形状: {output.shape}")
# 6.2 Module子类方式(推荐)
class CNNClassifier(nn.Module):
    """自定义CNN分类器"""
    def __init__(self, num_classes=10):
        super(CNNClassifier, self).__init__()
        # 特征提取器
        self.features = nn.Sequential(
            # 第一个卷积块
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            # 第二个卷积块
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            # 第三个卷积块
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        # 分类器
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 256), # 假设输入32x32,经过3次池化后为4x4
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )
        # 初始化权重
        self._initialize_weights()
    def _initialize_weights(self):
        """初始化权重"""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
    def forward(self, x):
        """前向传播"""
        features = self.features(x)
        output = self.classifier(features)
        return output
    def get_features(self, x):
        """提取特征(用于迁移学习)"""
        return self.features(x)
# 创建模型实例
model = CNNClassifier(num_classes=10)
print(f"\n自定义CNN模型:")
print(model)
# 统计模型参数
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\n模型参数统计:")
print(f"总参数: {total_params:,}")
print(f"可训练参数: {trainable_params:,}")
# 前向传播
x = torch.randn(4, 3, 32, 32)
output = model(x)
print(f"\n输入形状: {x.shape}")
print(f"输出形状: {output.shape}")
# 提取特征
features = model.get_features(x)
print(f"特征图形状: {features.shape}")
# 6.3 ModuleList和ModuleDict
class DynamicModel(nn.Module):
    """使用ModuleList和ModuleDict的动态模型"""
    def __init__(self, num_layers=3, num_classes=10):
        super(DynamicModel, self).__init__()
        # 使用ModuleList存储动态数量的层
        self.layers = nn.ModuleList([
            nn.Linear(10, 20) if i == 0 else nn.Linear(20, 20)
            for i in range(num_layers)
        ])
        # 使用ModuleDict存储不同类型的层
        self.operations = nn.ModuleDict({
            'relu': nn.ReLU(),
            'dropout': nn.Dropout(0.5),
            'final': nn.Linear(20, num_classes)
        })
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
            x = self.operations['relu'](x)
            x = self.operations['dropout'](x)
        x = self.operations['final'](x)
        return x
dynamic_model = DynamicModel(num_layers=4)
print(f"\n动态模型:")
print(dynamic_model)
x = torch.randn(4, 10)
output = dynamic_model(x)
print(f"输出形状: {output.shape}")
```
预训练模型
```python
import torchvision.models as models
print("\n=== 预训练模型 ===")
# 6.1 加载预训练模型
# ResNet
resnet18 = models.resnet18(pretrained=True)
resnet50 = models.resnet50(pretrained=True)
# VGG
vgg16 = models.vgg16(pretrained=True)
# EfficientNet
try:
    efficientnet = models.efficientnet_b0(pretrained=True)
except:
    print("EfficientNet需要torchvision>=0.11")
# Vision Transformer (ViT)
try:
    vit = models.vit_b_16(pretrained=True)
except:
    print("ViT需要torchvision>=0.12")
print(f"ResNet18结构:")
print(resnet18)
# 6.2 修改预训练模型
num_classes = 100 # 新任务类别数
# 方法1: 替换最后一层
resnet18.fc = nn.Linear(resnet18.fc.in_features, num_classes)
# 方法2: 添加新层
class CustomResNet(nn.Module):
    def __init__(self, num_classes=100):
        super(CustomResNet, self).__init__()
        # 加载预训练特征提取器
        self.features = nn.Sequential(*list(resnet50.children())[:-1])
        # 冻结特征提取器参数
        for param in self.features.parameters():
            param.requires_grad = False
        # 添加自定义分类器
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048, 512), # ResNet50最后一层特征维度为2048
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    def forward(self, x):
        features = self.features(x)
        output = self.classifier(features)
        return output
custom_resnet = CustomResNet(num_classes=100)
print(f"\n自定义ResNet:")
print(f"总参数: {sum(p.numel() for p in custom_resnet.parameters()):,}")
print(f"可训练参数: {sum(p.numel() for p in custom_resnet.parameters() if p.requires_grad):,}")
# 6.3 模型保存与加载
# 保存模型参数(state_dict)
torch.save(custom_resnet.state_dict(), 'model.pth')
# 保存检查点(包含优化器状态等)
checkpoint = {
'epoch': 10,
'model_state_dict': custom_resnet.state_dict(),
'optimizer_state_dict': optimizer_adamw.state_dict(),
'loss': 0.123,
}
torch.save(checkpoint, 'checkpoint.pth')
# 加载模型
loaded_model = CustomResNet(num_classes=100)
loaded_model.load_state_dict(torch.load('model.pth'))
loaded_model.eval() # 设置为评估模式
# 加载检查点
checkpoint = torch.load('checkpoint.pth')
loaded_model.load_state_dict(checkpoint['model_state_dict'])
print(f"\n模型保存与加载完成")
```
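较新版本的 torchvision(约0.13+)推荐用 weights 参数代替已弃用的 pretrained=True。下面是一个简要示意,旧版本仍可使用上面的写法:

```python
# torchvision >= 0.13 的写法(示意)
from torchvision.models import resnet18, ResNet18_Weights
resnet18_new = resnet18(weights=ResNet18_Weights.DEFAULT)  # 加载默认预训练权重
resnet18_scratch = resnet18(weights=None)                  # 不加载预训练权重
```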
训练与验证
训练循环模板
```python
print("=== 训练与验证 ===")
import copy  # 用于对最佳模型权重做深拷贝

def train_epoch(model, dataloader, criterion, optimizer, device):
    """训练一个epoch"""
    model.train() # 设置为训练模式
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        # 移动数据到设备
        inputs, targets = inputs.to(device), targets.to(device)
        # 清零梯度
        optimizer.zero_grad()
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # 反向传播
        loss.backward()
        # 梯度裁剪
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        # 参数更新
        optimizer.step()
        # 统计
        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        # 打印进度
        if (batch_idx + 1) % 10 == 0:
            print(f' 批次: {batch_idx+1}/{len(dataloader)}, '
                  f'损失: {loss.item():.4f}')
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def validate_epoch(model, dataloader, criterion, device):
    """验证一个epoch"""
    model.eval() # 设置为评估模式
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad(): # 禁用梯度计算
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            # 前向传播
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            # 统计
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

# 7.2 完整训练流程
def train_model(model, train_loader, val_loader, criterion, optimizer,
                scheduler, num_epochs, device):
    """完整训练流程"""
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    best_acc = 0.0
    best_model_wts = None
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print('-' * 40)
        # 训练
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, device
        )
        # 验证
        val_loss, val_acc = validate_epoch(
            model, val_loader, criterion, device
        )
        # 学习率调度
        if scheduler is not None:
            scheduler.step(val_loss) # 对于ReduceLROnPlateau
            # 或者 scheduler.step() 对于其他调度器
        # 记录结果
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        print(f'训练 - 损失: {train_loss:.4f}, 准确率: {train_acc:.2f}%')
        print(f'验证 - 损失: {val_loss:.4f}, 准确率: {val_acc:.2f}%')
        # 保存最佳模型
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())  # 深拷贝,避免权重随后续训练被覆盖
            torch.save(best_model_wts, 'best_model.pth')
            print(f' 保存最佳模型,准确率: {best_acc:.2f}%')
    # 加载最佳模型权重
    model.load_state_dict(best_model_wts)
    return {
        'model': model,
        'train_losses': train_losses,
        'train_accs': train_accs,
        'val_losses': val_losses,
        'val_accs': val_accs,
        'best_acc': best_acc
    }
# 7.3 模拟训练
print("模拟训练流程:")
# 创建模拟数据
train_dataset = TensorDataset(
torch.randn(100, 3, 32, 32),
torch.randint(0, 10, (100,))
)
val_dataset = TensorDataset(
torch.randn(20, 3, 32, 32),
torch.randint(0, 10, (20,))
)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
# 创建简单模型
simple_model = nn.Sequential(
nn.Flatten(),
nn.Linear(3*32*32, 128),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(128, 10)
)
# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = simple_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
optimizer, mode='min', factor=0.5, patience=3
)
# 运行训练(这里只运行2个epoch作为演示)
print(f"使用设备: {device}")
results = train_model(
model=model,
train_loader=train_loader,
val_loader=val_loader,
criterion=criterion,
optimizer=optimizer,
scheduler=scheduler,
num_epochs=2,
device=device
)
print(f"\n训练完成,最佳验证准确率: {results['best_acc']:.2f}%")
```
混合精度训练
```python
print("\n=== 混合精度训练 ===")
try:
    from torch.cuda.amp import autocast, GradScaler
    # 初始化梯度缩放器
    scaler = GradScaler()
    def train_epoch_amp(model, dataloader, criterion, optimizer, device, scaler):
        """使用混合精度训练的epoch"""
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            # 使用autocast进行混合精度前向传播
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)
            # 使用scaler进行梯度缩放和反向传播
            scaler.scale(loss).backward()
            # 梯度裁剪(需要先unscale)
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # 更新参数
            scaler.step(optimizer)
            scaler.update()
            # 统计
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        epoch_loss = running_loss / total
        epoch_acc = 100. * correct / total
        return epoch_loss, epoch_acc
    print("混合精度训练可用")
except ImportError:
    print("混合精度训练需要PyTorch>=1.6")
```
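较新版本的 PyTorch(约1.10+)也提供与设备解耦的 torch.amp.autocast 写法。下面是一个简要示意(仅在有GPU时运行,变量名均为演示用):

```python
# 与torch.cuda.amp.autocast等价的较新写法
if torch.cuda.is_available():
    demo_layer = nn.Linear(128, 10).cuda()
    demo_input = torch.randn(4, 128, device="cuda")
    with torch.amp.autocast(device_type="cuda", dtype=torch.float16):
        demo_output = demo_layer(demo_input)
    print(f"autocast内的输出dtype: {demo_output.dtype}")  # 通常为float16
```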
分布式训练
```python
print("\n=== 分布式训练 ===")
def setup_distributed():
    """设置分布式训练环境"""
    import os
    # 检查是否在分布式环境中
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        rank = int(os.environ["RANK"])
        world_size = int(os.environ['WORLD_SIZE'])
        gpu = int(os.environ['LOCAL_RANK'])
        torch.cuda.set_device(gpu)
        torch.distributed.init_process_group(
            backend='nccl',
            init_method='env://',
            world_size=world_size,
            rank=rank
        )
        return True, rank, world_size, gpu
    return False, 0, 1, 0
# 检查分布式环境
is_distributed, rank, world_size, gpu = setup_distributed()
if is_distributed:
    print(f"分布式训练: rank={rank}, world_size={world_size}, gpu={gpu}")
    # 创建分布式模型
    model = simple_model.cuda(gpu)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[gpu]
    )
    # 创建分布式采样器
    from torch.utils.data.distributed import DistributedSampler
    train_sampler = DistributedSampler(
        train_dataset,
        num_replicas=world_size,
        rank=rank
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=16,
        sampler=train_sampler,
        num_workers=2
    )
    print(f"使用分布式数据采样器")
else:
    print("单机训练")
```
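使用 DistributedSampler 时,一般还要在每个epoch开始前调用 set_epoch,否则各epoch的shuffle顺序相同。简要示意如下(只在分布式环境下有意义):

```python
# 每个epoch前设置采样器的epoch,保证shuffle顺序不同
if is_distributed:
    for epoch in range(2):
        train_sampler.set_epoch(epoch)
        for inputs, targets in train_loader:
            pass  # 此处省略实际的训练步骤
```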
模型部署与推理
模型推理
```python
print("\n=== 模型推理 ===")
# 8.1 基本推理
model.eval() # 确保模型在评估模式
# 创建测试数据
test_input = torch.randn(1, 3, 32, 32).to(device)
# 推理(不计算梯度)
with torch.no_grad():
    output = model(test_input)
    probabilities = torch.softmax(output, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)
print(f"输入形状: {test_input.shape}")
print(f"输出形状: {output.shape}")
print(f"预测类别: {predicted_class.item()}")
print(f"类别概率: {probabilities[0]}")
# 8.2 批量推理
def batch_inference(model, dataloader, device):
    """批量推理"""
    model.eval()
    all_predictions = []
    all_probabilities = []
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            probs = torch.softmax(outputs, dim=1)
            predictions = torch.argmax(probs, dim=1)
            all_predictions.append(predictions.cpu())
            all_probabilities.append(probs.cpu())
    all_predictions = torch.cat(all_predictions, dim=0)
    all_probabilities = torch.cat(all_probabilities, dim=0)
    return all_predictions, all_probabilities
# 测试批量推理
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
predictions, probabilities = batch_inference(model, val_loader, device)
print(f"\n批量推理结果:")
print(f"预测形状: {predictions.shape}")
print(f"概率形状: {probabilities.shape}")
print(f"前5个预测: {predictions[:5]}")
# 8.3 置信度阈值
confidence_threshold = 0.8
high_conf_mask = probabilities.max(dim=1)[0] > confidence_threshold
high_conf_predictions = predictions[high_conf_mask]
print(f"\n置信度阈值过滤:")
print(f"总样本数: {len(predictions)}")
print(f"高置信度样本数: {len(high_conf_predictions)}")
print(f"高置信度比例: {len(high_conf_predictions)/len(predictions):.2%}")
```
模型导出与部署
```python
print("\n=== 模型导出与部署 ===")
# 8.1 导出为TorchScript
# 方法1: Tracing(跟踪)
example_input = torch.randn(1, 3, 32, 32).to(device)
traced_model = torch.jit.trace(model, example_input)
traced_model.save("model_traced.pt")
print("TorchScript (Tracing) 导出完成")
# 方法2: Scripting(脚本)
# scripted_model = torch.jit.script(model)
# scripted_model.save("model_scripted.pt")
# print("TorchScript (Scripting) 导出完成")
# 8.2 加载TorchScript模型
loaded_traced_model = torch.jit.load("model_traced.pt")
loaded_traced_model.eval()
# 使用导出的模型推理
with torch.no_grad():
    traced_output = loaded_traced_model(test_input)
print(f"原始模型输出: {output[0, :3]}")
print(f"导出模型输出: {traced_output[0, :3]}")
print(f"输出是否一致: {torch.allclose(output, traced_output, rtol=1e-3)}")
# 8.3 ONNX导出(跨平台部署)
try:
    import onnx
    import onnxruntime
    # 导出为ONNX格式
    torch.onnx.export(
        model, # 模型
        test_input, # 示例输入
        "model.onnx", # 保存路径
        export_params=True, # 导出参数
        opset_version=11, # ONNX算子集版本
        do_constant_folding=True, # 优化常量
        input_names=['input'], # 输入名称
        output_names=['output'], # 输出名称
        dynamic_axes={ # 动态维度
            'input': {0: 'batch_size'},
            'output': {0: 'batch_size'}
        }
    )
    print("ONNX导出完成")
    # 使用ONNX Runtime推理
    ort_session = onnxruntime.InferenceSession("model.onnx")
    # 准备输入
    ort_inputs = {ort_session.get_inputs()[0].name: test_input.cpu().numpy()}
    # 推理
    ort_outputs = ort_session.run(None, ort_inputs)
    print(f"ONNX Runtime输出形状: {ort_outputs[0].shape}")
except ImportError:
    print("ONNX导出需要安装onnx和onnxruntime")
# 8.4 模型量化(减小模型大小,加速推理)
print("\n=== 模型量化 ===")
# 动态量化(推理时量化)
quantized_model = torch.quantization.quantize_dynamic(
model, # 原始模型
{torch.nn.Linear}, # 需要量化的模块类型
dtype=torch.qint8 # 量化数据类型
)
print(f"量化模型大小对比:")
print(f" 原始模型: {sum(p.numel() for p in model.parameters()):,} 参数")
print(f" 量化模型: {sum(p.numel() for p in quantized_model.parameters()):,} 参数")
# 保存量化模型
torch.save(quantized_model.state_dict(), "quantized_model.pth")
# 8.5 模型剪枝
print("\n=== 模型剪枝 ===")
# 简单的L1 unstructured剪枝
import torch.nn.utils.prune  # 确保prune子模块已导入
parameters_to_prune = []
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        parameters_to_prune.append((module, 'weight'))
# 应用剪枝
torch.nn.utils.prune.global_unstructured(
    parameters_to_prune,
    pruning_method=torch.nn.utils.prune.L1Unstructured,
    amount=0.2, # 剪枝20%的参数
)
# 检查剪枝效果
pruned_params = 0
total_params = 0
for module, _ in parameters_to_prune:
    mask = module.weight_mask # 剪枝掩码
    pruned_params += (mask == 0).sum().item()
    total_params += mask.numel()
print(f"剪枝比例: {pruned_params/total_params:.2%}")
```
计算机视觉特定功能
图像处理工具
```python
print("\n=== 计算机视觉特定功能 ===")
# 9.1 torchvision.transforms.functional
import torchvision.transforms.functional as F
# 创建测试图像
test_image = torch.randn(3, 224, 224)
# 图像变换
resized = F.resize(test_image, (112, 112))
cropped = F.center_crop(test_image, (200, 200))
hflipped = F.hflip(test_image)
rotated = F.rotate(test_image, angle=45)
adjusted = F.adjust_brightness(test_image, brightness_factor=1.5)
print(f"原始图像形状: {test_image.shape}")
print(f"调整大小后: {resized.shape}")
print(f"中心裁剪后: {cropped.shape}")
# 9.2 torchvision.ops(图像操作)
try:
    from torchvision.ops import nms, roi_align, roi_pool
    # 模拟目标检测结果
    boxes = torch.tensor([
        [10, 10, 50, 50], # [x1, y1, x2, y2]
        [15, 15, 55, 55],
        [100, 100, 150, 150]
    ], dtype=torch.float32)
    scores = torch.tensor([0.9, 0.8, 0.7])
    # NMS(非极大值抑制)
    keep = nms(boxes, scores, iou_threshold=0.5)
    print(f"\nNMS保留的框索引: {keep}")
    # ROI Align
    features = torch.randn(1, 256, 32, 32) # 特征图
    rois = torch.tensor([[0, 10, 10, 50, 50]], dtype=torch.float32) # [batch_idx, x1, y1, x2, y2],需为浮点类型
    roi_features = roi_align(
        features, rois,
        output_size=(7, 7),
        spatial_scale=1/16.0, # 特征图相对于原图的比例
        sampling_ratio=2
    )
    print(f"ROI Align输出形状: {roi_features.shape}")
except ImportError:
    print("torchvision.ops需要torchvision>=0.3")
# 9.3 可视化工具
import matplotlib.pyplot as plt
def visualize_feature_maps(feature_maps, num_maps=16):
    """可视化特征图"""
    feature_maps = feature_maps.detach().cpu()
    # 选择前num_maps个特征图
    if feature_maps.shape[1] > num_maps:
        feature_maps = feature_maps[:, :num_maps, :, :]
    # 创建子图
    fig, axes = plt.subplots(4, 4, figsize=(12, 12))
    axes = axes.ravel()
    for idx in range(min(num_maps, len(axes))):
        feature_map = feature_maps[0, idx] # 取第一个样本
        axes[idx].imshow(feature_map, cmap='viridis')
        axes[idx].axis('off')
        axes[idx].set_title(f'Channel {idx}')
    plt.tight_layout()
    return fig
# 测试特征图可视化
with torch.no_grad():
    test_input = torch.randn(1, 3, 32, 32)
    features = model.get_features(test_input) # 假设模型有get_features方法
    print(f"特征图形状: {features.shape}")
# 可视化(实际使用时取消注释)
# fig = visualize_feature_maps(features)
# plt.show()
```
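除了手动用 matplotlib 画特征图,torchvision.utils 里的 make_grid / save_image 也常用来把一个batch的图像拼成网格。简要示意如下:

```python
from torchvision.utils import make_grid, save_image

# 把一个batch的图像拼成网格张量(normalize=True会把数值缩放到[0,1]便于显示)
batch_images = torch.randn(16, 3, 32, 32)
grid = make_grid(batch_images, nrow=4, normalize=True)
print(f"网格张量形状: {grid.shape}")  # (3, H, W)
# save_image(batch_images, "batch_grid.png", nrow=4, normalize=True)  # 保存为图片文件
```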
常用CV模型实现
```python
print("\n=== 常用CV模型实现 ===")
# 9.1 ResNet残差块
class ResidualBlock(nn.Module):
    """ResNet残差块"""
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
# 测试残差块(通道数/步长变化时需要downsample对齐shortcut,否则out += identity会形状不匹配)
downsample = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(128)
)
res_block = ResidualBlock(64, 128, stride=2, downsample=downsample)
test_input = torch.randn(4, 64, 32, 32)
output = res_block(test_input)
print(f"ResidualBlock:")
print(f" 输入形状: {test_input.shape}")
print(f" 输出形状: {output.shape}")
# 9.2 注意力机制(SE Block)
class SEBlock(nn.Module):
    """Squeeze-and-Excitation注意力块"""
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )
    def forward(self, x):
        b, c, _, _ = x.size()
        # Squeeze
        y = self.avg_pool(x).view(b, c)
        # Excitation
        y = self.fc(y).view(b, c, 1, 1)
        # Scale
        return x * y.expand_as(x)
# 测试SE Block
se_block = SEBlock(128)
se_output = se_block(output)
print(f"\nSEBlock输出形状: {se_output.shape}")
# 9.3 U-Net编码器块
class UNetEncoderBlock(nn.Module):
    """U-Net编码器块"""
    def __init__(self, in_channels, out_channels):
        super(UNetEncoderBlock, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.pool = nn.MaxPool2d(2)
    def forward(self, x):
        # 编码路径
        x = self.conv(x)
        pooled = self.pool(x)
        return x, pooled
# 9.4 深度可分离卷积
class DepthwiseSeparableConv(nn.Module):
    """深度可分离卷积(MobileNet使用)"""
    def __init__(self, in_channels, out_channels, stride=1):
        super(DepthwiseSeparableConv, self).__init__()
        # 深度卷积
        self.depthwise = nn.Conv2d(
            in_channels, in_channels,
            kernel_size=3, stride=stride,
            padding=1, groups=in_channels, # groups=in_channels实现深度卷积
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        # 逐点卷积
        self.pointwise = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
    def forward(self, x):
        x = self.depthwise(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pointwise(x)
        x = self.bn2(x)
        x = self.relu(x)
        return x
# 测试深度可分离卷积
ds_conv = DepthwiseSeparableConv(32, 64, stride=2)
test_input = torch.randn(4, 32, 32, 32)
output = ds_conv(test_input)
print(f"\nDepthwiseSeparableConv:")
print(f" 输入形状: {test_input.shape}")
print(f" 输出形状: {output.shape}")
# 统计参数数量对比
standard_conv = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
print(f"\n参数数量对比:")
print(f" 标准卷积: {sum(p.numel() for p in standard_conv.parameters()):,}")
print(f" 深度可分离卷积: {sum(p.numel() for p in ds_conv.parameters()):,}")
```