[Algorithm Engineer] PyTorch

PyTorch

Tensor Basics

Tensor Creation and Initialization

| Method | Purpose | Example |
| --- | --- | --- |
| torch.tensor() | Create from data | torch.tensor([1,2,3]) |
| torch.zeros() | All-zeros tensor | torch.zeros((3,3)) |
| torch.ones() | All-ones tensor | torch.ones((2,4)) |
| torch.rand() | Uniform random | torch.rand((3,3)) |
| torch.randn() | Standard normal | torch.randn((100,)) |
| torch.arange() | Arithmetic sequence | torch.arange(0, 10, 2) |
| torch.linspace() | Evenly spaced values | torch.linspace(0, 1, 5) |
| torch.eye() | Identity matrix | torch.eye(3) |
| torch.full() | Filled tensor | torch.full((3,3), 5) |
| torch.empty() | Uninitialized | torch.empty((3,3)) |
| torch.from_numpy() | From NumPy | torch.from_numpy(np_array) |

```python
import torch
import numpy as np

print("=== Tensor creation and initialization ===")

# 1.1 Basic creation
x = torch.tensor([1, 2, 3, 4, 5])
print(f"From list: {x}, dtype={x.dtype}")

# 1.2 Special tensors
zeros = torch.zeros(3, 3)          # 3x3 all zeros
ones = torch.ones(2, 4)            # 2x4 all ones
rand_tensor = torch.rand(3, 3)     # 3x3 uniform random in [0, 1)
randn_tensor = torch.randn(100)    # 100 samples from the standard normal
eye_matrix = torch.eye(3)          # 3x3 identity matrix
full_tensor = torch.full((3, 3), 255.0)  # 3x3 filled with 255

print(f"\nAll-zeros tensor:\n{zeros}")
print(f"\nRandom tensor:\n{rand_tensor}")

# 1.3 Sequence tensors
range_tensor = torch.arange(0, 10, 2)      # [0, 2, 4, 6, 8]
linspace_tensor = torch.linspace(0, 1, 5)  # [0., 0.25, 0.5, 0.75, 1.]

print(f"\nArithmetic sequence: {range_tensor}")
print(f"Evenly spaced: {linspace_tensor}")

# 1.4 NumPy interop
np_array = np.array([1, 2, 3, 4, 5])
tensor_from_np = torch.from_numpy(np_array)  # NumPy -> Tensor (shares memory)
tensor_to_np = tensor_from_np.numpy()        # Tensor -> NumPy

print(f"\nNumPy array: {np_array}")
print(f"As Tensor: {tensor_from_np}")
print(f"Back to NumPy: {tensor_to_np}")

# 1.5 Data types
x_float = torch.tensor([1, 2, 3], dtype=torch.float32)
x_double = torch.tensor([1, 2, 3], dtype=torch.float64)
x_int = torch.tensor([1, 2, 3], dtype=torch.int32)
x_long = torch.tensor([1, 2, 3], dtype=torch.int64)

print(f"\nData types:")
print(f"float32: {x_float.dtype}")
print(f"float64: {x_double.dtype}")
print(f"int32: {x_int.dtype}")
print(f"int64: {x_long.dtype}")

# 1.6 Devices (CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\nCurrent device: {device}")

if torch.cuda.is_available():
    x_gpu = torch.tensor([1, 2, 3]).cuda()  # move to the GPU
    print(f"GPU tensor: {x_gpu.device}")
```
Tensor Attributes and Operations

| Attribute/Method | Purpose | Example |
| --- | --- | --- |
| .shape / .size() | Shape | tensor.shape |
| .dtype | Data type | tensor.dtype |
| .device | Device | tensor.device |
| .ndim | Number of dimensions | tensor.ndim |
| .numel() | Total element count | tensor.numel() |
| .reshape() | Reshape | tensor.reshape((3,4)) |
| .view() | Reshape as a view | tensor.view(-1, 16) |
| .squeeze() | Remove size-1 dims | tensor.squeeze() |
| .unsqueeze() | Add a dimension | tensor.unsqueeze(0) |
| .permute() | Reorder dimensions | tensor.permute(2,0,1) |
| .transpose() | Swap two dimensions | tensor.transpose(0,1) |
| .t() | Matrix transpose | tensor.t() |
| .contiguous() | Contiguous memory | tensor.contiguous() |

```python
print("\n=== 张量属性与操作 ===")

# 创建测试张量
tensor = torch.randn(3, 4, 5)  # 3x4x5张量

# 1.1 基本属性
print(f"形状: {tensor.shape}")
print(f"数据类型: {tensor.dtype}")
print(f"设备: {tensor.device}")
print(f"维度数: {tensor.ndim}")
print(f"元素总数: {tensor.numel()}")

# 1.2 形状操作
# 重塑
reshaped = tensor.reshape(12, 5)  # 改为12x5
print(f"\n重塑为(12,5): {reshaped.shape}")

# 视图(共享内存)
viewed = tensor.view(-1, 20)  # -1表示自动计算
print(f"视图为(-1,20): {viewed.shape}")

# 压缩/增加维度
# 假设有一个维度为1的张量
tensor_1d = torch.randn(1, 3, 1, 5)
squeezed = tensor_1d.squeeze()  # 压缩所有维度为1的
unsqueezed = squeezed.unsqueeze(0)  # 在第0维增加维度
print(f"\n原始形状: {tensor_1d.shape}")
print(f"压缩后: {squeezed.shape}")
print(f"增加维度后: {unsqueezed.shape}")

# 1.3 维度重排
# 图像数据通常需要调整维度顺序
image_tensor = torch.randn(3, 224, 224)  # (通道, 高, 宽)
# 转为PyTorch期望的(N, C, H, W)格式
batch_tensor = image_tensor.unsqueeze(0)  # (1, 3, 224, 224)
print(f"\n图像张量形状: {image_tensor.shape}")
print(f"批次格式: {batch_tensor.shape}")

# 维度重排(通道最后 -> 通道最先)
hwc_tensor = torch.randn(224, 224, 3)  # 高度, 宽度, 通道
chw_tensor = hwc_tensor.permute(2, 0, 1)  # 通道, 高度, 宽度
print(f"\nHWC格式: {hwc_tensor.shape}")
print(f"CHW格式: {chw_tensor.shape}")

# 1.4 转置
matrix = torch.randn(3, 4)
transposed = matrix.transpose(0, 1)  # 或 matrix.T
print(f"\n矩阵形状: {matrix.shape}")
print(f"转置形状: {transposed.shape}")

# 1.5 内存连续性
# 某些操作后张量可能不连续
non_contiguous = transposed  # transpose操作产生不连续张量
is_contiguous = non_contiguous.is_contiguous()
print(f"\n张量是否连续: {is_contiguous}")

# 转为连续内存
contiguous_tensor = non_contiguous.contiguous()
print(f"转为连续后: {contiguous_tensor.is_contiguous()}")

Tensor Operations

Basic Math Operations

| Operation | Purpose | Example |
| --- | --- | --- |
| +, -, *, / | Elementwise arithmetic | a + b, a * b |
| @, torch.matmul() | Matrix multiplication | a @ b |
| torch.mm() | Matrix multiplication (2D only) | torch.mm(a, b) |
| torch.add() | Addition | torch.add(a, b) |
| torch.mul() | Multiplication | torch.mul(a, b) |
| torch.div() | Division | torch.div(a, b) |
| torch.pow() | Power | torch.pow(a, 2) |
| torch.sqrt() | Square root | torch.sqrt(a) |
| torch.exp() | Exponential | torch.exp(a) |
| torch.log() | Natural log | torch.log(a) |
| torch.abs() | Absolute value | torch.abs(a) |
| torch.clamp() | Clamp to a range | torch.clamp(a, min, max) |

```python
print("=== 张量基本运算 ===")

# 创建测试张量
a = torch.tensor([1.0, 2.0, 3.0, 4.0])
b = torch.tensor([5.0, 6.0, 7.0, 8.0])

# 2.1 基础运算
print("基础运算:")
print(f"a + b = {a + b}")
print(f"a - b = {a - b}")
print(f"a * b = {a * b}")
print(f"a / b = {a / b}")
print(f"a ** 2 = {a ** 2}")

# 2.2 函数运算
print("\n函数运算:")
print(f"平方根 sqrt(a): {torch.sqrt(a)}")
print(f"指数 exp(a): {torch.exp(a)}")
print(f"自然对数 log(a): {torch.log(a)}")
print(f"绝对值 abs(-a): {torch.abs(-a)}")

# 2.3 限幅操作(常用于图像处理)
values = torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0])
clamped = torch.clamp(values, min=-1.0, max=1.0)
print(f"\n限幅 [-1, 1]: {clamped}")

# 2.4 矩阵运算
matrix_a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
matrix_b = torch.tensor([[5.0, 6.0], [7.0, 8.0]])

print(f"\n矩阵乘法:")
print(f"a @ b = \n{a @ b}")  # 向量点积
print(f"\nmatrix_a @ matrix_b = \n{matrix_a @ matrix_b}")
print(f"\ntorch.matmul(matrix_a, matrix_b) = \n{torch.matmul(matrix_a, matrix_b)}")

# 2.5 广播机制
vector = torch.tensor([1, 2, 3])
matrix = torch.tensor([[1, 2, 3], [4, 5, 6]])

result = matrix + vector  # vector广播到每行
print(f"\n广播运算:")
print(f"matrix:\n{matrix}")
print(f"vector: {vector}")
print(f"matrix + vector:\n{result}")
Statistical Operations

| Function | Purpose | Example |
| --- | --- | --- |
| torch.sum() | Sum | torch.sum(tensor) |
| torch.mean() | Mean | torch.mean(tensor) |
| torch.std() | Standard deviation | torch.std(tensor) |
| torch.var() | Variance | torch.var(tensor) |
| torch.min() | Minimum | torch.min(tensor) |
| torch.max() | Maximum | torch.max(tensor) |
| torch.argmin() | Index of the minimum | torch.argmin(tensor) |
| torch.argmax() | Index of the maximum | torch.argmax(tensor) |
| torch.median() | Median | torch.median(tensor) |
| torch.sort() | Sort | torch.sort(tensor) |
| torch.topk() | Top-k values | torch.topk(tensor, k) |
| torch.unique() | Unique values | torch.unique(tensor) |

```python
print("\n=== 统计运算 ===")

# 创建测试数据
data = torch.randn(100)  # 100个正态分布随机数
matrix = torch.randn(3, 4)

print("统计运算:")
print(f"数据形状: {data.shape}")
print(f"总和: {torch.sum(data):.3f}")
print(f"平均值: {torch.mean(data):.3f}")
print(f"标准差: {torch.std(data):.3f}")
print(f"方差: {torch.var(data):.3f}")
print(f"最小值: {torch.min(data):.3f}")
print(f"最大值: {torch.max(data):.3f}")

# 维度统计
print(f"\n矩阵统计:")
print(f"原始矩阵:\n{matrix}")
print(f"按行求和: {torch.sum(matrix, dim=1)}")
print(f"按列求平均: {torch.mean(matrix, dim=0)}")

# 索引统计
print(f"\n索引统计:")
values, indices = torch.max(matrix, dim=1)  # 每行最大值和索引
print(f"每行最大值: {values}")
print(f"每行最大值索引: {indices}")

# 排序
print(f"\n排序:")
sorted_values, sorted_indices = torch.sort(matrix, dim=1, descending=True)
print(f"每行降序排序:\n{sorted_values}")
print(f"排序索引:\n{sorted_indices}")

# Top-k
print(f"\nTop-k:")
top_values, top_indices = torch.topk(matrix, k=2, dim=1)
print(f"每行前2个值:\n{top_values}")
print(f"每行前2个索引:\n{top_indices}")

# 唯一值
tensor_with_dups = torch.tensor([1, 2, 2, 3, 3, 3, 4])
unique_values = torch.unique(tensor_with_dups)
print(f"\n唯一值: {unique_values}")
Tensor Indexing and Slicing

| Operation | Syntax | Description |
| --- | --- | --- |
| Basic indexing | tensor[index] | Index by position |
| Slicing | tensor[start:end:step] | Slice a range |
| Boolean indexing | tensor[mask] | Select by boolean mask |
| Fancy indexing | tensor[[indices]] | Select by index list |
| : | tensor[:] | All elements |
| ... | tensor[..., 0] | Ellipsis over leading dims |
| torch.masked_select() | torch.masked_select(tensor, mask) | Mask-based selection (flattened) |
| torch.gather() | torch.gather(tensor, dim, index) | Gather along a dimension |
| torch.index_select() | torch.index_select(tensor, dim, index) | Select by index along a dimension |

```python
print("\n=== 张量索引与切片 ===")

# 创建测试张量
tensor = torch.arange(24).reshape(4, 6)
print(f"原始张量:\n{tensor}")

# 2.1 基本索引
print(f"\n基本索引:")
print(f"第一行: {tensor[0]}")
print(f"第二列: {tensor[:, 1]}")
print(f"元素[2,3]: {tensor[2, 3]}")

# 2.2 切片
print(f"\n切片:")
print(f"行1-2, 列2-4:\n{tensor[1:3, 2:5]}")
print(f"每隔一行:\n{tensor[::2]}")

# 2.3 布尔索引
mask = tensor > 10
print(f"\n布尔索引 (tensor > 10):\n{tensor[mask]}")

# 2.4 花式索引
indices = torch.tensor([0, 2, 3])
print(f"\n花式索引 [0,2,3]:\n{tensor[indices]}")

# 2.5 高级索引函数
print(f"\n高级索引函数:")

# masked_select
mask = tensor > 15
selected = torch.masked_select(tensor, mask)
print(f"masked_select (>15): {selected}")

# index_select
selected_rows = torch.index_select(tensor, dim=0, index=torch.tensor([0, 2]))
print(f"index_select 行[0,2]:\n{selected_rows}")

# gather (复杂索引)
# 假设我们要从每行选择不同列的元素
index = torch.tensor([[0, 1, 2], [2, 3, 4], [1, 2, 3], [3, 4, 5]])
gathered = torch.gather(tensor, dim=1, index=index)
print(f"gather操作:\n{gathered}")

# 2.6 省略号索引
tensor_3d = torch.arange(60).reshape(3, 4, 5)
print(f"\n省略号索引:")
print(f"张量形状: {tensor_3d.shape}")
print(f"tensor_3d[..., 0] 形状: {tensor_3d[..., 0].shape}")

Automatic Differentiation (Autograd)

Gradient Computation Basics

| Concept/Function | Purpose | Notes |
| --- | --- | --- |
| requires_grad=True | Enable gradients | Tracks computation history |
| .backward() | Backpropagation | Computes gradients |
| .grad | Gradient value | Gradient of a parameter |
| .grad_fn | Gradient function | The function that created the tensor |
| torch.no_grad() | Disable gradients | Use during inference |
| .detach() | Detach tensor | Removes it from the computation graph |
| torch.autograd.grad() | Compute gradients | Manual gradient computation |
| .retain_grad() | Retain gradient | Keep grads on non-leaf tensors |

```python
print("=== 自动求导基础 ===")

# 3.1 基本梯度计算
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)

# 定义计算
z = x**2 + y**3 + 10

# 反向传播
z.backward()

print(f"x = {x.item()}, y = {y.item()}")
print(f"z = x² + y³ + 10 = {z.item()}")
print(f"∂z/∂x = {x.grad.item()}")  # 2x = 4
print(f"∂z/∂y = {y.grad.item()}")  # 3y² = 27

# 3.2 多次反向传播需要清零梯度
x.grad.zero_()
y.grad.zero_()

# 重新计算
w = x*y + torch.sin(x)
w.backward()
print(f"\n清零后重新计算:")
print(f"w = x*y + sin(x) = {w.item()}")
print(f"∂w/∂x = {x.grad.item()}")  # y + cos(x) = 3 + cos(2)
print(f"∂w/∂y = {y.grad.item()}")  # x = 2

# 3.3 非标量张量的梯度
print(f"\n非标量梯度:")
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x**2

# 需要指定梯度权重
v = torch.tensor([0.1, 1.0, 0.001])
y.backward(v)
print(f"x = {x}")
print(f"y = x² = {y}")
print(f"梯度权重 v = {v}")
print(f"梯度 ∂y/∂x = {x.grad}")  # 2x * v

# 3.4 禁用梯度上下文
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

with torch.no_grad():
    y = x * 2  # 不会跟踪梯度
    print(f"\n在no_grad上下文中的操作:")
    print(f"y.requires_grad = {y.requires_grad}")

# 3.5 分离张量
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x**2
z = y.detach()  # 分离y,不再跟踪梯度

print(f"\n张量分离:")
print(f"y.requires_grad = {y.requires_grad}")
print(f"z.requires_grad = {z.requires_grad}")

# 3.6 梯度累加
print(f"\n梯度累加:")
x = torch.tensor(2.0, requires_grad=True)

for i in range(3):
    y = x * i if i > 0 else x
    y.backward(retain_graph=True if i < 2 else False)
    print(f"第{i+1}次反向传播,梯度: {x.grad.item()}")
    
# 注意:最后一次不需要retain_graph
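
The table also lists torch.autograd.grad() and .retain_grad(), which the block above does not demonstrate; a minimal sketch:

```python
# torch.autograd.grad(): compute gradients without populating .grad
x = torch.tensor(2.0, requires_grad=True)
y = x ** 3
(dy_dx,) = torch.autograd.grad(y, x)  # returns a tuple of gradients
print(dy_dx)  # 3x² = 12

# .retain_grad(): keep gradients on a non-leaf tensor
a = torch.tensor(1.0, requires_grad=True)
b = a * 2          # non-leaf: its .grad is normally discarded
b.retain_grad()
(b * 3).backward()
print(a.grad, b.grad)  # 6.0 and 3.0
```
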
Custom Autograd Functions

```python
print("\n=== 自定义自动求导函数 ===")

# 3.1 使用 torch.autograd.Function
class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        # 前向传播
        ctx.save_for_backward(input)  # 保存输入用于反向传播
        return input.clamp(min=0)
    
    @staticmethod
    def backward(ctx, grad_output):
        # 反向传播
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

# 使用自定义函数
x = torch.randn(5, requires_grad=True)
print(f"输入: {x}")

my_relu = MyReLU.apply
y = my_relu(x)
print(f"ReLU输出: {y}")

# 计算梯度
y.sum().backward()
print(f"梯度: {x.grad}")

# 3.2 对比内置ReLU
x.grad.zero_()
y_builtin = torch.relu(x)
y_builtin.sum().backward()
print(f"内置ReLU梯度: {x.grad}")

Neural Network Modules (torch.nn)

Layer Definition and Usage

| Layer type | Class | Main parameters |
| --- | --- | --- |
| Linear | nn.Linear() | in_features, out_features, bias |
| Convolution | nn.Conv2d() | in_channels, out_channels, kernel_size, stride, padding |
| Pooling | nn.MaxPool2d() | kernel_size, stride, padding |
| Activation | nn.ReLU() | inplace |
| Normalization | nn.BatchNorm2d() | num_features |
| Dropout | nn.Dropout() | p |
| Recurrent | nn.LSTM() | input_size, hidden_size, num_layers |
| Embedding | nn.Embedding() | num_embeddings, embedding_dim |

```python
import torch.nn as nn

print("=== Neural network layers ===")

# 4.1 Linear layer
linear = nn.Linear(in_features=10, out_features=5, bias=True)
print(f"Linear layer: {linear}")
print(f"Weight shape: {linear.weight.shape}")  # (5, 10)
print(f"Bias shape: {linear.bias.shape}")      # (5,)

# Forward pass
x = torch.randn(3, 10)  # batch size = 3, features = 10
y = linear(x)
print(f"Input shape: {x.shape}")
print(f"Output shape: {y.shape}")

# 4.2 Convolution layer (core of CV)
conv = nn.Conv2d(
    in_channels=3,      # input channels (3 for RGB images)
    out_channels=16,    # output channels (number of filters)
    kernel_size=3,      # kernel size
    stride=1,           # stride
    padding=1,          # padding
    bias=True
)
print(f"\nConv layer: {conv}")
print(f"Weight shape: {conv.weight.shape}")  # (16, 3, 3, 3)

# Convolution forward pass
image = torch.randn(4, 3, 32, 32)  # batch = 4, channels = 3, height = 32, width = 32
output = conv(image)
print(f"Input shape: {image.shape}")
print(f"Output shape: {output.shape}")  # (4, 16, 32, 32); padding=1 preserves spatial size

# 4.3 Pooling layer
pool = nn.MaxPool2d(kernel_size=2, stride=2)
pooled = pool(output)
print(f"\nShape after max pooling: {pooled.shape}")  # (4, 16, 16, 16)

# 4.4 Batch normalization
batch_norm = nn.BatchNorm2d(num_features=16)
normed = batch_norm(pooled)
print(f"Shape after batch norm: {normed.shape}")

# 4.5 Dropout
dropout = nn.Dropout(p=0.5)
dropped = dropout(normed)
print(f"Shape after dropout: {dropped.shape}")

# 4.6 Activation functions
relu = nn.ReLU(inplace=True)  # inplace=True saves memory
activated = relu(dropped)
print(f"Shape after ReLU: {activated.shape}")

# Other activations
sigmoid = nn.Sigmoid()
tanh = nn.Tanh()
leaky_relu = nn.LeakyReLU(negative_slope=0.01)
softmax = nn.Softmax(dim=1)  # the dimension along which to apply softmax
```
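
nn.LSTM and nn.Embedding from the table are not shown in the block above; a minimal sketch on a toy token batch (the vocabulary size and dimensions are arbitrary):

```python
# Embedding: map integer token ids to dense vectors
embedding = nn.Embedding(num_embeddings=1000, embedding_dim=64)  # vocab of 1000
tokens = torch.randint(0, 1000, (4, 12))  # batch of 4 sequences, length 12
embedded = embedding(tokens)              # (4, 12, 64)

# LSTM: batch_first=True expects (batch, seq_len, input_size)
lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True)
output, (h_n, c_n) = lstm(embedded)

print(output.shape)  # (4, 12, 128): hidden state at every time step
print(h_n.shape)     # (2, 4, 128): final hidden state per layer
```
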
Loss Functions

| Loss | Class | Use case |
| --- | --- | --- |
| Mean squared error | nn.MSELoss() | Regression |
| Cross entropy | nn.CrossEntropyLoss() | Multi-class classification |
| Binary cross entropy | nn.BCELoss() | Binary classification |
| BCE with logits | nn.BCEWithLogitsLoss() | Binary classification (numerically stable) |
| L1 loss | nn.L1Loss() | Regression, sparsity |
| Huber loss | nn.SmoothL1Loss() | Regression, robust to outliers |
| KL divergence | nn.KLDivLoss() | Probability distributions |
| Triplet loss | nn.TripletMarginLoss() | Metric learning |
| Cosine embedding | nn.CosineEmbeddingLoss() | Contrastive/similarity learning (PyTorch has no built-in nn.ContrastiveLoss) |

```python
print("\n=== 损失函数 ===")

# 4.1 回归损失
mse_loss = nn.MSELoss()
l1_loss = nn.L1Loss()
smooth_l1 = nn.SmoothL1Loss()

# 示例:图像重建
pred = torch.randn(4, 3, 32, 32)
target = torch.randn(4, 3, 32, 32)

mse = mse_loss(pred, target)
l1 = l1_loss(pred, target)
huber = smooth_l1(pred, target)

print(f"MSE损失: {mse.item():.4f}")
print(f"L1损失: {l1.item():.4f}")
print(f"Huber损失: {huber.item():.4f}")

# 4.2 分类损失
ce_loss = nn.CrossEntropyLoss()
bce_loss = nn.BCELoss()
bce_logits_loss = nn.BCEWithLogitsLoss()

# 多分类示例
logits = torch.randn(4, 10)  # 4个样本,10个类别
labels = torch.randint(0, 10, (4,))  # 真实标签

ce = ce_loss(logits, labels)
print(f"\n交叉熵损失: {ce.item():.4f}")

# 二分类示例
sigmoid_output = torch.rand(4, 1)  # 经过sigmoid的输出
binary_labels = torch.randint(0, 2, (4, 1)).float()

bce = bce_loss(sigmoid_output, binary_labels)
print(f"二元交叉熵损失: {bce.item():.4f}")

# 使用BCEWithLogitsLoss(更稳定)
logits = torch.randn(4, 1)  # 未经过sigmoid
bce_logits = bce_logits_loss(logits, binary_labels)
print(f"BCEWithLogits损失: {bce_logits.item():.4f}")

# 4.3 特定任务损失
# Triplet损失(人脸识别、度量学习)
triplet_loss = nn.TripletMarginLoss(margin=1.0)

anchor = torch.randn(4, 128)  # 锚点样本
positive = torch.randn(4, 128)  # 正样本
negative = torch.randn(4, 128)  # 负样本

triplet = triplet_loss(anchor, positive, negative)
print(f"\nTriplet损失: {triplet.item():.4f}")
Optimizers

| Optimizer | Class | Main parameters |
| --- | --- | --- |
| SGD | torch.optim.SGD() | params, lr, momentum, weight_decay |
| Adam | torch.optim.Adam() | params, lr, betas, eps, weight_decay |
| AdamW | torch.optim.AdamW() | params, lr, weight_decay (decoupled) |
| RMSprop | torch.optim.RMSprop() | params, lr, alpha, weight_decay |
| Adagrad | torch.optim.Adagrad() | params, lr, lr_decay |
| LR scheduling | various schedulers | adjust the learning rate over training |

```python
import torch.optim as optim

print("\n=== Optimizers ===")

# Create a simple model
model = nn.Sequential(
    nn.Linear(10, 20),
    nn.ReLU(),
    nn.Linear(20, 2)
)

# 4.1 SGD optimizer
optimizer_sgd = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4  # L2 regularization
)
print(f"SGD optimizer: {optimizer_sgd}")

# 4.2 Adam optimizer
optimizer_adam = optim.Adam(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),  # decay rates for the first- and second-moment estimates
    eps=1e-8,            # numerical stability
    weight_decay=0
)
print(f"Adam optimizer: {optimizer_adam}")

# 4.3 AdamW optimizer (recommended)
optimizer_adamw = optim.AdamW(
    model.parameters(),
    lr=0.001,
    weight_decay=0.01  # decoupled weight decay
)
print(f"AdamW optimizer: {optimizer_adamw}")

# 4.4 Optimizer usage example
criterion = nn.CrossEntropyLoss()

# Simulated training steps
for epoch in range(3):
    # Fake data
    inputs = torch.randn(4, 10)
    targets = torch.randint(0, 2, (4,))
    
    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    
    # Backward pass
    optimizer_adamw.zero_grad()  # zero the gradients
    loss.backward()              # compute gradients
    
    # Gradient clipping (guards against exploding gradients)
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    
    # Parameter update
    optimizer_adamw.step()
    
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# 4.5 Learning-rate schedulers
print(f"\nLearning-rate schedulers:")

# Create an optimizer
optimizer = optim.Adam(model.parameters(), lr=0.01)

# StepLR: multiply lr by gamma every step_size epochs
scheduler_step = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# MultiStepLR: adjust lr at the given epochs
scheduler_multi = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[30, 80], gamma=0.1
)

# CosineAnnealingLR: cosine annealing
scheduler_cosine = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=50, eta_min=0
)

# ReduceLROnPlateau: reduce lr when a metric stops improving
scheduler_plateau = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=5
)

print(f"Initial learning rate: {optimizer.param_groups[0]['lr']}")

# Simulated scheduler usage
# (in real training, call optimizer.step() before scheduler.step())
for epoch in range(5):
    # Fake training metric
    loss = torch.tensor(0.5 - epoch*0.1)
    
    # StepLR update
    scheduler_step.step()
    
    # Plateau update (needs the monitored metric)
    scheduler_plateau.step(loss)
    
    print(f"Epoch {epoch+1}, LR: {optimizer.param_groups[0]['lr']:.6f}")
```

Data Loading and Processing

Dataset and DataLoader

| Class/Function | Purpose | Main parameters |
| --- | --- | --- |
| Dataset | Dataset base class | must implement __len__ and __getitem__ |
| DataLoader | Data loader | dataset, batch_size, shuffle, num_workers |
| TensorDataset | Dataset of tensors | *tensors |
| Subset | Subset of a dataset | dataset, indices |
| random_split() | Random split | dataset, lengths |

```python
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import torchvision.transforms as transforms

print("=== Data loading and processing ===")

# 5.1 Custom Dataset
class ImageDataset(Dataset):
    """Custom image dataset."""
    
    def __init__(self, data, labels, transform=None):
        """
        Args:
            data: image data with shape (N, H, W, C) or (N, C, H, W)
            labels: labels
            transform: data-augmentation transforms
        """
        self.data = data
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        image = self.data[idx]
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

# Create fake data
num_samples = 100
# values in [0, 1) so ToPILImage below interprets them correctly
image_data = torch.rand(num_samples, 3, 32, 32)  # 100 32x32 RGB images
image_labels = torch.randint(0, 10, (num_samples,))  # 10 classes

# 5.2 Create a Dataset instance
transform = transforms.Compose([
    transforms.ToPILImage(),           # convert to a PIL image
    transforms.RandomHorizontalFlip(), # random horizontal flip
    transforms.RandomRotation(10),     # random rotation of ±10 degrees
    transforms.ToTensor(),             # convert to a Tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # normalization
])

dataset = ImageDataset(image_data, image_labels, transform=transform)
print(f"Dataset size: {len(dataset)}")
print(f"Single sample shape: {dataset[0][0].shape}")  # (C, H, W)

# 5.3 Create a DataLoader
dataloader = DataLoader(
    dataset=dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,      # multi-process loading
    pin_memory=True     # pinned memory speeds up host-to-GPU transfer
)

print(f"\nDataLoader info:")
print(f"Batch size: {dataloader.batch_size}")
print(f"Number of batches: {len(dataloader)}")

# 5.4 Iterate over the DataLoader
for batch_idx, (images, labels) in enumerate(dataloader):
    print(f"\nBatch {batch_idx}:")
    print(f"  Image shape: {images.shape}")  # (16, 3, 32, 32)
    print(f"  Label shape: {labels.shape}")  # (16,)
    
    if batch_idx == 2:  # only inspect the first 3 batches
        break

# 5.5 TensorDataset (for simple cases)
tensor_dataset = TensorDataset(image_data, image_labels)
tensor_dataloader = DataLoader(tensor_dataset, batch_size=8, shuffle=True)

print(f"\nTensorDataset batch:")
for images, labels in tensor_dataloader:
    print(f"  Shapes: {images.shape}, {labels.shape}")
    break

# 5.6 Dataset splitting
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print(f"\nDataset split:")
print(f"Training set: {len(train_dataset)} samples")
print(f"Validation set: {len(val_dataset)} samples")

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
```
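
Subset from the table is not demonstrated above; a minimal sketch that pins down a fixed, reproducible slice of the dataset (handy for debugging on a few samples):

```python
from torch.utils.data import Subset

# Take a fixed subset by explicit indices
debug_subset = Subset(dataset, indices=list(range(10)))
debug_loader = DataLoader(debug_subset, batch_size=4, shuffle=False)

print(f"Subset size: {len(debug_subset)}")
```
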
Data Augmentation (Transforms)

| Transform | Class | Purpose |
| --- | --- | --- |
| Crop | transforms.RandomCrop() | Random crop |
| Flip | transforms.RandomHorizontalFlip() | Random horizontal flip |
| Rotate | transforms.RandomRotation() | Random rotation |
| Color jitter | transforms.ColorJitter() | Adjust brightness, contrast, etc. |
| Normalize | transforms.Normalize() | Standardization |
| Resize | transforms.Resize() | Resize |
| To tensor | transforms.ToTensor() | PIL/NumPy to Tensor |
| Random erasing | transforms.RandomErasing() | Cutout-style augmentation |
| Compose | transforms.Compose() | Chain multiple transforms |
| Custom | transforms.Lambda() | Custom transform |

```python
import torchvision.transforms as transforms
from PIL import Image
import numpy as np

print("\n=== Data augmentation transforms ===")

# 5.1 Create a fake image (PIL format)
np_image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
pil_image = Image.fromarray(np_image)
print(f"Original image: {pil_image.size}, mode: {pil_image.mode}")

# 5.2 Define the augmentation pipeline
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),          # resize
    transforms.RandomCrop(112),             # random crop to 112x112
    transforms.RandomHorizontalFlip(p=0.5), # horizontal flip with 50% probability
    transforms.RandomRotation(degrees=15),  # random rotation of ±15 degrees
    transforms.ColorJitter(
        brightness=0.2,    # brightness jitter
        contrast=0.2,      # contrast jitter
        saturation=0.2,    # saturation jitter
        hue=0.1            # hue jitter
    ),
    transforms.ToTensor(),                  # convert to a Tensor
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],        # ImageNet means
        std=[0.229, 0.224, 0.225]          # ImageNet stds
    ),
    transforms.RandomErasing(               # Cutout-style erasing
        p=0.5,                             # application probability
        scale=(0.02, 0.1),                 # erased-area fraction
        ratio=(0.3, 3.3),                  # aspect-ratio range
        value=0                            # fill value
    )
])

# 5.3 Validation transforms (usually simpler)
val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.CenterCrop(112),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# 5.4 Apply the transforms
augmented_image = train_transform(pil_image)
print(f"\nAugmented image shape: {augmented_image.shape}")  # (3, 112, 112)
print(f"Pixel value range: [{augmented_image.min():.3f}, {augmented_image.max():.3f}]")

# 5.5 Custom transform
def custom_normalize(tensor):
    """Custom normalization that rescales values from [0, 1] to [-1, 1]."""
    return tensor * 2 - 1

custom_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(custom_normalize)
])

custom_image = custom_transform(pil_image)
print(f"\nCustom normalization range: [{custom_image.min():.3f}, {custom_image.max():.3f}]")

# 5.6 Multi-crop (common for test-time augmentation)
ten_crop = transforms.TenCrop(size=112)  # 4 corners + center, plus horizontal flips
image_tensor = transforms.ToTensor()(pil_image.resize((224, 224)))
crops = ten_crop(image_tensor)
print(f"\nNumber of TenCrop outputs: {len(crops)}")
print(f"Shape of each crop: {crops[0].shape}")
```

Model Building

Ways to Define a Model

| Approach | Class/Method | Use case |
| --- | --- | --- |
| Sequential | nn.Sequential() | Simple linear stacks |
| Module subclass | class Model(nn.Module) | Complex models |
| ModuleList | nn.ModuleList() | Dynamic lists of layers |
| ModuleDict | nn.ModuleDict() | Layers organized as a dict |

```python
print("=== 模型构建 ===")

# 6.1 Sequential方式(简单模型)
model_seq = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.BatchNorm2d(16),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(2),
    
    nn.Conv2d(16, 32, kernel_size=3, padding=1),
    nn.BatchNorm2d(32),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(2),
    
    nn.Flatten(),
    nn.Linear(32 * 8 * 8, 128),  # 假设输入为32x32,经过两次池化后为8x8
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, 10)           # 10个类别
)

print("Sequential模型:")
print(model_seq)

# 测试前向传播
x = torch.randn(4, 3, 32, 32)
output = model_seq(x)
print(f"\n输入形状: {x.shape}")
print(f"输出形状: {output.shape}")

# 6.2 Module子类方式(推荐)
class CNNClassifier(nn.Module):
    """自定义CNN分类器"""
    
    def __init__(self, num_classes=10):
        super(CNNClassifier, self).__init__()
        
        # 特征提取器
        self.features = nn.Sequential(
            # 第一个卷积块
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            
            # 第二个卷积块
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            
            # 第三个卷积块
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        
        # 分类器
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 256),  # 假设输入32x32,经过3次池化后为4x4
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )
        
        # 初始化权重
        self._initialize_weights()
    
    def _initialize_weights(self):
        """初始化权重"""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        """前向传播"""
        features = self.features(x)
        output = self.classifier(features)
        return output
    
    def get_features(self, x):
        """提取特征(用于迁移学习)"""
        return self.features(x)

# 创建模型实例
model = CNNClassifier(num_classes=10)
print(f"\n自定义CNN模型:")
print(model)

# 统计模型参数
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\n模型参数统计:")
print(f"总参数: {total_params:,}")
print(f"可训练参数: {trainable_params:,}")

# 前向传播
x = torch.randn(4, 3, 32, 32)
output = model(x)
print(f"\n输入形状: {x.shape}")
print(f"输出形状: {output.shape}")

# 提取特征
features = model.get_features(x)
print(f"特征图形状: {features.shape}")

# 6.3 ModuleList和ModuleDict
class DynamicModel(nn.Module):
    """使用ModuleList和ModuleDict的动态模型"""
    
    def __init__(self, num_layers=3, num_classes=10):
        super(DynamicModel, self).__init__()
        
        # 使用ModuleList存储动态数量的层
        self.layers = nn.ModuleList([
            nn.Linear(10, 20) if i == 0 else nn.Linear(20, 20)
            for i in range(num_layers)
        ])
        
        # 使用ModuleDict存储不同类型的层
        self.operations = nn.ModuleDict({
            'relu': nn.ReLU(),
            'dropout': nn.Dropout(0.5),
            'final': nn.Linear(20, num_classes)
        })
    
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
            x = self.operations['relu'](x)
            x = self.operations['dropout'](x)
        
        x = self.operations['final'](x)
        return x

dynamic_model = DynamicModel(num_layers=4)
print(f"\n动态模型:")
print(dynamic_model)

x = torch.randn(4, 10)
output = dynamic_model(x)
print(f"输出形状: {output.shape}")
Pretrained Models

```python
import torchvision.models as models

print("\n=== Pretrained models ===")

# 6.1 Load pretrained models
# (on newer torchvision, pretrained=True is deprecated in favor of weights=...)
# ResNet
resnet18 = models.resnet18(pretrained=True)
resnet50 = models.resnet50(pretrained=True)

# VGG
vgg16 = models.vgg16(pretrained=True)

# EfficientNet
try:
    efficientnet = models.efficientnet_b0(pretrained=True)
except Exception:
    print("EfficientNet requires torchvision>=0.11")

# Vision Transformer (ViT)
try:
    vit = models.vit_b_16(pretrained=True)
except Exception:
    print("ViT requires torchvision>=0.12")

print(f"ResNet18 architecture:")
print(resnet18)

# 6.2 Adapting a pretrained model
num_classes = 100  # number of classes in the new task

# Option 1: replace the final layer
resnet18.fc = nn.Linear(resnet18.fc.in_features, num_classes)

# Option 2: add new layers
class CustomResNet(nn.Module):
    def __init__(self, num_classes=100):
        super(CustomResNet, self).__init__()
        # Use the pretrained network as a feature extractor
        self.features = nn.Sequential(*list(resnet50.children())[:-1])
        # Freeze the feature-extractor parameters
        for param in self.features.parameters():
            param.requires_grad = False
        
        # Add a custom classifier head
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048, 512),  # ResNet50's final feature dimension is 2048
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        features = self.features(x)
        output = self.classifier(features)
        return output

custom_resnet = CustomResNet(num_classes=100)
print(f"\nCustom ResNet:")
print(f"Total parameters: {sum(p.numel() for p in custom_resnet.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in custom_resnet.parameters() if p.requires_grad):,}")

# 6.3 Saving and loading models
# Save the model weights (state_dict)
torch.save(custom_resnet.state_dict(), 'model.pth')

# Save a checkpoint (including optimizer state, etc.)
checkpoint = {
    'epoch': 10,
    'model_state_dict': custom_resnet.state_dict(),
    'optimizer_state_dict': optimizer_adamw.state_dict(),
    'loss': 0.123,
}
torch.save(checkpoint, 'checkpoint.pth')

# Load the model
loaded_model = CustomResNet(num_classes=100)
loaded_model.load_state_dict(torch.load('model.pth'))
loaded_model.eval()  # switch to evaluation mode

# Load the checkpoint
checkpoint = torch.load('checkpoint.pth')
loaded_model.load_state_dict(checkpoint['model_state_dict'])

print(f"\nModel saving and loading complete")
```

Training and Validation

Training Loop Template

```python
print("=== 训练与验证 ===")

def train_epoch(model, dataloader, criterion, optimizer, device):
    """训练一个epoch"""
    model.train()  # 设置为训练模式
    running_loss = 0.0
    correct = 0
    total = 0
    
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        # 移动数据到设备
        inputs, targets = inputs.to(device), targets.to(device)
        
        # 清零梯度
        optimizer.zero_grad()
        
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # 反向传播
        loss.backward()
        
        # 梯度裁剪
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        # 参数更新
        optimizer.step()
        
        # 统计
        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        
        # 打印进度
        if (batch_idx + 1) % 10 == 0:
            print(f'  批次: {batch_idx+1}/{len(dataloader)}, '
                  f'损失: {loss.item():.4f}')
    
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    
    return epoch_loss, epoch_acc

def validate_epoch(model, dataloader, criterion, device):
    """验证一个epoch"""
    model.eval()  # 设置为评估模式
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():  # 禁用梯度计算
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            # 前向传播
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            # 统计
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    
    return epoch_loss, epoch_acc

# 7.2 完整训练流程
def train_model(model, train_loader, val_loader, criterion, optimizer, 
                scheduler, num_epochs, device):
    """完整训练流程"""
    
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    
    best_acc = 0.0
    best_model_wts = None
    
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print('-' * 40)
        
        # 训练
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, device
        )
        
        # 验证
        val_loss, val_acc = validate_epoch(
            model, val_loader, criterion, device
        )
        
        # 学习率调度
        if scheduler is not None:
            scheduler.step(val_loss)  # 对于ReduceLROnPlateau
            # 或者 scheduler.step() 对于其他调度器
        
        # 记录结果
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        
        print(f'训练 - 损失: {train_loss:.4f}, 准确率: {train_acc:.2f}%')
        print(f'验证 - 损失: {val_loss:.4f}, 准确率: {val_acc:.2f}%')
        
        # 保存最佳模型
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = model.state_dict().copy()
            torch.save(best_model_wts, 'best_model.pth')
            print(f'  保存最佳模型,准确率: {best_acc:.2f}%')
    
    # 加载最佳模型权重
    model.load_state_dict(best_model_wts)
    
    return {
        'model': model,
        'train_losses': train_losses,
        'train_accs': train_accs,
        'val_losses': val_losses,
        'val_accs': val_accs,
        'best_acc': best_acc
    }

# 7.3 模拟训练
print("模拟训练流程:")

# 创建模拟数据
train_dataset = TensorDataset(
    torch.randn(100, 3, 32, 32),
    torch.randint(0, 10, (100,))
)
val_dataset = TensorDataset(
    torch.randn(20, 3, 32, 32),
    torch.randint(0, 10, (20,))
)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# 创建简单模型
simple_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(3*32*32, 128),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, 10)
)

# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = simple_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=3
)

# 运行训练(这里只运行2个epoch作为演示)
print(f"使用设备: {device}")
results = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=2,
    device=device
)

print(f"\n训练完成,最佳验证准确率: {results['best_acc']:.2f}%")
Mixed-Precision Training

```python
print("\n=== 混合精度训练 ===")

try:
    from torch.cuda.amp import autocast, GradScaler
    
    # 初始化梯度缩放器
    scaler = GradScaler()
    
    def train_epoch_amp(model, dataloader, criterion, optimizer, device, scaler):
        """使用混合精度训练的epoch"""
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            optimizer.zero_grad()
            
            # 使用autocast进行混合精度前向传播
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)
            
            # 使用scaler进行梯度缩放和反向传播
            scaler.scale(loss).backward()
            
            # 梯度裁剪(需要先unscale)
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            # 更新参数
            scaler.step(optimizer)
            scaler.update()
            
            # 统计
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        
        epoch_loss = running_loss / total
        epoch_acc = 100. * correct / total
        
        return epoch_loss, epoch_acc
    
    print("混合精度训练可用")
    
except ImportError:
    print("混合精度训练需要PyTorch>=1.6")
Distributed Training

```python
print("\n=== 分布式训练 ===")

def setup_distributed():
    """设置分布式训练环境"""
    import os
    
    # 检查是否在分布式环境中
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        rank = int(os.environ["RANK"])
        world_size = int(os.environ['WORLD_SIZE'])
        gpu = int(os.environ['LOCAL_RANK'])
        
        torch.cuda.set_device(gpu)
        torch.distributed.init_process_group(
            backend='nccl',
            init_method='env://',
            world_size=world_size,
            rank=rank
        )
        
        return True, rank, world_size, gpu
    
    return False, 0, 1, 0

# 检查分布式环境
is_distributed, rank, world_size, gpu = setup_distributed()

if is_distributed:
    print(f"分布式训练: rank={rank}, world_size={world_size}, gpu={gpu}")
    
    # 创建分布式模型
    model = simple_model.cuda(gpu)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[gpu]
    )
    
    # 创建分布式采样器
    from torch.utils.data.distributed import DistributedSampler
    
    train_sampler = DistributedSampler(
        train_dataset,
        num_replicas=world_size,
        rank=rank
    )
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=16,
        sampler=train_sampler,
        num_workers=2
    )
    
    print(f"使用分布式数据采样器")
else:
    print("单机训练")

Model Deployment and Inference

Model Inference

```python
print("\n=== 模型推理 ===")

# 8.1 基本推理
model.eval()  # 确保模型在评估模式

# 创建测试数据
test_input = torch.randn(1, 3, 32, 32).to(device)

# 推理(不计算梯度)
with torch.no_grad():
    output = model(test_input)
    probabilities = torch.softmax(output, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)

print(f"输入形状: {test_input.shape}")
print(f"输出形状: {output.shape}")
print(f"预测类别: {predicted_class.item()}")
print(f"类别概率: {probabilities[0]}")

# 8.2 批量推理
def batch_inference(model, dataloader, device):
    """批量推理"""
    model.eval()
    all_predictions = []
    all_probabilities = []
    
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            probs = torch.softmax(outputs, dim=1)
            predictions = torch.argmax(probs, dim=1)
            
            all_predictions.append(predictions.cpu())
            all_probabilities.append(probs.cpu())
    
    all_predictions = torch.cat(all_predictions, dim=0)
    all_probabilities = torch.cat(all_probabilities, dim=0)
    
    return all_predictions, all_probabilities

# 测试批量推理
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
predictions, probabilities = batch_inference(model, val_loader, device)

print(f"\n批量推理结果:")
print(f"预测形状: {predictions.shape}")
print(f"概率形状: {probabilities.shape}")
print(f"前5个预测: {predictions[:5]}")

# 8.3 置信度阈值
confidence_threshold = 0.8
high_conf_mask = probabilities.max(dim=1)[0] > confidence_threshold
high_conf_predictions = predictions[high_conf_mask]

print(f"\n置信度阈值过滤:")
print(f"总样本数: {len(predictions)}")
print(f"高置信度样本数: {len(high_conf_predictions)}")
print(f"高置信度比例: {len(high_conf_predictions)/len(predictions):.2%}")
Model Export and Deployment

```python
print("\n=== 模型导出与部署 ===")

# 8.1 导出为TorchScript
# 方法1: Tracing(跟踪)
example_input = torch.randn(1, 3, 32, 32).to(device)
traced_model = torch.jit.trace(model, example_input)
traced_model.save("model_traced.pt")
print("TorchScript (Tracing) 导出完成")

# 方法2: Scripting(脚本)
# scripted_model = torch.jit.script(model)
# scripted_model.save("model_scripted.pt")
# print("TorchScript (Scripting) 导出完成")

# 8.2 加载TorchScript模型
loaded_traced_model = torch.jit.load("model_traced.pt")
loaded_traced_model.eval()

# 使用导出的模型推理
with torch.no_grad():
    traced_output = loaded_traced_model(test_input)

print(f"原始模型输出: {output[0, :3]}")
print(f"导出模型输出: {traced_output[0, :3]}")
print(f"输出是否一致: {torch.allclose(output, traced_output, rtol=1e-3)}")

# 8.3 ONNX导出(跨平台部署)
try:
    import onnx
    import onnxruntime
    
    # 导出为ONNX格式
    torch.onnx.export(
        model,                    # 模型
        test_input,               # 示例输入
        "model.onnx",             # 保存路径
        export_params=True,       # 导出参数
        opset_version=11,         # ONNX算子集版本
        do_constant_folding=True, # 优化常量
        input_names=['input'],    # 输入名称
        output_names=['output'],  # 输出名称
        dynamic_axes={            # 动态维度
            'input': {0: 'batch_size'},
            'output': {0: 'batch_size'}
        }
    )
    
    print("ONNX导出完成")
    
    # 使用ONNX Runtime推理
    ort_session = onnxruntime.InferenceSession("model.onnx")
    
    # 准备输入
    ort_inputs = {ort_session.get_inputs()[0].name: test_input.cpu().numpy()}
    
    # 推理
    ort_outputs = ort_session.run(None, ort_inputs)
    
    print(f"ONNX Runtime输出形状: {ort_outputs[0].shape}")
    
except ImportError:
    print("ONNX导出需要安装onnx和onnxruntime")

# 8.4 模型量化(减小模型大小,加速推理)
print("\n=== 模型量化 ===")

# 动态量化(推理时量化)
quantized_model = torch.quantization.quantize_dynamic(
    model,  # 原始模型
    {torch.nn.Linear},  # 需要量化的模块类型
    dtype=torch.qint8   # 量化数据类型
)

print(f"量化模型大小对比:")
print(f"  原始模型: {sum(p.numel() for p in model.parameters()):,} 参数")
print(f"  量化模型: {sum(p.numel() for p in quantized_model.parameters()):,} 参数")

# 保存量化模型
torch.save(quantized_model.state_dict(), "quantized_model.pth")

# 8.5 模型剪枝
print("\n=== 模型剪枝 ===")

# 简单的L1 unstructured剪枝
parameters_to_prune = []
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        parameters_to_prune.append((module, 'weight'))

# 应用剪枝
torch.nn.utils.prune.global_unstructured(
    parameters_to_prune,
    pruning_method=torch.nn.utils.prune.L1Unstructured,
    amount=0.2,  # 剪枝20%的参数
)

# 检查剪枝效果
pruned_params = 0
total_params = 0
for module, _ in parameters_to_prune:
    mask = module.weight_mask  # 剪枝掩码
    pruned_params += (mask == 0).sum().item()
    total_params += mask.numel()

print(f"剪枝比例: {pruned_params/total_params:.2%}")

Computer Vision Specific Features

Image Processing Utilities

```python
print("\n=== 计算机视觉特定功能 ===")

# 9.1 torchvision.transforms.functional
import torchvision.transforms.functional as F

# 创建测试图像
test_image = torch.randn(3, 224, 224)

# 图像变换
resized = F.resize(test_image, (112, 112))
cropped = F.center_crop(test_image, (200, 200))
hflipped = F.hflip(test_image)
rotated = F.rotate(test_image, angle=45)
adjusted = F.adjust_brightness(test_image, brightness_factor=1.5)

print(f"原始图像形状: {test_image.shape}")
print(f"调整大小后: {resized.shape}")
print(f"中心裁剪后: {cropped.shape}")

# 9.2 torchvision.ops(图像操作)
try:
    from torchvision.ops import nms, roi_align, roi_pool
    
    # 模拟目标检测结果
    boxes = torch.tensor([
        [10, 10, 50, 50],    # [x1, y1, x2, y2]
        [15, 15, 55, 55],
        [100, 100, 150, 150]
    ], dtype=torch.float32)
    
    scores = torch.tensor([0.9, 0.8, 0.7])
    
    # NMS(非极大值抑制)
    keep = nms(boxes, scores, iou_threshold=0.5)
    print(f"\nNMS保留的框索引: {keep}")
    
    # ROI Align
    features = torch.randn(1, 256, 32, 32)  # 特征图
    rois = torch.tensor([[0, 10, 10, 50, 50]])  # [batch_idx, x1, y1, x2, y2]
    
    roi_features = roi_align(
        features, rois,
        output_size=(7, 7),
        spatial_scale=1/16.0,  # 特征图相对于原图的比例
        sampling_ratio=2
    )
    print(f"ROI Align输出形状: {roi_features.shape}")
    
except ImportError:
    print("torchvision.ops需要torchvision>=0.3")

# 9.3 可视化工具
import matplotlib.pyplot as plt

def visualize_feature_maps(feature_maps, num_maps=16):
    """可视化特征图"""
    feature_maps = feature_maps.detach().cpu()
    
    # 选择前num_maps个特征图
    if feature_maps.shape[1] > num_maps:
        feature_maps = feature_maps[:, :num_maps, :, :]
    
    # 创建子图
    fig, axes = plt.subplots(4, 4, figsize=(12, 12))
    axes = axes.ravel()
    
    for idx in range(min(num_maps, len(axes))):
        feature_map = feature_maps[0, idx]  # 取第一个样本
        axes[idx].imshow(feature_map, cmap='viridis')
        axes[idx].axis('off')
        axes[idx].set_title(f'Channel {idx}')
    
    plt.tight_layout()
    return fig

# 测试特征图可视化
with torch.no_grad():
    test_input = torch.randn(1, 3, 32, 32)
    features = model.get_features(test_input)  # 假设模型有get_features方法

print(f"特征图形状: {features.shape}")

# 可视化(实际使用时取消注释)
# fig = visualize_feature_maps(features)
# plt.show()
Common CV Model Implementations

```python
print("\n=== 常用CV模型实现 ===")

# 9.1 ResNet残差块
class ResidualBlock(nn.Module):
    """ResNet残差块"""
    
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.downsample = downsample
        
    def forward(self, x):
        identity = x
        
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample is not None:
            identity = self.downsample(x)
        
        out += identity
        out = self.relu(out)
        
        return out

# 测试残差块
res_block = ResidualBlock(64, 128, stride=2)
test_input = torch.randn(4, 64, 32, 32)
output = res_block(test_input)
print(f"ResidualBlock:")
print(f"  输入形状: {test_input.shape}")
print(f"  输出形状: {output.shape}")

# 9.2 注意力机制(SE Block)
class SEBlock(nn.Module):
    """Squeeze-and-Excitation注意力块"""
    
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        b, c, _, _ = x.size()
        
        # Squeeze
        y = self.avg_pool(x).view(b, c)
        
        # Excitation
        y = self.fc(y).view(b, c, 1, 1)
        
        # Scale
        return x * y.expand_as(x)

# 测试SE Block
se_block = SEBlock(128)
se_output = se_block(output)
print(f"\nSEBlock输出形状: {se_output.shape}")

# 9.3 U-Net编码器块
class UNetEncoderBlock(nn.Module):
    """U-Net编码器块"""
    
    def __init__(self, in_channels, out_channels):
        super(UNetEncoderBlock, self).__init__()
        
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.pool = nn.MaxPool2d(2)
    
    def forward(self, x):
        # 编码路径
        x = self.conv(x)
        pooled = self.pool(x)
        return x, pooled
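
# Sanity-check the encoder block (these shapes are illustrative, not from the
# original text: a batch of 4 RGB images at 64x64, mapped to 32 channels)
enc_block = UNetEncoderBlock(3, 32)
skip, down = enc_block(torch.randn(4, 3, 64, 64))
print(f"\nUNetEncoderBlock:")
print(f"  Skip-connection shape: {skip.shape}")  # (4, 32, 64, 64)
print(f"  Downsampled shape: {down.shape}")      # (4, 32, 32, 32)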

# 9.4 Depthwise separable convolution
class DepthwiseSeparableConv(nn.Module):
    """Depthwise separable convolution (as used in MobileNet)."""
    
    def __init__(self, in_channels, out_channels, stride=1):
        super(DepthwiseSeparableConv, self).__init__()
        
        # Depthwise convolution
        self.depthwise = nn.Conv2d(
            in_channels, in_channels,
            kernel_size=3, stride=stride,
            padding=1, groups=in_channels,  # groups=in_channels makes it depthwise
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        
        # Pointwise convolution
        self.pointwise = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.relu = nn.ReLU(inplace=True)
    
    def forward(self, x):
        x = self.depthwise(x)
        x = self.bn1(x)
        x = self.relu(x)
        
        x = self.pointwise(x)
        x = self.bn2(x)
        x = self.relu(x)
        
        return x

# Test the depthwise separable convolution
ds_conv = DepthwiseSeparableConv(32, 64, stride=2)
test_input = torch.randn(4, 32, 32, 32)
output = ds_conv(test_input)
print(f"\nDepthwiseSeparableConv:")
print(f"  Input shape: {test_input.shape}")
print(f"  Output shape: {output.shape}")

# Parameter-count comparison
standard_conv = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
print(f"\nParameter counts:")
print(f"  Standard conv: {sum(p.numel() for p in standard_conv.parameters()):,}")
print(f"  Depthwise separable conv: {sum(p.numel() for p in ds_conv.parameters()):,}")
```