PyTorch
张量(Tensor)基础
张量创建与初始化

| 方法 | 作用 | 示例 |
| --- | --- | --- |
| torch.tensor() | 从数据创建 | torch.tensor([1,2,3]) |
| torch.zeros() | 全0张量 | torch.zeros((3,3)) |
| torch.ones() | 全1张量 | torch.ones((2,4)) |
| torch.rand() | 均匀分布随机 | torch.rand((3,3)) |
| torch.randn() | 标准正态分布 | torch.randn((100,)) |
| torch.arange() | 等差张量 | torch.arange(0, 10, 2) |
| torch.linspace() | 等间隔张量 | torch.linspace(0, 1, 5) |
| torch.eye() | 单位矩阵 | torch.eye(3) |
| torch.full() | 填充张量 | torch.full((3,3), 5) |
| torch.empty() | 未初始化 | torch.empty((3,3)) |
| torch.from_numpy() | 从NumPy创建 | torch.from_numpy(np_array) |
```python
import torch
import numpy as np
print("=== 张量创建与初始化 ===")
# 1.1 基础创建
x = torch.tensor([1, 2, 3, 4, 5])
print(f"从列表创建: {x}, dtype={x.dtype}")
# 1.2 特殊张量
zeros = torch.zeros(3, 3) # 3x3全0
ones = torch.ones(2, 4) # 2x4全1
rand_tensor = torch.rand(3, 3) # 3x3随机数[0,1)
randn_tensor = torch.randn(100) # 100个标准正态分布
eye_matrix = torch.eye(3) # 3x3单位矩阵
full_tensor = torch.full((3, 3), 255.0) # 3x3全255
print(f"\n全0张量:\n{zeros}")
print(f"\n随机张量:\n{rand_tensor}")
# 1.3 序列张量
range_tensor = torch.arange(0, 10, 2) # [0, 2, 4, 6, 8]
linspace_tensor = torch.linspace(0, 1, 5) # [0., 0.25, 0.5, 0.75, 1.]
print(f"\n等差张量: {range_tensor}")
print(f"等间隔张量: {linspace_tensor}")
# 1.4 与NumPy互转
np_array = np.array([1, 2, 3, 4, 5])
tensor_from_np = torch.from_numpy(np_array) # NumPy -> Tensor
tensor_to_np = tensor_from_np.numpy() # Tensor -> NumPy
print(f"\nNumPy数组: {np_array}")
print(f"转为Tensor: {tensor_from_np}")
print(f"转回NumPy: {tensor_to_np}")
# 1.5 数据类型和设备
x_float = torch.tensor([1, 2, 3], dtype=torch.float32)
x_double = torch.tensor([1, 2, 3], dtype=torch.float64)
x_int = torch.tensor([1, 2, 3], dtype=torch.int32)
x_long = torch.tensor([1, 2, 3], dtype=torch.int64)
print(f"\n数据类型:")
print(f"float32: {x_float.dtype}")
print(f"float64: {x_double.dtype}")
print(f"int32: {x_int.dtype}")
print(f"int64: {x_long.dtype}")
# 1.6 设备(CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n当前设备: {device}")
if torch.cuda.is_available():
    x_gpu = torch.tensor([1, 2, 3]).cuda() # 移动到GPU
    print(f"GPU张量: {x_gpu.device}")
```
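除了 .cuda(),更通用的做法是用 .to() 在设备和数据类型之间转换。下面是一个简要示意,沿用上面定义的 device 变量:

```python
# .to() 返回新张量,可分别或同时指定设备和dtype
x = torch.tensor([1, 2, 3])
x_dev = x.to(device)                         # 移动到当前设备(CPU或GPU)
x_f16 = x.to(dtype=torch.float16)            # 只转换数据类型
x_both = x.to(device, dtype=torch.float32)   # 同时指定设备与dtype
print(x_dev.device, x_f16.dtype, x_both.dtype)
```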
张量属性与操作

| 属性/方法 | 作用 | 示例 |
| --- | --- | --- |
| .shape / .size() | 形状 | tensor.shape |
| .dtype | 数据类型 | tensor.dtype |
| .device | 设备 | tensor.device |
| .ndim | 维度数 | tensor.ndim |
| .numel() | 元素总数 | tensor.numel() |
| .reshape() | 重塑形状 | tensor.reshape((3,4)) |
| .view() | 视图重塑 | tensor.view(-1, 16) |
| .squeeze() | 压缩维度 | tensor.squeeze() |
| .unsqueeze() | 增加维度 | tensor.unsqueeze(0) |
| .permute() | 维度重排 | tensor.permute(2,0,1) |
| .transpose() | 转置 | tensor.transpose(0,1) |
| .t() | 矩阵转置 | tensor.t() |
| .contiguous() | 连续内存 | tensor.contiguous() |
```python
print("\n=== 张量属性与操作 ===")
# 创建测试张量
tensor = torch.randn(3, 4, 5) # 3x4x5张量
# 1.1 基本属性
print(f"形状: {tensor.shape}")
print(f"数据类型: {tensor.dtype}")
print(f"设备: {tensor.device}")
print(f"维度数: {tensor.ndim}")
print(f"元素总数: {tensor.numel()}")
# 1.2 形状操作
# 重塑
reshaped = tensor.reshape(12, 5) # 改为12x5
print(f"\n重塑为(12,5): {reshaped.shape}")
# 视图(共享内存)
viewed = tensor.view(-1, 20) # -1表示自动计算
print(f"视图为(-1,20): {viewed.shape}")
# 压缩/增加维度
# 假设有一个维度为1的张量
tensor_1d = torch.randn(1, 3, 1, 5)
squeezed = tensor_1d.squeeze() # 压缩所有维度为1的
unsqueezed = squeezed.unsqueeze(0) # 在第0维增加维度
print(f"\n原始形状: {tensor_1d.shape}")
print(f"压缩后: {squeezed.shape}")
print(f"增加维度后: {unsqueezed.shape}")
# 1.3 维度重排
# 图像数据通常需要调整维度顺序
image_tensor = torch.randn(3, 224, 224) # (通道, 高, 宽)
# 转为PyTorch期望的(N, C, H, W)格式
batch_tensor = image_tensor.unsqueeze(0) # (1, 3, 224, 224)
print(f"\n图像张量形状: {image_tensor.shape}")
print(f"批次格式: {batch_tensor.shape}")
# 维度重排(通道最后 -> 通道最先)
hwc_tensor = torch.randn(224, 224, 3) # 高度, 宽度, 通道
chw_tensor = hwc_tensor.permute(2, 0, 1) # 通道, 高度, 宽度
print(f"\nHWC格式: {hwc_tensor.shape}")
print(f"CHW格式: {chw_tensor.shape}")
# 1.4 转置
matrix = torch.randn(3, 4)
transposed = matrix.transpose(0, 1) # 或 matrix.T
print(f"\n矩阵形状: {matrix.shape}")
print(f"转置形状: {transposed.shape}")
# 1.5 内存连续性
# 某些操作后张量可能不连续
non_contiguous = transposed # transpose操作产生不连续张量
is_contiguous = non_contiguous.is_contiguous()
print(f"\n张量是否连续: {is_contiguous}")
# 转为连续内存
contiguous_tensor = non_contiguous.contiguous()
print(f"转为连续后: {contiguous_tensor.is_contiguous()}")
```
张量运算
基本数学运算

| 运算 | 作用 | 示例 |
| --- | --- | --- |
| +, -, *, / | 基本运算 | a + b, a * b |
| @, torch.matmul() | 矩阵乘法 | a @ b |
| torch.mm() | 矩阵乘法(2D) | torch.mm(a, b) |
| torch.add() | 加法 | torch.add(a, b) |
| torch.mul() | 乘法 | torch.mul(a, b) |
| torch.div() | 除法 | torch.div(a, b) |
| torch.pow() | 幂运算 | torch.pow(a, 2) |
| torch.sqrt() | 平方根 | torch.sqrt(a) |
| torch.exp() | 指数 | torch.exp(a) |
| torch.log() | 对数 | torch.log(a) |
| torch.abs() | 绝对值 | torch.abs(a) |
| torch.clamp() | 限幅 | torch.clamp(a, min, max) |
```python
print("=== 张量基本运算 ===")
# 创建测试张量
a = torch.tensor([1.0, 2.0, 3.0, 4.0])
b = torch.tensor([5.0, 6.0, 7.0, 8.0])
# 2.1 基础运算
print("基础运算:")
print(f"a + b = {a + b}")
print(f"a - b = {a - b}")
print(f"a * b = {a * b}")
print(f"a / b = {a / b}")
print(f"a ** 2 = {a ** 2}")
# 2.2 函数运算
print("\n函数运算:")
print(f"平方根 sqrt(a): {torch.sqrt(a)}")
print(f"指数 exp(a): {torch.exp(a)}")
print(f"自然对数 log(a): {torch.log(a)}")
print(f"绝对值 abs(-a): {torch.abs(-a)}")
# 2.3 限幅操作(常用于图像处理)
values = torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0])
clamped = torch.clamp(values, min=-1.0, max=1.0)
print(f"\n限幅 [-1, 1]: {clamped}")
# 2.4 矩阵运算
matrix_a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
matrix_b = torch.tensor([[5.0, 6.0], [7.0, 8.0]])
print(f"\n矩阵乘法:")
print(f"a @ b = \n{a @ b}") # 向量点积
print(f"\nmatrix_a @ matrix_b = \n{matrix_a @ matrix_b}")
print(f"\ntorch.matmul(matrix_a, matrix_b) = \n{torch.matmul(matrix_a, matrix_b)}")
# 2.5 广播机制
vector = torch.tensor([1, 2, 3])
matrix = torch.tensor([[1, 2, 3], [4, 5, 6]])
result = matrix + vector # vector广播到每行
print(f"\n广播运算:")
print(f"matrix:\n{matrix}")
print(f"vector: {vector}")
print(f"matrix + vector:\n{result}")
```
统计运算

| 函数 | 作用 | 示例 |
| --- | --- | --- |
| torch.sum() | 求和 | torch.sum(tensor) |
| torch.mean() | 平均值 | torch.mean(tensor) |
| torch.std() | 标准差 | torch.std(tensor) |
| torch.var() | 方差 | torch.var(tensor) |
| torch.min() | 最小值 | torch.min(tensor) |
| torch.max() | 最大值 | torch.max(tensor) |
| torch.argmin() | 最小值索引 | torch.argmin(tensor) |
| torch.argmax() | 最大值索引 | torch.argmax(tensor) |
| torch.median() | 中位数 | torch.median(tensor) |
| torch.sort() | 排序 | torch.sort(tensor) |
| torch.topk() | 前k个值 | torch.topk(tensor, k) |
| torch.unique() | 唯一值 | torch.unique(tensor) |
```python
print("\n=== 统计运算 ===")
# 创建测试数据
data = torch.randn(100) # 100个正态分布随机数
matrix = torch.randn(3, 4)
print("统计运算:")
print(f"数据形状: {data.shape}")
print(f"总和: {torch.sum(data):.3f}")
print(f"平均值: {torch.mean(data):.3f}")
print(f"标准差: {torch.std(data):.3f}")
print(f"方差: {torch.var(data):.3f}")
print(f"最小值: {torch.min(data):.3f}")
print(f"最大值: {torch.max(data):.3f}")
# 维度统计
print(f"\n矩阵统计:")
print(f"原始矩阵:\n{matrix}")
print(f"按行求和: {torch.sum(matrix, dim=1)}")
print(f"按列求平均: {torch.mean(matrix, dim=0)}")
# 索引统计
print(f"\n索引统计:")
values, indices = torch.max(matrix, dim=1) # 每行最大值和索引
print(f"每行最大值: {values}")
print(f"每行最大值索引: {indices}")
# 排序
print(f"\n排序:")
sorted_values, sorted_indices = torch.sort(matrix, dim=1, descending=True)
print(f"每行降序排序:\n{sorted_values}")
print(f"排序索引:\n{sorted_indices}")
# Top-k
print(f"\nTop-k:")
top_values, top_indices = torch.topk(matrix, k=2, dim=1)
print(f"每行前2个值:\n{top_values}")
print(f"每行前2个索引:\n{top_indices}")
# 唯一值
tensor_with_dups = torch.tensor([1, 2, 2, 3, 3, 3, 4])
unique_values = torch.unique(tensor_with_dups)
print(f"\n唯一值: {unique_values}")
```
张量索引与切片

| 操作 | 语法 | 说明 |
| --- | --- | --- |
| 基本索引 | tensor[index] | 按下标取元素/子张量 |
| 切片 | tensor[start:end:step] | 按区间切片 |
| 布尔索引 | tensor[mask] | 按布尔掩码筛选 |
| 花式索引 | tensor[[indices]] | 按索引列表取值 |
| : | tensor[:] | 选取该维所有元素 |
| ... | tensor[..., 0] | 省略号,代表中间所有维度 |
| torch.masked_select() | torch.masked_select(tensor, mask) | 掩码选择,返回一维张量 |
| torch.gather() | torch.gather(tensor, dim, index) | 沿指定维度按索引聚集 |
| torch.index_select() | torch.index_select(tensor, dim, index) | 沿指定维度按索引选择 |
```python
print("\n=== 张量索引与切片 ===")
# 创建测试张量
tensor = torch.arange(24).reshape(4, 6)
print(f"原始张量:\n{tensor}")
# 2.1 基本索引
print(f"\n基本索引:")
print(f"第一行: {tensor[0]}")
print(f"第二列: {tensor[:, 1]}")
print(f"元素[2,3]: {tensor[2, 3]}")
# 2.2 切片
print(f"\n切片:")
print(f"行1-2, 列2-4:\n{tensor[1:3, 2:5]}")
print(f"每隔一行:\n{tensor[::2]}")
# 2.3 布尔索引
mask = tensor > 10
print(f"\n布尔索引 (tensor > 10):\n{tensor[mask]}")
# 2.4 花式索引
indices = torch.tensor([0, 2, 3])
print(f"\n花式索引 [0,2,3]:\n{tensor[indices]}")
# 2.5 高级索引函数
print(f"\n高级索引函数:")
# masked_select
mask = tensor > 15
selected = torch.masked_select(tensor, mask)
print(f"masked_select (>15): {selected}")
# index_select
selected_rows = torch.index_select(tensor, dim=0, index=torch.tensor([0, 2]))
print(f"index_select 行[0,2]:\n{selected_rows}")
# gather (复杂索引)
# 假设我们要从每行选择不同列的元素
index = torch.tensor([[0, 1, 2], [2, 3, 4], [1, 2, 3], [3, 4, 5]])
gathered = torch.gather(tensor, dim=1, index=index)
print(f"gather操作:\n{gathered}")
# 2.6 省略号索引
tensor_3d = torch.arange(60).reshape(3, 4, 5)
print(f"\n省略号索引:")
print(f"张量形状: {tensor_3d.shape}")
print(f"tensor_3d[..., 0] 形状: {tensor_3d[..., 0].shape}")
```
自动求导(Autograd)
梯度计算基础

| 概念/函数 | 作用 | 说明 |
| --- | --- | --- |
| requires_grad=True | 启用梯度 | 跟踪计算历史 |
| .backward() | 反向传播 | 计算梯度 |
| .grad | 梯度值 | 参数的梯度 |
| .grad_fn | 梯度函数 | 创建该张量的函数 |
| torch.no_grad() | 禁用梯度 | 推理时使用 |
| .detach() | 分离张量 | 从计算图分离 |
| torch.autograd.grad() | 计算梯度 | 手动计算梯度 |
| .retain_grad() | 保留梯度 | 非叶子节点保留梯度 |
```python
print("=== 自动求导基础 ===")
# 3.1 基本梯度计算
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)
# 定义计算
z = x**2 + y**3 + 10
# 反向传播
z.backward()
print(f"x = {x.item()}, y = {y.item()}")
print(f"z = x² + y³ + 10 = {z.item()}")
print(f"∂z/∂x = {x.grad.item()}") # 2x = 4
print(f"∂z/∂y = {y.grad.item()}") # 3y² = 27
# 3.2 多次反向传播需要清零梯度
x.grad.zero_()
y.grad.zero_()
# 重新计算
w = x*y + torch.sin(x)
w.backward()
print(f"\n清零后重新计算:")
print(f"w = x*y + sin(x) = {w.item()}")
print(f"∂w/∂x = {x.grad.item()}") # y + cos(x) = 3 + cos(2)
print(f"∂w/∂y = {y.grad.item()}") # x = 2
# 3.3 非标量张量的梯度
print(f"\n非标量梯度:")
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x**2
# 需要指定梯度权重
v = torch.tensor([0.1, 1.0, 0.001])
y.backward(v)
print(f"x = {x}")
print(f"y = x² = {y}")
print(f"梯度权重 v = {v}")
print(f"梯度 ∂y/∂x = {x.grad}") # 2x * v
# 3.4 禁用梯度上下文
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
with torch.no_grad():
    y = x * 2 # 不会跟踪梯度
print(f"\n在no_grad上下文中的操作:")
print(f"y.requires_grad = {y.requires_grad}")
# 3.5 分离张量
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x**2
z = y.detach() # 分离y,不再跟踪梯度
print(f"\n张量分离:")
print(f"y.requires_grad = {y.requires_grad}")
print(f"z.requires_grad = {z.requires_grad}")
# 3.6 梯度累加
print(f"\n梯度累加:")
x = torch.tensor(2.0, requires_grad=True)
for i in range(3):
    y = x * i if i > 0 else x
    y.backward(retain_graph=True if i < 2 else False)
    print(f"第{i+1}次反向传播,梯度: {x.grad.item()}")
# 注意:最后一次不需要retain_graph
```
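上表列出的 torch.autograd.grad() 和 .retain_grad() 在上面的代码中没有出现,这里补一个简短示意(变量名仅作演示):

```python
# torch.autograd.grad: 直接返回梯度,不写入.grad属性
x = torch.tensor(2.0, requires_grad=True)
y = x ** 3
(dy_dx,) = torch.autograd.grad(y, x)  # dy/dx = 3x² = 12
print(f"autograd.grad计算的梯度: {dy_dx.item()}")

# .retain_grad: 让非叶子节点也保留.grad
a = torch.tensor(2.0, requires_grad=True)
b = a * 3          # b是非叶子节点,默认反向传播后不保留梯度
b.retain_grad()
c = b ** 2
c.backward()
print(f"非叶子节点b的梯度: {b.grad.item()}")  # dc/db = 2b = 12
```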
自定义自动求导函数
```python
print("\n=== 自定义自动求导函数 ===")
# 3.1 使用 torch.autograd.Function
class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        # 前向传播
        ctx.save_for_backward(input) # 保存输入用于反向传播
        return input.clamp(min=0)
    @staticmethod
    def backward(ctx, grad_output):
        # 反向传播
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input
# 使用自定义函数
x = torch.randn(5, requires_grad=True)
print(f"输入: {x}")
my_relu = MyReLU.apply
y = my_relu(x)
print(f"ReLU输出: {y}")
# 计算梯度
y.sum().backward()
print(f"梯度: {x.grad}")
# 3.2 对比内置ReLU
x.grad.zero_()
y_builtin = torch.relu(x)
y_builtin.sum().backward()
print(f"内置ReLU梯度: {x.grad}")
```
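实现自定义 Function 后,通常可以用 torch.autograd.gradcheck 做数值梯度校验。下面是一个简要示意(输入需为双精度;ReLU 在 0 处不可导,随机输入几乎不会恰好取到 0):

```python
# 用数值差分校验MyReLU的反向传播是否正确
check_input = torch.randn(6, dtype=torch.double, requires_grad=True)
ok = torch.autograd.gradcheck(MyReLU.apply, (check_input,), eps=1e-6, atol=1e-4)
print(f"gradcheck通过: {ok}")
```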
神经网络模块(torch.nn)
层定义与使用

| 层类型 | 类名 | 主要参数 |
| --- | --- | --- |
| 线性层 | nn.Linear() | in_features, out_features, bias |
| 卷积层 | nn.Conv2d() | in_channels, out_channels, kernel_size, stride, padding |
| 池化层 | nn.MaxPool2d() | kernel_size, stride, padding |
| 激活函数 | nn.ReLU() | inplace |
| 归一化 | nn.BatchNorm2d() | num_features |
| Dropout | nn.Dropout() | p |
| 循环层 | nn.LSTM() | input_size, hidden_size, num_layers |
| Embedding | nn.Embedding() | num_embeddings, embedding_dim |
```python
import torch.nn as nn
print("=== 神经网络层 ===")
# 4.1 线性层
linear = nn.Linear(in_features=10, out_features=5, bias=True)
print(f"线性层: {linear}")
print(f"权重形状: {linear.weight.shape}") # (5, 10)
print(f"偏置形状: {linear.bias.shape}") # (5,)
# 前向传播
x = torch.randn(3, 10) # 批大小=3, 特征=10
y = linear(x)
print(f"输入形状: {x.shape}")
print(f"输出形状: {y.shape}")
# 4.2 卷积层(CV核心)
conv = nn.Conv2d(
in_channels=3, # 输入通道数(RGB图像为3)
out_channels=16, # 输出通道数(滤波器数量)
kernel_size=3, # 卷积核大小
stride=1, # 步长
padding=1, # 填充
bias=True
)
print(f"\n卷积层: {conv}")
print(f"权重形状: {conv.weight.shape}") # (16, 3, 3, 3)
# 卷积前向传播
image = torch.randn(4, 3, 32, 32) # 批大小=4, 通道=3, 高=32, 宽=32
output = conv(image)
print(f"输入形状: {image.shape}")
print(f"输出形状: {output.shape}") # (4, 16, 32, 32) 由于padding=1,尺寸不变
# 4.3 池化层
pool = nn.MaxPool2d(kernel_size=2, stride=2)
pooled = pool(output)
print(f"\n最大池化后形状: {pooled.shape}") # (4, 16, 16, 16)
# 4.4 批量归一化
batch_norm = nn.BatchNorm2d(num_features=16)
normed = batch_norm(pooled)
print(f"批量归一化后形状: {normed.shape}")
# 4.5 Dropout
dropout = nn.Dropout(p=0.5)
dropped = dropout(normed)
print(f"Dropout后形状: {dropped.shape}")
# 4.6 激活函数
relu = nn.ReLU(inplace=True) # inplace=True节省内存
activated = relu(dropped)
print(f"ReLU激活后形状: {activated.shape}")
# 其他激活函数
sigmoid = nn.Sigmoid()
tanh = nn.Tanh()
leaky_relu = nn.LeakyReLU(negative_slope=0.01)
softmax = nn.Softmax(dim=1) # 沿哪个维度做softmax
```
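上表中的 nn.LSTM 和 nn.Embedding 在上面没有演示,这里给一个简要示意(词表大小、维度等参数均为随意取值):

```python
# Embedding: 把整数索引映射为稠密向量
embedding = nn.Embedding(num_embeddings=1000, embedding_dim=64)  # 词表1000,向量维度64
token_ids = torch.randint(0, 1000, (4, 10))   # (batch=4, 序列长度=10)
embedded = embedding(token_ids)               # (4, 10, 64)

# LSTM: batch_first=True时输入为(batch, seq, feature)
lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True)
output, (h_n, c_n) = lstm(embedded)
print(f"Embedding输出: {embedded.shape}")             # (4, 10, 64)
print(f"LSTM输出: {output.shape}, h_n: {h_n.shape}")  # (4, 10, 128), (2, 4, 128)
```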
损失函数

| 损失函数 | 类名 | 适用场景 |
| --- | --- | --- |
| 均方误差 | nn.MSELoss() | 回归问题 |
| 交叉熵 | nn.CrossEntropyLoss() | 多分类 |
| 二元交叉熵 | nn.BCELoss() | 二分类 |
| BCE with logits | nn.BCEWithLogitsLoss() | 二分类(数值稳定) |
| L1损失 | nn.L1Loss() | 回归,稀疏性 |
| Huber损失 | nn.SmoothL1Loss() | 回归,对异常值鲁棒 |
| KL散度 | nn.KLDivLoss() | 概率分布 |
| Triplet损失 | nn.TripletMarginLoss() | 度量学习 |
| Contrastive损失 | PyTorch无内置类,需自定义实现 | 对比学习 |
```python
print("\n=== 损失函数 ===")
# 4.1 回归损失
mse_loss = nn.MSELoss()
l1_loss = nn.L1Loss()
smooth_l1 = nn.SmoothL1Loss()
# 示例:图像重建
pred = torch.randn(4, 3, 32, 32)
target = torch.randn(4, 3, 32, 32)
mse = mse_loss(pred, target)
l1 = l1_loss(pred, target)
huber = smooth_l1(pred, target)
print(f"MSE损失: {mse.item():.4f}")
print(f"L1损失: {l1.item():.4f}")
print(f"Huber损失: {huber.item():.4f}")
# 4.2 分类损失
ce_loss = nn.CrossEntropyLoss()
bce_loss = nn.BCELoss()
bce_logits_loss = nn.BCEWithLogitsLoss()
# 多分类示例
logits = torch.randn(4, 10) # 4个样本,10个类别
labels = torch.randint(0, 10, (4,)) # 真实标签
ce = ce_loss(logits, labels)
print(f"\n交叉熵损失: {ce.item():.4f}")
# 二分类示例
sigmoid_output = torch.rand(4, 1) # 经过sigmoid的输出
binary_labels = torch.randint(0, 2, (4, 1)).float()
bce = bce_loss(sigmoid_output, binary_labels)
print(f"二元交叉熵损失: {bce.item():.4f}")
# 使用BCEWithLogitsLoss(更稳定)
logits = torch.randn(4, 1) # 未经过sigmoid
bce_logits = bce_logits_loss(logits, binary_labels)
print(f"BCEWithLogits损失: {bce_logits.item():.4f}")
# 4.3 特定任务损失
# Triplet损失(人脸识别、度量学习)
triplet_loss = nn.TripletMarginLoss(margin=1.0)
anchor = torch.randn(4, 128) # 锚点样本
positive = torch.randn(4, 128) # 正样本
negative = torch.randn(4, 128) # 负样本
triplet = triplet_loss(anchor, positive, negative)
print(f"\nTriplet损失: {triplet.item():.4f}")
```
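上表中的 nn.KLDivLoss 这里没有演示。它要求输入是对数概率、目标是概率分布,下面是一个简要示意(用随机logits模拟蒸馏场景):

```python
# KL散度: input为log概率(log_softmax),target为概率(softmax)
kl_loss = nn.KLDivLoss(reduction='batchmean')
student_logits = torch.randn(4, 10)
teacher_logits = torch.randn(4, 10)
kl = kl_loss(
    torch.log_softmax(student_logits, dim=1),  # 对数概率
    torch.softmax(teacher_logits, dim=1)       # 概率
)
print(f"KL散度损失: {kl.item():.4f}")
```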
优化器

| 优化器 | 类名 | 主要参数 |
| --- | --- | --- |
| SGD | torch.optim.SGD() | params, lr, momentum, weight_decay |
| Adam | torch.optim.Adam() | params, lr, betas, eps, weight_decay |
| AdamW | torch.optim.AdamW() | params, lr, weight_decay(解耦权重衰减) |
| RMSprop | torch.optim.RMSprop() | params, lr, alpha, weight_decay |
| Adagrad | torch.optim.Adagrad() | params, lr, lr_decay |
| 学习率调度 | torch.optim.lr_scheduler 中的各种scheduler | 按策略调整学习率 |
```python
import torch.optim as optim
print("\n=== 优化器 ===")
# 创建简单模型
model = nn.Sequential(
nn.Linear(10, 20),
nn.ReLU(),
nn.Linear(20, 2)
)
# 4.1 SGD优化器
optimizer_sgd = optim.SGD(
model.parameters(),
lr=0.01,
momentum=0.9,
weight_decay=1e-4 # L2正则化
)
print(f"SGD优化器: {optimizer_sgd}")
# 4.2 Adam优化器
optimizer_adam = optim.Adam(
model.parameters(),
lr=0.001,
betas=(0.9, 0.999), # 一阶和二阶矩估计的衰减率
eps=1e-8, # 数值稳定性
weight_decay=0
)
print(f"Adam优化器: {optimizer_adam}")
# 4.3 AdamW优化器(推荐)
optimizer_adamw = optim.AdamW(
model.parameters(),
lr=0.001,
weight_decay=0.01 # 解耦权重衰减
)
print(f"AdamW优化器: {optimizer_adamw}")
# 4.4 优化器使用示例
criterion = nn.CrossEntropyLoss()
# 模拟训练步骤
for epoch in range(3):
    # 模拟数据
    inputs = torch.randn(4, 10)
    targets = torch.randint(0, 2, (4,))
    # 前向传播
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    # 反向传播
    optimizer_adamw.zero_grad() # 清零梯度
    loss.backward() # 计算梯度
    # 梯度裁剪(防止梯度爆炸)
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    # 参数更新
    optimizer_adamw.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")
# 4.5 学习率调度器
print(f"\n学习率调度器:")
# 创建优化器
optimizer = optim.Adam(model.parameters(), lr=0.01)
# StepLR: 每隔step_size个epoch,lr乘以gamma
scheduler_step = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# MultiStepLR: 在指定epochs调整学习率
scheduler_multi = optim.lr_scheduler.MultiStepLR(
optimizer, milestones=[30, 80], gamma=0.1
)
# CosineAnnealingLR: 余弦退火
scheduler_cosine = optim.lr_scheduler.CosineAnnealingLR(
optimizer, T_max=50, eta_min=0
)
# ReduceLROnPlateau: 当指标停止改善时降低学习率
scheduler_plateau = optim.lr_scheduler.ReduceLROnPlateau(
optimizer, mode='min', factor=0.1, patience=5
)
print(f"初始学习率: {optimizer.param_groups[0]['lr']}")
# 模拟调度器使用
for epoch in range(5):
    # 模拟训练
    loss = torch.tensor(0.5 - epoch*0.1)
    # StepLR更新
    scheduler_step.step()
    # Plateau更新(需要监控指标)
    scheduler_plateau.step(loss)
    print(f"Epoch {epoch+1}, LR: {optimizer.param_groups[0]['lr']:.6f}")
```
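优化器还支持按参数组设置不同的超参数(例如给不同层不同的学习率)。下面是一个简要示意,沿用上面的 model(nn.Sequential),分组方式仅作演示:

```python
# 参数组: 各组可以覆盖默认学习率
optimizer_groups = optim.SGD([
    {'params': model[0].parameters(), 'lr': 1e-4},  # 第一个Linear层
    {'params': model[2].parameters(), 'lr': 1e-2},  # 最后一个Linear层
], lr=1e-3, momentum=0.9)
for i, group in enumerate(optimizer_groups.param_groups):
    print(f"参数组{i}: lr={group['lr']}")
```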
数据加载与处理
Dataset和DataLoader

| 类/函数 | 作用 | 主要参数 |
| --- | --- | --- |
| Dataset | 数据集基类 | 需要实现__len__和__getitem__ |
| DataLoader | 数据加载器 | dataset, batch_size, shuffle, num_workers |
| TensorDataset | 张量数据集 | *tensors |
| Subset | 子集 | dataset, indices |
| random_split() | 随机分割 | dataset, lengths |
```python
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import torchvision.transforms as transforms
print("=== 数据加载与处理 ===")
# 5.1 自定义Dataset
class ImageDataset(Dataset):
    """自定义图像数据集"""
    def __init__(self, data, labels, transform=None):
        """
        参数:
            data: 图像数据,形状为(N, H, W, C)或(N, C, H, W)
            labels: 标签
            transform: 数据增强变换
        """
        self.data = data
        self.labels = labels
        self.transform = transform
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        image = self.data[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label
# 创建模拟数据
num_samples = 100
image_data = torch.randn(num_samples, 3, 32, 32) # 100张32x32 RGB图像
image_labels = torch.randint(0, 10, (num_samples,)) # 10个类别
# 5.2 创建Dataset实例
transform = transforms.Compose([
transforms.ToPILImage(), # 转为PIL图像
transforms.RandomHorizontalFlip(), # 随机水平翻转
transforms.RandomRotation(10), # 随机旋转±10度
transforms.ToTensor(), # 转为Tensor
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) # 归一化
])
dataset = ImageDataset(image_data, image_labels, transform=transform)
print(f"数据集大小: {len(dataset)}")
print(f"单个样本形状: {dataset[0][0].shape}") # (C, H, W)
# 5.3 创建DataLoader
dataloader = DataLoader(
dataset=dataset,
batch_size=16,
shuffle=True,
num_workers=2, # 多进程加载
pin_memory=True # 使用锁页内存,加速GPU传输
)
print(f"\nDataLoader信息:")
print(f"批次大小: {dataloader.batch_size}")
print(f"总批次数: {len(dataloader)}")
# 5.4 使用DataLoader
for batch_idx, (images, labels) in enumerate(dataloader):
    print(f"\n批次 {batch_idx}:")
    print(f" 图像形状: {images.shape}") # (16, 3, 32, 32)
    print(f" 标签形状: {labels.shape}") # (16,)
    if batch_idx == 2: # 只查看前3个批次
        break
# 5.5 TensorDataset(简单情况)
tensor_dataset = TensorDataset(image_data, image_labels)
tensor_dataloader = DataLoader(tensor_dataset, batch_size=8, shuffle=True)
print(f"\nTensorDataset批次:")
for images, labels in tensor_dataloader:
    print(f" 形状: {images.shape}, {labels.shape}")
    break
# 5.6 数据集分割
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
print(f"\n数据集分割:")
print(f"训练集: {len(train_dataset)} 样本")
print(f"验证集: {len(val_dataset)} 样本")
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
```
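上表中的 Subset 在上面没有出现,它按给定索引取出数据集的一个子集。简要示意如下:

```python
from torch.utils.data import Subset

# 取数据集中前10个样本构成子集(索引列表可任意指定)
subset = Subset(dataset, indices=list(range(10)))
subset_loader = DataLoader(subset, batch_size=4, shuffle=False)
print(f"子集大小: {len(subset)}")
```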

| 变换 | 类名 | 作用 |
| --- | --- | --- |
| 裁剪 | transforms.RandomCrop() | 随机裁剪 |
| 翻转 | transforms.RandomHorizontalFlip() | 随机水平翻转 |
| 旋转 | transforms.RandomRotation() | 随机旋转 |
| 颜色变换 | transforms.ColorJitter() | 调整亮度、对比度等 |
| 归一化 | transforms.Normalize() | 标准化 |
| 大小调整 | transforms.Resize() | 调整大小 |
| 转为Tensor | transforms.ToTensor() | PIL/NumPy转为Tensor |
| 随机擦除 | transforms.RandomErasing() | Cutout数据增强 |
| 组合变换 | transforms.Compose() | 组合多个变换 |
| 自定义变换 | transforms.Lambda() | 自定义变换 |
```python
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
print("\n=== 数据增强变换 ===")
# 5.1 创建模拟图像(PIL格式)
np_image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
pil_image = Image.fromarray(np_image)
print(f"原始图像: {pil_image.size}, 模式: {pil_image.mode}")
# 5.2 定义增强管道
train_transform = transforms.Compose([
transforms.Resize((128, 128)), # 调整大小
transforms.RandomCrop(112), # 随机裁剪到112x112
transforms.RandomHorizontalFlip(p=0.5), # 50%概率水平翻转
transforms.RandomRotation(degrees=15), # 随机旋转±15度
transforms.ColorJitter(
brightness=0.2, # 亮度调整
contrast=0.2, # 对比度调整
saturation=0.2, # 饱和度调整
hue=0.1 # 色调调整
),
transforms.ToTensor(), # 转为Tensor
transforms.Normalize(
mean=[0.485, 0.456, 0.406], # ImageNet均值
std=[0.229, 0.224, 0.225] # ImageNet标准差
),
transforms.RandomErasing( # Cutout
p=0.5, # 应用概率
scale=(0.02, 0.1), # 擦除面积比例
ratio=(0.3, 3.3), # 宽高比范围
value=0 # 填充值
)
])
# 5.3 验证集变换(通常更简单)
val_transform = transforms.Compose([
transforms.Resize((128, 128)),
transforms.CenterCrop(112),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
])
# 5.4 应用变换
augmented_image = train_transform(pil_image)
print(f"\n增强后图像形状: {augmented_image.shape}") # (3, 112, 112)
print(f"像素值范围: [{augmented_image.min():.3f}, {augmented_image.max():.3f}]")
# 5.5 自定义变换
def custom_normalize(tensor):
    """自定义归一化,将值缩放到[-1, 1]"""
    return tensor * 2 - 1
custom_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Lambda(custom_normalize)
])
custom_image = custom_transform(pil_image)
print(f"\n自定义归一化范围: [{custom_image.min():.3f}, {custom_image.max():.3f}]")
# 5.6 多裁剪(常用于测试时增强)
ten_crop = transforms.TenCrop(size=112) # 4个角+中心,再水平翻转
image_tensor = transforms.ToTensor()(pil_image.resize((224, 224)))
crops = ten_crop(image_tensor)
print(f"\nTenCrop输出数量: {len(crops)}")
print(f"每个裁剪形状: {crops[0].shape}")
```
模型构建
模型定义方式

| 方式 | 类/方法 | 适用场景 |
| --- | --- | --- |
| Sequential | nn.Sequential() | 简单线性堆叠 |
| Module子类 | class Model(nn.Module) | 复杂模型 |
| ModuleList | nn.ModuleList() | 动态层列表 |
| ModuleDict | nn.ModuleDict() | 字典形式组织层 |
```python
print("=== 模型构建 ===")
# 6.1 Sequential方式(简单模型)
model_seq = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(inplace=True),
nn.MaxPool2d(2),
nn.Conv2d(16, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.MaxPool2d(2),
nn.Flatten(),
nn.Linear(32 * 8 * 8, 128), # 假设输入为32x32,经过两次池化后为8x8
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(128, 10) # 10个类别
)
print("Sequential模型:")
print(model_seq)
# 测试前向传播
x = torch.randn(4, 3, 32, 32)
output = model_seq(x)
print(f"\n输入形状: {x.shape}")
print(f"输出形状: {output.shape}")
# 6.2 Module子类方式(推荐)
class CNNClassifier(nn.Module):
    """自定义CNN分类器"""
    def __init__(self, num_classes=10):
        super(CNNClassifier, self).__init__()
        # 特征提取器
        self.features = nn.Sequential(
            # 第一个卷积块
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            # 第二个卷积块
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            # 第三个卷积块
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        # 分类器
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 256), # 假设输入32x32,经过3次池化后为4x4
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )
        # 初始化权重
        self._initialize_weights()
    def _initialize_weights(self):
        """初始化权重"""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
    def forward(self, x):
        """前向传播"""
        features = self.features(x)
        output = self.classifier(features)
        return output
    def get_features(self, x):
        """提取特征(用于迁移学习)"""
        return self.features(x)
# 创建模型实例
model = CNNClassifier(num_classes=10)
print(f"\n自定义CNN模型:")
print(model)
# 统计模型参数
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\n模型参数统计:")
print(f"总参数: {total_params:,}")
print(f"可训练参数: {trainable_params:,}")
# 前向传播
x = torch.randn(4, 3, 32, 32)
output = model(x)
print(f"\n输入形状: {x.shape}")
print(f"输出形状: {output.shape}")
# 提取特征
features = model.get_features(x)
print(f"特征图形状: {features.shape}")
# 6.3 ModuleList和ModuleDict
class DynamicModel(nn.Module):
    """使用ModuleList和ModuleDict的动态模型"""
    def __init__(self, num_layers=3, num_classes=10):
        super(DynamicModel, self).__init__()
        # 使用ModuleList存储动态数量的层
        self.layers = nn.ModuleList([
            nn.Linear(10, 20) if i == 0 else nn.Linear(20, 20)
            for i in range(num_layers)
        ])
        # 使用ModuleDict存储不同类型的层
        self.operations = nn.ModuleDict({
            'relu': nn.ReLU(),
            'dropout': nn.Dropout(0.5),
            'final': nn.Linear(20, num_classes)
        })
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
            x = self.operations['relu'](x)
            x = self.operations['dropout'](x)
        x = self.operations['final'](x)
        return x
dynamic_model = DynamicModel(num_layers=4)
print(f"\n动态模型:")
print(dynamic_model)
x = torch.randn(4, 10)
output = dynamic_model(x)
print(f"输出形状: {output.shape}")
```
预训练模型
```python
import torchvision.models as models
print("\n=== 预训练模型 ===")
# 6.1 加载预训练模型
# ResNet
resnet18 = models.resnet18(pretrained=True)
resnet50 = models.resnet50(pretrained=True)
# VGG
vgg16 = models.vgg16(pretrained=True)
# EfficientNet
try:
    efficientnet = models.efficientnet_b0(pretrained=True)
except:
    print("EfficientNet需要torchvision>=0.11")
# Vision Transformer (ViT)
try:
    vit = models.vit_b_16(pretrained=True)
except:
    print("ViT需要torchvision>=0.12")
print(f"ResNet18结构:")
print(resnet18)
# 6.2 修改预训练模型
num_classes = 100 # 新任务类别数
# 方法1: 替换最后一层
resnet18.fc = nn.Linear(resnet18.fc.in_features, num_classes)
# 方法2: 添加新层
class CustomResNet(nn.Module):
    def __init__(self, num_classes=100):
        super(CustomResNet, self).__init__()
        # 加载预训练特征提取器
        self.features = nn.Sequential(*list(resnet50.children())[:-1])
        # 冻结特征提取器参数
        for param in self.features.parameters():
            param.requires_grad = False
        # 添加自定义分类器
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048, 512), # ResNet50最后一层特征维度为2048
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    def forward(self, x):
        features = self.features(x)
        output = self.classifier(features)
        return output
custom_resnet = CustomResNet(num_classes=100)
print(f"\n自定义ResNet:")
print(f"总参数: {sum(p.numel() for p in custom_resnet.parameters()):,}")
print(f"可训练参数: {sum(p.numel() for p in custom_resnet.parameters() if p.requires_grad):,}")
# 6.3 模型保存与加载
# 保存模型参数(state_dict)
torch.save(custom_resnet.state_dict(), 'model.pth')
# 保存检查点(包含优化器状态等)
checkpoint = {
'epoch': 10,
'model_state_dict': custom_resnet.state_dict(),
'optimizer_state_dict': optimizer_adamw.state_dict(),
'loss': 0.123,
}
torch.save(checkpoint, 'checkpoint.pth')
# 加载模型
loaded_model = CustomResNet(num_classes=100)
loaded_model.load_state_dict(torch.load('model.pth'))
loaded_model.eval() # 设置为评估模式
# 加载检查点
checkpoint = torch.load('checkpoint.pth')
loaded_model.load_state_dict(checkpoint['model_state_dict'])
print(f"\n模型保存与加载完成")
```
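较新版本的 torchvision(约0.13+)推荐用 weights 参数代替已弃用的 pretrained=True。下面是一个简要示意,旧版本仍可使用上面的写法:

```python
# torchvision >= 0.13 的写法(示意)
from torchvision.models import resnet18, ResNet18_Weights
resnet18_new = resnet18(weights=ResNet18_Weights.DEFAULT)  # 加载默认预训练权重
resnet18_scratch = resnet18(weights=None)                  # 不加载预训练权重
```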
训练与验证
训练循环模板
```python
print("=== 训练与验证 ===")
import copy  # 用于对最佳模型权重做深拷贝

def train_epoch(model, dataloader, criterion, optimizer, device):
    """训练一个epoch"""
    model.train() # 设置为训练模式
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        # 移动数据到设备
        inputs, targets = inputs.to(device), targets.to(device)
        # 清零梯度
        optimizer.zero_grad()
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # 反向传播
        loss.backward()
        # 梯度裁剪
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        # 参数更新
        optimizer.step()
        # 统计
        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        # 打印进度
        if (batch_idx + 1) % 10 == 0:
            print(f' 批次: {batch_idx+1}/{len(dataloader)}, '
                  f'损失: {loss.item():.4f}')
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def validate_epoch(model, dataloader, criterion, device):
    """验证一个epoch"""
    model.eval() # 设置为评估模式
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad(): # 禁用梯度计算
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            # 前向传播
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            # 统计
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

# 7.2 完整训练流程
def train_model(model, train_loader, val_loader, criterion, optimizer,
                scheduler, num_epochs, device):
    """完整训练流程"""
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    best_acc = 0.0
    best_model_wts = None
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print('-' * 40)
        # 训练
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, device
        )
        # 验证
        val_loss, val_acc = validate_epoch(
            model, val_loader, criterion, device
        )
        # 学习率调度
        if scheduler is not None:
            scheduler.step(val_loss) # 对于ReduceLROnPlateau
            # 或者 scheduler.step() 对于其他调度器
        # 记录结果
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        print(f'训练 - 损失: {train_loss:.4f}, 准确率: {train_acc:.2f}%')
        print(f'验证 - 损失: {val_loss:.4f}, 准确率: {val_acc:.2f}%')
        # 保存最佳模型
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())  # 深拷贝,避免权重随后续训练被覆盖
            torch.save(best_model_wts, 'best_model.pth')
            print(f' 保存最佳模型,准确率: {best_acc:.2f}%')
    # 加载最佳模型权重
    model.load_state_dict(best_model_wts)
    return {
        'model': model,
        'train_losses': train_losses,
        'train_accs': train_accs,
        'val_losses': val_losses,
        'val_accs': val_accs,
        'best_acc': best_acc
    }
# 7.3 模拟训练
print("模拟训练流程:")
# 创建模拟数据
train_dataset = TensorDataset(
torch.randn(100, 3, 32, 32),
torch.randint(0, 10, (100,))
)
val_dataset = TensorDataset(
torch.randn(20, 3, 32, 32),
torch.randint(0, 10, (20,))
)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
# 创建简单模型
simple_model = nn.Sequential(
nn.Flatten(),
nn.Linear(3*32*32, 128),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(128, 10)
)
# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = simple_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
optimizer, mode='min', factor=0.5, patience=3
)
# 运行训练(这里只运行2个epoch作为演示)
print(f"使用设备: {device}")
results = train_model(
model=model,
train_loader=train_loader,
val_loader=val_loader,
criterion=criterion,
optimizer=optimizer,
scheduler=scheduler,
num_epochs=2,
device=device
)
print(f"\n训练完成,最佳验证准确率: {results['best_acc']:.2f}%")
```
混合精度训练
```python
print("\n=== 混合精度训练 ===")
try:
    from torch.cuda.amp import autocast, GradScaler
    # 初始化梯度缩放器
    scaler = GradScaler()
    def train_epoch_amp(model, dataloader, criterion, optimizer, device, scaler):
        """使用混合精度训练的epoch"""
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            # 使用autocast进行混合精度前向传播
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)
            # 使用scaler进行梯度缩放和反向传播
            scaler.scale(loss).backward()
            # 梯度裁剪(需要先unscale)
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # 更新参数
            scaler.step(optimizer)
            scaler.update()
            # 统计
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        epoch_loss = running_loss / total
        epoch_acc = 100. * correct / total
        return epoch_loss, epoch_acc
    print("混合精度训练可用")
except ImportError:
    print("混合精度训练需要PyTorch>=1.6")
```
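较新版本的 PyTorch(约1.10+)也提供与设备解耦的 torch.amp.autocast 写法。下面是一个简要示意(仅在有GPU时运行,变量名均为演示用):

```python
# 与torch.cuda.amp.autocast等价的较新写法
if torch.cuda.is_available():
    demo_layer = nn.Linear(128, 10).cuda()
    demo_input = torch.randn(4, 128, device="cuda")
    with torch.amp.autocast(device_type="cuda", dtype=torch.float16):
        demo_output = demo_layer(demo_input)
    print(f"autocast内的输出dtype: {demo_output.dtype}")  # 通常为float16
```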
分布式训练
```python
print("\n=== 分布式训练 ===")
def setup_distributed():
    """设置分布式训练环境"""
    import os
    # 检查是否在分布式环境中
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        rank = int(os.environ["RANK"])
        world_size = int(os.environ['WORLD_SIZE'])
        gpu = int(os.environ['LOCAL_RANK'])
        torch.cuda.set_device(gpu)
        torch.distributed.init_process_group(
            backend='nccl',
            init_method='env://',
            world_size=world_size,
            rank=rank
        )
        return True, rank, world_size, gpu
    return False, 0, 1, 0
# 检查分布式环境
is_distributed, rank, world_size, gpu = setup_distributed()
if is_distributed:
    print(f"分布式训练: rank={rank}, world_size={world_size}, gpu={gpu}")
    # 创建分布式模型
    model = simple_model.cuda(gpu)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[gpu]
    )
    # 创建分布式采样器
    from torch.utils.data.distributed import DistributedSampler
    train_sampler = DistributedSampler(
        train_dataset,
        num_replicas=world_size,
        rank=rank
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=16,
        sampler=train_sampler,
        num_workers=2
    )
    print(f"使用分布式数据采样器")
else:
    print("单机训练")
```
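使用 DistributedSampler 时,一般还要在每个epoch开始前调用 set_epoch,否则各epoch的shuffle顺序相同。简要示意如下(只在分布式环境下有意义):

```python
# 每个epoch前设置采样器的epoch,保证shuffle顺序不同
if is_distributed:
    for epoch in range(2):
        train_sampler.set_epoch(epoch)
        for inputs, targets in train_loader:
            pass  # 此处省略实际的训练步骤
```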
模型部署与推理
模型推理
```python
print("\n=== 模型推理 ===")
# 8.1 基本推理
model.eval() # 确保模型在评估模式
# 创建测试数据
test_input = torch.randn(1, 3, 32, 32).to(device)
# 推理(不计算梯度)
with torch.no_grad():
    output = model(test_input)
    probabilities = torch.softmax(output, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)
print(f"输入形状: {test_input.shape}")
print(f"输出形状: {output.shape}")
print(f"预测类别: {predicted_class.item()}")
print(f"类别概率: {probabilities[0]}")
# 8.2 批量推理
def batch_inference(model, dataloader, device):
    """批量推理"""
    model.eval()
    all_predictions = []
    all_probabilities = []
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            probs = torch.softmax(outputs, dim=1)
            predictions = torch.argmax(probs, dim=1)
            all_predictions.append(predictions.cpu())
            all_probabilities.append(probs.cpu())
    all_predictions = torch.cat(all_predictions, dim=0)
    all_probabilities = torch.cat(all_probabilities, dim=0)
    return all_predictions, all_probabilities
# 测试批量推理
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
predictions, probabilities = batch_inference(model, val_loader, device)
print(f"\n批量推理结果:")
print(f"预测形状: {predictions.shape}")
print(f"概率形状: {probabilities.shape}")
print(f"前5个预测: {predictions[:5]}")
# 8.3 置信度阈值
confidence_threshold = 0.8
high_conf_mask = probabilities.max(dim=1)[0] > confidence_threshold
high_conf_predictions = predictions[high_conf_mask]
print(f"\n置信度阈值过滤:")
print(f"总样本数: {len(predictions)}")
print(f"高置信度样本数: {len(high_conf_predictions)}")
print(f"高置信度比例: {len(high_conf_predictions)/len(predictions):.2%}")
```
模型导出与部署
```python
print("\n=== 模型导出与部署 ===")
# 8.1 导出为TorchScript
# 方法1: Tracing(跟踪)
example_input = torch.randn(1, 3, 32, 32).to(device)
traced_model = torch.jit.trace(model, example_input)
traced_model.save("model_traced.pt")
print("TorchScript (Tracing) 导出完成")
# 方法2: Scripting(脚本)
# scripted_model = torch.jit.script(model)
# scripted_model.save("model_scripted.pt")
# print("TorchScript (Scripting) 导出完成")
# 8.2 加载TorchScript模型
loaded_traced_model = torch.jit.load("model_traced.pt")
loaded_traced_model.eval()
# 使用导出的模型推理
with torch.no_grad():
    traced_output = loaded_traced_model(test_input)
print(f"原始模型输出: {output[0, :3]}")
print(f"导出模型输出: {traced_output[0, :3]}")
print(f"输出是否一致: {torch.allclose(output, traced_output, rtol=1e-3)}")
# 8.3 ONNX导出(跨平台部署)
try:
    import onnx
    import onnxruntime
    # 导出为ONNX格式
    torch.onnx.export(
        model, # 模型
        test_input, # 示例输入
        "model.onnx", # 保存路径
        export_params=True, # 导出参数
        opset_version=11, # ONNX算子集版本
        do_constant_folding=True, # 优化常量
        input_names=['input'], # 输入名称
        output_names=['output'], # 输出名称
        dynamic_axes={ # 动态维度
            'input': {0: 'batch_size'},
            'output': {0: 'batch_size'}
        }
    )
    print("ONNX导出完成")
    # 使用ONNX Runtime推理
    ort_session = onnxruntime.InferenceSession("model.onnx")
    # 准备输入
    ort_inputs = {ort_session.get_inputs()[0].name: test_input.cpu().numpy()}
    # 推理
    ort_outputs = ort_session.run(None, ort_inputs)
    print(f"ONNX Runtime输出形状: {ort_outputs[0].shape}")
except ImportError:
    print("ONNX导出需要安装onnx和onnxruntime")
# 8.4 模型量化(减小模型大小,加速推理)
print("\n=== 模型量化 ===")
# 动态量化(推理时量化)
quantized_model = torch.quantization.quantize_dynamic(
model, # 原始模型
{torch.nn.Linear}, # 需要量化的模块类型
dtype=torch.qint8 # 量化数据类型
)
print(f"量化模型大小对比:")
print(f" 原始模型: {sum(p.numel() for p in model.parameters()):,} 参数")
print(f" 量化模型: {sum(p.numel() for p in quantized_model.parameters()):,} 参数")
# 保存量化模型
torch.save(quantized_model.state_dict(), "quantized_model.pth")
# 8.5 模型剪枝
print("\n=== 模型剪枝 ===")
# 简单的L1 unstructured剪枝
import torch.nn.utils.prune  # 确保prune子模块已导入
parameters_to_prune = []
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        parameters_to_prune.append((module, 'weight'))
# 应用剪枝
torch.nn.utils.prune.global_unstructured(
    parameters_to_prune,
    pruning_method=torch.nn.utils.prune.L1Unstructured,
    amount=0.2, # 剪枝20%的参数
)
# 检查剪枝效果
pruned_params = 0
total_params = 0
for module, _ in parameters_to_prune:
    mask = module.weight_mask # 剪枝掩码
    pruned_params += (mask == 0).sum().item()
    total_params += mask.numel()
print(f"剪枝比例: {pruned_params/total_params:.2%}")
```
计算机视觉特定功能
图像处理工具
```python
print("\n=== 计算机视觉特定功能 ===")
# 9.1 torchvision.transforms.functional
import torchvision.transforms.functional as F
# 创建测试图像
test_image = torch.randn(3, 224, 224)
# 图像变换
resized = F.resize(test_image, (112, 112))
cropped = F.center_crop(test_image, (200, 200))
hflipped = F.hflip(test_image)
rotated = F.rotate(test_image, angle=45)
adjusted = F.adjust_brightness(test_image, brightness_factor=1.5)
print(f"原始图像形状: {test_image.shape}")
print(f"调整大小后: {resized.shape}")
print(f"中心裁剪后: {cropped.shape}")
# 9.2 torchvision.ops(图像操作)
try:
    from torchvision.ops import nms, roi_align, roi_pool
    # 模拟目标检测结果
    boxes = torch.tensor([
        [10, 10, 50, 50], # [x1, y1, x2, y2]
        [15, 15, 55, 55],
        [100, 100, 150, 150]
    ], dtype=torch.float32)
    scores = torch.tensor([0.9, 0.8, 0.7])
    # NMS(非极大值抑制)
    keep = nms(boxes, scores, iou_threshold=0.5)
    print(f"\nNMS保留的框索引: {keep}")
    # ROI Align
    features = torch.randn(1, 256, 32, 32) # 特征图
    rois = torch.tensor([[0, 10, 10, 50, 50]], dtype=torch.float32) # [batch_idx, x1, y1, x2, y2],需为浮点类型
    roi_features = roi_align(
        features, rois,
        output_size=(7, 7),
        spatial_scale=1/16.0, # 特征图相对于原图的比例
        sampling_ratio=2
    )
    print(f"ROI Align输出形状: {roi_features.shape}")
except ImportError:
    print("torchvision.ops需要torchvision>=0.3")
# 9.3 可视化工具
import matplotlib.pyplot as plt
def visualize_feature_maps(feature_maps, num_maps=16):
    """可视化特征图"""
    feature_maps = feature_maps.detach().cpu()
    # 选择前num_maps个特征图
    if feature_maps.shape[1] > num_maps:
        feature_maps = feature_maps[:, :num_maps, :, :]
    # 创建子图
    fig, axes = plt.subplots(4, 4, figsize=(12, 12))
    axes = axes.ravel()
    for idx in range(min(num_maps, len(axes))):
        feature_map = feature_maps[0, idx] # 取第一个样本
        axes[idx].imshow(feature_map, cmap='viridis')
        axes[idx].axis('off')
        axes[idx].set_title(f'Channel {idx}')
    plt.tight_layout()
    return fig
# 测试特征图可视化
with torch.no_grad():
    test_input = torch.randn(1, 3, 32, 32)
    features = model.get_features(test_input) # 假设模型有get_features方法
    print(f"特征图形状: {features.shape}")
# 可视化(实际使用时取消注释)
# fig = visualize_feature_maps(features)
# plt.show()
```
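除了手动用 matplotlib 画特征图,torchvision.utils 里的 make_grid / save_image 也常用来把一个batch的图像拼成网格。简要示意如下:

```python
from torchvision.utils import make_grid, save_image

# 把一个batch的图像拼成网格张量(normalize=True会把数值缩放到[0,1]便于显示)
batch_images = torch.randn(16, 3, 32, 32)
grid = make_grid(batch_images, nrow=4, normalize=True)
print(f"网格张量形状: {grid.shape}")  # (3, H, W)
# save_image(batch_images, "batch_grid.png", nrow=4, normalize=True)  # 保存为图片文件
```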
常用CV模型实现
```python
print("\n=== 常用CV模型实现 ===")
# 9.1 ResNet残差块
class ResidualBlock(nn.Module):
    """ResNet残差块"""
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
# 测试残差块(通道数/步长变化时需要downsample对齐shortcut,否则out += identity会形状不匹配)
downsample = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(128)
)
res_block = ResidualBlock(64, 128, stride=2, downsample=downsample)
test_input = torch.randn(4, 64, 32, 32)
output = res_block(test_input)
print(f"ResidualBlock:")
print(f" 输入形状: {test_input.shape}")
print(f" 输出形状: {output.shape}")
# 9.2 注意力机制(SE Block)
class SEBlock(nn.Module):
    """Squeeze-and-Excitation注意力块"""
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )
    def forward(self, x):
        b, c, _, _ = x.size()
        # Squeeze
        y = self.avg_pool(x).view(b, c)
        # Excitation
        y = self.fc(y).view(b, c, 1, 1)
        # Scale
        return x * y.expand_as(x)
# 测试SE Block
se_block = SEBlock(128)
se_output = se_block(output)
print(f"\nSEBlock输出形状: {se_output.shape}")
# 9.3 U-Net编码器块
class UNetEncoderBlock(nn.Module):
    """U-Net编码器块"""
    def __init__(self, in_channels, out_channels):
        super(UNetEncoderBlock, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.pool = nn.MaxPool2d(2)
    def forward(self, x):
        # 编码路径
        x = self.conv(x)
        pooled = self.pool(x)
        return x, pooled
# 9.4 深度可分离卷积
class DepthwiseSeparableConv(nn.Module):
    """深度可分离卷积(MobileNet使用)"""
    def __init__(self, in_channels, out_channels, stride=1):
        super(DepthwiseSeparableConv, self).__init__()
        # 深度卷积
        self.depthwise = nn.Conv2d(
            in_channels, in_channels,
            kernel_size=3, stride=stride,
            padding=1, groups=in_channels, # groups=in_channels实现深度卷积
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        # 逐点卷积
        self.pointwise = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
    def forward(self, x):
        x = self.depthwise(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pointwise(x)
        x = self.bn2(x)
        x = self.relu(x)
        return x
# 测试深度可分离卷积
ds_conv = DepthwiseSeparableConv(32, 64, stride=2)
test_input = torch.randn(4, 32, 32, 32)
output = ds_conv(test_input)
print(f"\nDepthwiseSeparableConv:")
print(f" 输入形状: {test_input.shape}")
print(f" 输出形状: {output.shape}")
# 统计参数数量对比
standard_conv = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
print(f"\n参数数量对比:")
print(f" 标准卷积: {sum(p.numel() for p in standard_conv.parameters()):,}")
print(f" 深度可分离卷积: {sum(p.numel() for p in ds_conv.parameters()):,}")
```