.requires_grad,.detach(),torch.no_grad()

将模型参数的 requires_grad 设为 False(冻结 layer2 的参数):

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print the autograd status of ``tensor`` under the label ``name``.

    Args:
        tensor: Object exposing ``requires_grad`` and ``is_leaf`` attributes
            (a ``torch.Tensor`` at every call site in this script).
        name: Label shown on the first line to identify the tensor.
    """
    print("Tensor name:", name)
    print(f"Requires gradient: {tensor.requires_grad}")
    print(f"Is leaf: {tensor.is_leaf}")
    # Emit two newline characters so consecutive reports are visually separated.
    print(end="\n\n")


class SelectiveGradientModel(nn.Module):
    """Three stacked ``nn.Linear(2, 2)`` layers with ``layer2`` frozen.

    The parameters of ``layer2`` have ``requires_grad`` switched off at
    construction time; ``layer1`` and ``layer3`` keep the default setting.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        # Freeze layer2: none of its parameters will require gradients.
        self.layer2.requires_grad_(False)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Apply the three layers, reporting autograd properties at each stage.

        The expected values noted below are for a plain input tensor that
        does not itself require gradients.

        Args:
            x: Input tensor fed to ``layer1``.

        Returns:
            The output of ``layer3``.
        """
        # Input tensor: requires_grad=False, is_leaf=True (simply because it is the input).
        check_tensor_properties(x, "x")

        hidden = self.layer1(x).relu()
        check_tensor_properties(hidden, "x_")  # requires_grad=True, is_leaf=False

        # layer2's own parameters do not require gradients, but its input does,
        # so the result is still tracked.
        frozen_out = self.layer2(hidden).relu()
        check_tensor_properties(frozen_out, "y")  # requires_grad=True, is_leaf=False

        result = self.layer3(frozen_out)
        check_tensor_properties(result, "t")  # requires_grad=True, is_leaf=False

        return result


# Fix the seed so layer initialisation and the random tensors are reproducible.
torch.manual_seed(2)
model = SelectiveGradientModel()

# Only parameters that still require gradients are handed to the optimizer.
# The optimizer is constructed but never stepped in this script.
optimizer = optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=0.1,
)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)
check_tensor_properties(inputs, "inputs")  # requires_grad=False, is_leaf=True

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

# After backward(), report each layer's parameter gradients.
for heading, layer in (
    ("Layer1 gradients:", model.layer1),  # not None
    ("Layer2 gradients:", model.layer2),  # None (parameters are frozen)
    ("Layer3 gradients:", model.layer3),  # not None
):
    print(heading)
    for param in layer.parameters():
        print(param.grad)

使用 .detach() 将张量从计算图中分离:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print the autograd status of ``tensor`` under the label ``name``.

    Args:
        tensor: Object exposing ``requires_grad`` and ``is_leaf`` attributes
            (a ``torch.Tensor`` at every call site in this script).
        name: Label shown on the first line to identify the tensor.
    """
    print("Tensor name:", name)
    print(f"Requires gradient: {tensor.requires_grad}")
    print(f"Is leaf: {tensor.is_leaf}")
    # Emit two newline characters so consecutive reports are visually separated.
    print(end="\n\n")

class SelectiveGradientModel(nn.Module):
    """Three ``nn.Linear(2, 2)`` layers with the graph cut before ``layer3``.

    ``forward`` detaches the output of ``layer2`` before passing it to
    ``layer3``.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Run the layers, detaching the intermediate result before ``layer3``.

        Args:
            x: Input tensor fed to ``layer1``.

        Returns:
            The output of ``layer3`` computed from the detached activation.
        """
        hidden = self.layer2(self.layer1(x).relu()).relu()
        check_tensor_properties(hidden, "x")  # requires_grad=True, is_leaf=False

        # Detach the activation from the computation graph.
        cut = hidden.detach()
        check_tensor_properties(cut, "z")  # requires_grad=False, is_leaf=True

        result = self.layer3(cut)
        check_tensor_properties(result, "t")  # requires_grad=True, is_leaf=False

        return result


# Fix the seed so layer initialisation and the random tensors are reproducible.
torch.manual_seed(2)
model = SelectiveGradientModel()

# Pass the optimizer every parameter whose requires_grad flag is set.
# The optimizer is constructed but never stepped in this script.
optimizer = optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=0.1,
)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

# After backward(), report each layer's parameter gradients.
for heading, layer in (
    ("Layer1 gradients:", model.layer1),  # None (upstream of the detach)
    ("Layer2 gradients:", model.layer2),  # None (upstream of the detach)
    ("Layer3 gradients:", model.layer3),  # not None
):
    print(heading)
    for param in layer.parameters():
        print(param.grad)

使用 torch.no_grad() 上下文管理器禁用梯度跟踪:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print the autograd status of ``tensor`` under the label ``name``.

    Args:
        tensor: Object exposing ``requires_grad`` and ``is_leaf`` attributes
            (a ``torch.Tensor`` at every call site in this script).
        name: Label shown on the first line to identify the tensor.
    """
    print("Tensor name:", name)
    print(f"Requires gradient: {tensor.requires_grad}")
    print(f"Is leaf: {tensor.is_leaf}")
    # Emit two newline characters so consecutive reports are visually separated.
    print(end="\n\n")


class SelectiveGradientModel(nn.Module):
    """Four ``nn.Linear(2, 2)`` layers; the middle two run under ``no_grad``.

    ``layer20`` and ``layer21`` are evaluated inside a ``torch.no_grad()``
    block in ``forward``; ``layer1`` and ``layer3`` run outside it.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer20 = nn.Linear(2, 2)
        self.layer21 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Run the layers, computing the two middle ones without gradient tracking.

        Args:
            x: Input tensor fed to ``layer1``.

        Returns:
            The output of ``layer3``.
        """
        hidden = self.layer1(x).relu()
        check_tensor_properties(hidden, "x")  # requires_grad=True, is_leaf=False

        # Tensors created inside this block do not require gradients and are
        # leaf tensors.
        with torch.no_grad():
            first = self.layer20(hidden).relu()
            check_tensor_properties(first, "y")  # requires_grad=False, is_leaf=True

            second = self.layer21(first).relu()
            check_tensor_properties(second, "z")  # requires_grad=False, is_leaf=True

        result = self.layer3(second)
        check_tensor_properties(result, "t")  # requires_grad=True, is_leaf=False

        return result


# Fix the seed so layer initialisation and the random tensors are reproducible.
torch.manual_seed(2)
model = SelectiveGradientModel()

# The optimizer receives every model parameter; it is constructed but never
# stepped in this script.
optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

# After backward(), report each layer's parameter gradients.
for heading, layer in (
    ("Layer1 gradients:", model.layer1),  # None (upstream of the no_grad block)
    ("Layer20 gradients:", model.layer20),  # None (ran under no_grad)
    ("Layer21 gradients:", model.layer21),  # None (ran under no_grad)
    ("Layer3 gradients:", model.layer3),  # not None
):
    print(heading)
    for param in layer.parameters():
        print(param.grad)
相关推荐
CoovallyAIHub1 天前
推理提速一倍!SegDT:轻量化扩散 Transformer,医学图像分割的技术跨越
深度学习·算法·计算机视觉
CoovallyAIHub1 天前
无人机方案如何让桥梁监测更安全、更智能?融合RTK与超高分辨率成像,优于毫米精度
深度学习·算法·计算机视觉
Calihen的学习日志1 天前
【Pandas】3.1-数据预处理:列的基本操作
python·pandas
打螺丝否1 天前
稠密矩阵和稀疏矩阵的对比
python·机器学习·矩阵
这里有鱼汤1 天前
你以为 FastAPI 足够强?其实 Litestar 能让你的项目更轻量高效
后端·python
大学生毕业题目1 天前
毕业项目推荐:83-基于yolov8/yolov5/yolo11的农作物杂草检测识别系统(Python+卷积神经网络)
人工智能·python·yolo·目标检测·cnn·pyqt·杂草识别
㱘郳1 天前
cifar10分类对比:使用PyTorch卷积神经网络和SVM
pytorch·分类·cnn
Kyln.Wu1 天前
【python实用小脚本-205】[HR揭秘]手工党逐行查Bug的终结者|Python版代码质量“CT机”加速器(建议收藏)
开发语言·python·bug
计算机毕业设计木哥1 天前
Python毕业设计推荐:基于Django的饮食计划推荐与交流分享平台 饮食健康系统 健康食谱计划系统
开发语言·hadoop·spring boot·后端·python·django·课程设计
Deng_Xian_Sheng1 天前
有哪些任务可以使用无监督的方式训练深度学习模型?
人工智能·深度学习·无监督