.requires_grad,.detach(),torch.no_grad()

让模型参数的requires_grad=False:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')


class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        # let the parameters of layer2 not require gradients
        for param in self.layer2.parameters():
            param.requires_grad = False
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        check_tensor_properties(x, "x")  # requires_grad=False, is_leaf=True. just because it's the input tensor

        x_ = torch.relu(self.layer1(x))
        check_tensor_properties(x_, "x_")  # requires_grad=True, is_leaf=False

        y = torch.relu(self.layer2(x_))  # layer2's parameters do not require gradients
        check_tensor_properties(y, "y")  # requires_grad=True, is_leaf=False

        t = self.layer3(y)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False

        return t


torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
check_tensor_properties(inputs, "inputs")  # requires_grad=False, is_leaf=True

targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # not None

print("Layer2 gradients:")
for param in model.layer2.parameters():
    print(param.grad)  # None

print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None

detach:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')

class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        x = torch.relu(self.layer1(x))

        x = torch.relu(self.layer2(x))
        check_tensor_properties(x, "x")  # requires_grad=True, is_leaf=False

        z = x.detach()  # detach x from the computation graph
        check_tensor_properties(z, "z")  # requires_grad=False, is_leaf=True
        
        t = self.layer3(z)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False

        return t


torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # None

print("Layer2 gradients:")
for param in model.layer2.parameters():
    print(param.grad)  # None

print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None

torch.no_grad():

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')


class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer20 = nn.Linear(2, 2)
        self.layer21 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        check_tensor_properties(x, "x")  # requires_grad=True, is_leaf=False

        with torch.no_grad():  # all the tensors created in this block will not require gradients and be leaf tensors
            y = torch.relu(self.layer20(x))
            check_tensor_properties(y, "y")  # requires_grad=False, is_leaf=True

            z = torch.relu(self.layer21(y))
            check_tensor_properties(z, "z")  # requires_grad=False, is_leaf=True

        t = self.layer3(z)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False

        return t


torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # None

print("Layer20 gradients:")
for param in model.layer20.parameters():
    print(param.grad)  # None

print("Layer21 gradients:")
for param in model.layer21.parameters():
    print(param.grad)  # None

print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None
相关推荐
2601_9563198812 小时前
期货夜盘无人值守监控什么:断线、无成交与拒单信号
python·区块链
CTA终结者12 小时前
期货量化目标仓和净持仓对不齐:天勤 TargetPosTask 与 pos 偏差排查
python·区块链
科技林总12 小时前
解决vllm服务漏扫问题
python·安全
祭曦念13 小时前
古诗小集开发实战:从零开发一款 HarmonyOS 古诗鉴赏应用
pytorch·深度学习·harmonyos
财经资讯数据_灵砚智能14 小时前
基于全球经济类多源新闻的NLP情感分析与数据可视化(夜间-次晨)2026年6月10日
大数据·人工智能·python·ai·信息可视化·自然语言处理·灵砚智能
namexingyun14 小时前
拆解Fable 5三重安全护栏:模型路由、蒸馏防护与生物安全分类器的技术原理 - 微元算力(weytoken)
java·人工智能·python·安全·架构·ai编程
chenment14 小时前
别再为每个模型单独写一套队列了:用 200 行代码封装多模态统一调用层
人工智能·python·产品
啊森要自信14 小时前
【GUI自动化测试】控件、鼠标键盘操作与多场景自动化
c语言·开发语言·python·adb·ipython
YJlio14 小时前
《Sysinternals实战指南》16.5 Ctrl2Cap 工具详解:把 Caps Lock 变成 Ctrl 的键盘改造与回退方法
linux·运维·服务器·网络·python·学习·计算机外设
某林21214 小时前
从底层硬件死锁到 QoS 通信底层的全链路复盘
python·ros2·qos