.requires_grad,.detach(),torch.no_grad()

将模型参数的 requires_grad 设为 False(冻结该层参数):

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print a short autograd report for ``tensor``.

    Args:
        tensor: Object exposing ``requires_grad`` and ``is_leaf`` attributes.
        name: Label printed on the first line of the report.
    """
    report = (
        f"Tensor name: {name}",
        f"Requires gradient: {tensor.requires_grad}",
        f"Is leaf: {tensor.is_leaf}",
        "\n",  # together with print's own newline this leaves two blank lines
    )
    print(*report, sep="\n")


class SelectiveGradientModel(nn.Module):
    """Three stacked 2->2 linear layers where ``layer2`` is frozen.

    Demonstrates that setting ``requires_grad = False`` on a layer's
    parameters stops gradients from being stored for those parameters only;
    the activations flowing through the layer still carry gradient history
    when their input does.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        # Freeze layer2: its weight and bias are excluded from autograd.
        for frozen_param in self.layer2.parameters():
            frozen_param.requires_grad = False
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Run the three layers, printing autograd properties at each step.

        The expected values in the comments below hold for an input that
        does not itself require gradients.
        """
        # Input tensor: requires_grad=False, is_leaf=True.
        check_tensor_properties(x, "x")

        pre_act1 = self.layer1(x)
        x_ = torch.relu(pre_act1)
        # Produced by trainable layer1: requires_grad=True, is_leaf=False.
        check_tensor_properties(x_, "x_")

        # layer2's parameters are frozen, but its input x_ requires grad,
        # so the output still does: requires_grad=True, is_leaf=False.
        pre_act2 = self.layer2(x_)
        y = torch.relu(pre_act2)
        check_tensor_properties(y, "y")

        t = self.layer3(y)
        # requires_grad=True, is_leaf=False.
        check_tensor_properties(t, "t")

        return t


# Seed the RNG so the parameter initialisation and random data are repeatable.
torch.manual_seed(2)
model = SelectiveGradientModel()

# Hand the optimizer only the parameters that still require gradients.
optimizer = optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=0.1,
)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
# Freshly created data tensor: requires_grad=False, is_leaf=True.
check_tensor_properties(inputs, "inputs")

targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

# Print the stored gradient of every parameter, layer by layer.
for heading, layer in (
    ("Layer1 gradients:", model.layer1),  # not None
    ("Layer2 gradients:", model.layer2),  # None (parameters are frozen)
    ("Layer3 gradients:", model.layer3),  # not None
):
    print(heading)
    for param in layer.parameters():
        print(param.grad)

detach:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print the name, ``requires_grad`` flag and ``is_leaf`` flag of ``tensor``.

    Args:
        tensor: Object exposing ``requires_grad`` and ``is_leaf`` attributes.
        name: Label printed on the first line of the report.
    """
    lines = [
        f"Tensor name: {name}",
        f"Requires gradient: {tensor.requires_grad}",
        f"Is leaf: {tensor.is_leaf}",
    ]
    # The extra "\n\n" plus print's own newline ends the report with two blank lines.
    print("\n".join(lines) + "\n\n")

class SelectiveGradientModel(nn.Module):
    """Three stacked 2->2 linear layers with a ``detach()`` before ``layer3``.

    Demonstrates that detaching an intermediate activation cuts the autograd
    graph: only ``layer3`` sits downstream of the cut and receives gradients.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Run the layers, detaching the activation that feeds ``layer3``."""
        hidden1 = self.layer1(x)
        x = torch.relu(hidden1)

        hidden2 = self.layer2(x)
        x = torch.relu(hidden2)
        # Still attached to layer1/layer2: requires_grad=True, is_leaf=False.
        check_tensor_properties(x, "x")

        # Cut x out of the computation graph.
        z = x.detach()
        # Detached tensor: requires_grad=False, is_leaf=True.
        check_tensor_properties(z, "z")

        t = self.layer3(z)
        # Depends on layer3's trainable parameters: requires_grad=True, is_leaf=False.
        check_tensor_properties(t, "t")

        return t


# Seed the RNG so the parameter initialisation and random data are repeatable.
torch.manual_seed(2)
model = SelectiveGradientModel()

# Hand the optimizer only the parameters that require gradients.
optimizer = optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=0.1,
)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

# Print the stored gradient of every parameter, layer by layer.
for heading, layer in (
    ("Layer1 gradients:", model.layer1),  # None (upstream of the detach)
    ("Layer2 gradients:", model.layer2),  # None (upstream of the detach)
    ("Layer3 gradients:", model.layer3),  # not None
):
    print(heading)
    for param in layer.parameters():
        print(param.grad)

torch.no_grad():

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print ``tensor``'s label and its ``requires_grad`` / ``is_leaf`` flags.

    Args:
        tensor: Object exposing ``requires_grad`` and ``is_leaf`` attributes.
        name: Label printed on the first line of the report.
    """
    print(f"Tensor name: {name}")
    for caption, flag in (
        ("Requires gradient:", tensor.requires_grad),
        ("Is leaf:", tensor.is_leaf),
    ):
        print(caption, flag)
    # Two blank lines separate consecutive reports.
    print('\n')


class SelectiveGradientModel(nn.Module):
    """Four 2->2 linear layers; the middle two run under ``torch.no_grad()``.

    Demonstrates that operations executed inside a ``torch.no_grad()`` block
    are not recorded by autograd, so only ``layer3`` (applied after the block)
    receives gradients.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer20 = nn.Linear(2, 2)
        self.layer21 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Run the layers, evaluating ``layer20`` and ``layer21`` without autograd."""
        hidden1 = self.layer1(x)
        x = torch.relu(hidden1)
        # Produced by trainable layer1: requires_grad=True, is_leaf=False.
        check_tensor_properties(x, "x")

        # Tensors created inside this block do not require gradients and
        # are leaf tensors.
        with torch.no_grad():
            hidden20 = self.layer20(x)
            y = torch.relu(hidden20)
            # requires_grad=False, is_leaf=True.
            check_tensor_properties(y, "y")

            hidden21 = self.layer21(y)
            z = torch.relu(hidden21)
            # requires_grad=False, is_leaf=True.
            check_tensor_properties(z, "z")

        t = self.layer3(z)
        # Depends on layer3's trainable parameters: requires_grad=True, is_leaf=False.
        check_tensor_properties(t, "t")

        return t


# Seed the RNG so the parameter initialisation and random data are repeatable.
torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

# Print the stored gradient of every parameter, layer by layer.
for heading, layer in (
    ("Layer1 gradients:", model.layer1),    # None (graph is cut by no_grad)
    ("Layer20 gradients:", model.layer20),  # None (ran inside no_grad)
    ("Layer21 gradients:", model.layer21),  # None (ran inside no_grad)
    ("Layer3 gradients:", model.layer3),    # not None
):
    print(heading)
    for param in layer.parameters():
        print(param.grad)
相关推荐
weixin_4597539415 分钟前
golang如何实现Trace上下文传播_golang Trace上下文传播实现思路
jvm·数据库·python
weixin_4440129324 分钟前
PHP 中逻辑或(||)运算符的正确使用与条件逻辑重构指南
jvm·数据库·python
iAm_Ike6 小时前
Go 中自定义类型与基础类型间的显式类型转换详解
jvm·数据库·python
iuvtsrt6 小时前
Golang怎么实现方法集与接口的匹配_Golang如何理解值类型和指针类型实现接口的区别【详解】
jvm·数据库·python
牧子川6 小时前
009-Transformer-Architecture
人工智能·深度学习·transformer
旦莫7 小时前
AI驱动的纯视觉自动化测试:知识库里应该积累什么知识内容
人工智能·python·测试开发·pytest·ai测试
dfsj660118 小时前
第四章:深度学习革命
人工智能·深度学习
知识领航员8 小时前
蘑兔AI音乐深度实测:功能拆解、实测表现与适用场景
java·c语言·c++·人工智能·python·算法·github
cskywit8 小时前
【CVPR2024】用Diffusion“造”遥感分割数据:SatSynth论文解读
人工智能·深度学习·计算机视觉
薛定e的猫咪8 小时前
因果推理研究方向综述笔记
人工智能·笔记·深度学习·算法