`.requires_grad`, `.detach()`, and `torch.no_grad()`

让模型参数的requires_grad=False:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print a tensor's autograd flags under the given label.

    Emits the label, ``tensor.requires_grad`` and ``tensor.is_leaf`` on
    separate lines, followed by two blank lines as a visual separator.
    """
    summary = (
        f"Tensor name: {name}",
        f"Requires gradient: {tensor.requires_grad}",
        f"Is leaf: {tensor.is_leaf}",
        "\n",  # a lone newline item plus print's own line ending gives two blank lines
    )
    print(*summary, sep="\n")


class SelectiveGradientModel(nn.Module):
    """Three stacked 2->2 linear layers where the middle layer is frozen.

    ``layer2``'s parameters have ``requires_grad`` switched off, so they
    receive no gradient, while gradients still flow *through* ``layer2`` back
    to ``layer1``.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        # Freeze layer2: none of its parameters should accumulate gradients.
        self.layer2.requires_grad_(False)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Run the three layers, printing autograd flags after each stage."""
        # A plain input tensor: requires_grad=False, is_leaf=True.
        check_tensor_properties(x, "x")

        first_act = self.layer1(x)
        first_act = torch.relu(first_act)
        # Produced from layer1's trainable parameters:
        # requires_grad=True, is_leaf=False.
        check_tensor_properties(first_act, "x_")

        second_act = self.layer2(first_act)
        second_act = torch.relu(second_act)
        # layer2's parameters are frozen, but its input requires grad, so the
        # result still does: requires_grad=True, is_leaf=False.
        check_tensor_properties(second_act, "y")

        out = self.layer3(second_act)
        # requires_grad=True, is_leaf=False.
        check_tensor_properties(out, "t")

        return out


# Seed the RNG so the layer initialisation and the random tensors below repeat.
torch.manual_seed(2)
model = SelectiveGradientModel()

# Hand the optimizer only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
# A freshly created tensor: requires_grad=False, is_leaf=True.
check_tensor_properties(inputs, "inputs")

targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

# Expected: Layer1 -> gradients present, Layer2 -> None (frozen),
# Layer3 -> gradients present.
for label, layer in (
    ("Layer1", model.layer1),
    ("Layer2", model.layer2),
    ("Layer3", model.layer3),
):
    print(f"{label} gradients:")
    for param in layer.parameters():
        print(param.grad)

Using `.detach()` to cut a tensor out of the computation graph:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Report whether ``tensor`` requires grad and whether it is a leaf.

    The report is printed as three labelled lines followed by two blank lines.
    """
    needs_grad = tensor.requires_grad
    leaf = tensor.is_leaf
    # The two trailing newlines plus print's own line ending reproduce the
    # blank-line separator between successive reports.
    print(f"Tensor name: {name}\nRequires gradient: {needs_grad}\nIs leaf: {leaf}\n\n")

class SelectiveGradientModel(nn.Module):
    """Three stacked 2->2 linear layers with a ``detach()`` before the last.

    Detaching the activation that feeds ``layer3`` cuts the graph there, so
    backpropagation from the output reaches ``layer3`` only.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Apply layer1 and layer2, detach the result, then apply layer3."""
        hidden = torch.relu(self.layer2(torch.relu(self.layer1(x))))
        # Still attached to the graph: requires_grad=True, is_leaf=False.
        check_tensor_properties(hidden, "x")

        # detach() returns a tensor cut off from the computation graph:
        # requires_grad=False, is_leaf=True.
        severed = hidden.detach()
        check_tensor_properties(severed, "z")

        out = self.layer3(severed)
        # layer3's own parameters require grad, so the output does too:
        # requires_grad=True, is_leaf=False.
        check_tensor_properties(out, "t")

        return out


# Seed the RNG so the layer initialisation and the random tensors below repeat.
torch.manual_seed(2)
model = SelectiveGradientModel()

# Hand the optimizer only the parameters that require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

# Expected: Layer1 -> None and Layer2 -> None (both sit behind the detach),
# Layer3 -> gradients present.
for label, layer in (
    ("Layer1", model.layer1),
    ("Layer2", model.layer2),
    ("Layer3", model.layer3),
):
    print(f"{label} gradients:")
    for param in layer.parameters():
        print(param.grad)

Using `torch.no_grad()` to disable gradient tracking inside a block:

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    """Print the label plus the ``requires_grad`` / ``is_leaf`` flags of ``tensor``.

    Each value goes on its own ``label: value`` line; the report ends with two
    blank lines.
    """
    fields = (
        ("Tensor name", name),
        ("Requires gradient", tensor.requires_grad),
        ("Is leaf", tensor.is_leaf),
    )
    for label, value in fields:
        print(f"{label}: {value}")
    # Printing a newline (plus print's own line ending) leaves two blank lines.
    print("\n")


class SelectiveGradientModel(nn.Module):
    """Four stacked 2->2 linear layers; the middle two run under ``no_grad``.

    ``layer20`` and ``layer21`` are evaluated inside ``torch.no_grad()``, so
    nothing computed there is recorded in the graph and backpropagation from
    the output reaches ``layer3`` only.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer20 = nn.Linear(2, 2)
        self.layer21 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        """Apply layer1, then layer20/layer21 without tracking, then layer3."""
        tracked = torch.relu(self.layer1(x))
        # Computed with tracking on: requires_grad=True, is_leaf=False.
        check_tensor_properties(tracked, "x")

        # Tensors created inside this block do not require gradients and are
        # therefore leaf tensors.
        with torch.no_grad():
            untracked_a = torch.relu(self.layer20(tracked))
            # requires_grad=False, is_leaf=True.
            check_tensor_properties(untracked_a, "y")

            untracked_b = torch.relu(self.layer21(untracked_a))
            # requires_grad=False, is_leaf=True.
            check_tensor_properties(untracked_b, "z")

        out = self.layer3(untracked_b)
        # Tracking is back on and layer3's parameters require grad:
        # requires_grad=True, is_leaf=False.
        check_tensor_properties(out, "t")

        return out


# Seed the RNG so the layer initialisation and the random tensors below repeat.
torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

# Expected: Layer1, Layer20 and Layer21 -> None (the no_grad block breaks the
# graph before layer3), Layer3 -> gradients present.
for label, layer in (
    ("Layer1", model.layer1),
    ("Layer20", model.layer20),
    ("Layer21", model.layer21),
    ("Layer3", model.layer3),
):
    print(f"{label} gradients:")
    for param in layer.parameters():
        print(param.grad)
相关推荐
weixin_514221852 小时前
FDTD与matlab、python耦合
python·学习·matlab·fdtd
递归不收敛3 小时前
吴恩达机器学习课程(PyTorch 适配)学习笔记大纲
pytorch·学习·机器学习
递归不收敛6 小时前
吴恩达机器学习课程(PyTorch适配)学习笔记:2.4 激活函数与多类别处理
pytorch·学习·机器学习
F_D_Z7 小时前
数据集相关类代码回顾理解 | StratifiedShuffleSplit\transforms.ToTensor\Counter
python·torchvision·transforms
tao3556678 小时前
【Python刷力扣hot100】283. Move Zeroes
开发语言·python·leetcode
小宁爱Python8 小时前
从零搭建 RAG 智能问答系统1:基于 LlamaIndex 与 Chainlit实现最简单的聊天助手
人工智能·后端·python
湖南人爱科技有限公司9 小时前
RaPhp和Python某音最新bd-ticket-guard-client-data加密算法解析(视频评论)
android·python·php·音视频·爬山算法·raphp
eqwaak010 小时前
数据预处理与可视化流水线:Pandas Profiling + Altair 实战指南
开发语言·python·信息可视化·数据挖掘·数据分析·pandas
PKNLP10 小时前
深度学习之神经网络1(Neural Network)
人工智能·深度学习·神经网络
心态特好11 小时前
详解WebSocket及其妙用
java·python·websocket·网络协议