Set model parameters' requires_grad=False:

Setting requires_grad = False on selected parameters removes them from gradient computation, but autograd still propagates gradients through the frozen layer, so earlier layers keep receiving gradients:
```python
import torch
import torch.nn as nn
import torch.optim as optim

def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')

class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        # freeze layer2: its parameters will not require gradients
        for param in self.layer2.parameters():
            param.requires_grad = False
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        check_tensor_properties(x, "x")    # requires_grad=False, is_leaf=True: x is the user-created input tensor
        x_ = torch.relu(self.layer1(x))
        check_tensor_properties(x_, "x_")  # requires_grad=True, is_leaf=False
        y = torch.relu(self.layer2(x_))    # layer2's parameters do not require gradients
        check_tensor_properties(y, "y")    # requires_grad=True, is_leaf=False: gradients still flow through layer2
        t = self.layer3(y)
        check_tensor_properties(t, "t")    # requires_grad=True, is_leaf=False
        return t

torch.manual_seed(2)
model = SelectiveGradientModel()
# hand the optimizer only the parameters that are still trainable
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
check_tensor_properties(inputs, "inputs")  # requires_grad=False, is_leaf=True
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # not None: gradients flow through the frozen layer2 back to layer1
print("Layer2 gradients:")
for param in model.layer2.parameters():
    print(param.grad)  # None: layer2 is frozen
print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None
```
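This same pattern is the standard way to freeze a pretrained backbone and fine-tune only a new head. Below is a minimal sketch; the backbone/head split is illustrative, not part of the example above. nn.Module.requires_grad_(False) freezes every parameter of a module in one call, equivalent to the per-parameter loop:

```python
import torch
import torch.nn as nn
import torch.optim as optim

backbone = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 8))
head = nn.Linear(8, 2)

backbone.requires_grad_(False)  # module-level equivalent of the per-parameter loop

optimizer = optim.SGD(head.parameters(), lr=0.1)  # only the trainable head
out = head(backbone(torch.randn(1, 4)))
out.sum().backward()  # head gets gradients; each backbone parameter's .grad stays None
optimizer.step()      # updates head; backbone weights stay fixed
```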
detach:

detach() returns a tensor that shares storage with the original but is cut out of the computation graph, so backpropagation stops at the detach point and nothing upstream of it receives gradients:
```python
import torch
import torch.nn as nn
import torch.optim as optim

def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')

class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        check_tensor_properties(x, "x")  # requires_grad=True, is_leaf=False
        z = x.detach()  # detach x from the computation graph
        check_tensor_properties(z, "z")  # requires_grad=False, is_leaf=True
        t = self.layer3(z)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False
        return t

torch.manual_seed(2)
model = SelectiveGradientModel()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # None
print("Layer2 gradients:")
for param in model.layer2.parameters():
    print(param.grad)  # None
print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None
```
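Unlike freezing a layer's parameters, detach() blocks gradient flow to everything before it, which is why layer1 and layer2 both end up with no gradients here. Its typical use is to treat an intermediate result as a constant, e.g. a stop-gradient target. A minimal sketch (the 0.9 scaling is arbitrary, purely for illustration):

```python
import torch
import torch.nn as nn

net = nn.Linear(2, 2)
x = torch.randn(1, 2)

pred = net(x)
target = (0.9 * pred).detach()        # a constant w.r.t. net: no grad flows through it
loss = ((pred - target) ** 2).mean()
loss.backward()                       # gradients reach net only via pred
print(net.weight.grad)                # not None
```

Note that the detached tensor shares memory with the original, so modifying one in place also changes the other.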
torch.no_grad():

Inside a torch.no_grad() block, autograd records nothing: every tensor produced there requires no gradient and is a leaf, so the graph is cut at the block boundary:
```python
import torch
import torch.nn as nn
import torch.optim as optim

def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')

class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer20 = nn.Linear(2, 2)
        self.layer21 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        check_tensor_properties(x, "x")  # requires_grad=True, is_leaf=False
        with torch.no_grad():  # every tensor created in this block requires no gradient and is a leaf
            y = torch.relu(self.layer20(x))
            check_tensor_properties(y, "y")  # requires_grad=False, is_leaf=True
            z = torch.relu(self.layer21(y))
            check_tensor_properties(z, "z")  # requires_grad=False, is_leaf=True
        t = self.layer3(z)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False
        return t

torch.manual_seed(2)
model = SelectiveGradientModel()
optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # None: the graph was cut inside the no_grad block
print("Layer20 gradients:")
for param in model.layer20.parameters():
    print(param.grad)  # None
print("Layer21 gradients:")
for param in model.layer21.parameters():
    print(param.grad)  # None
print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None
```
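Beyond cutting the graph mid-model as above, torch.no_grad() is most commonly wrapped around whole evaluation or inference loops to save the memory and time of building a graph. A minimal sketch with a placeholder model:

```python
import torch
import torch.nn as nn

model = nn.Linear(2, 2)

model.eval()                 # also disables train-only behavior such as dropout
with torch.no_grad():        # no graph is recorded inside this block
    preds = model(torch.randn(4, 2))
print(preds.requires_grad)   # False: usable for metrics without calling detach()
```

On recent PyTorch versions, torch.inference_mode() is a stricter and slightly faster alternative for pure inference.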