问题描述
使用如下的自定义的多层嵌套网络进行训练:
python
class FC1_bot(nn.Module):
    """Bottom sub-network: one 10 -> 10 linear layer that produces an embedding."""

    def __init__(self):
        super().__init__()
        # Kept inside a Sequential so the layer stays addressable as
        # ``embeddings.0`` (same state_dict keys as before).
        linear = nn.Linear(10, 10)
        self.embeddings = nn.Sequential(linear)

    def forward(self, x):
        """Return the embedding of ``x`` computed by ``self.embeddings``."""
        return self.embeddings(x)
class FC1_top(nn.Module):
    """Top sub-network: dropout (p=0.5) followed by a 10 -> 10 linear layer."""

    def __init__(self):
        super().__init__()
        # Order matters: dropout is index 0, the linear layer is index 1.
        layers = [
            nn.Dropout(p=0.5),
            nn.Linear(10, 10),
        ]
        self.prediction = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by ``self.prediction`` for ``x``."""
        return self.prediction(x)
class FC1(nn.Module):
    """Composite model: ``num`` bottom networks feeding a single top network.

    NOTE: ``self.bot`` is a plain Python list. Modules stored this way are
    not registered as sub-modules of this module, so calls made on the
    parent (``train()``, ``eval()``, ``parameters()``, ``to()``) do not
    reach them. ``self.top`` and ``self.softmax`` are ordinary attributes
    and are registered normally.

    NOTE(review): ``_aggregate`` concatenates the embeddings along dim 1,
    so for ``num > 1`` the width handed to ``FC1_top`` is ``10 * num`` while
    its linear layer is built with 10 input features -- confirm the
    intended sizes.
    """

    def __init__(self, num):
        super().__init__()
        self.num = num
        # Plain list on purpose here; see the class-level note.
        self.bot = [FC1_bot() for _ in range(num)]
        self.top = FC1_top()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run every bottom network on its own input, then the top network.

        ``x`` is an iterable with one input per bottom network. Returns a
        tuple ``(emb, pred)``: the list of per-network embeddings and the
        softmax (over dim 1) of the top network's logits.
        """
        inputs = list(x)
        emb = [self.bot[idx](inputs[idx]) for idx in range(self.num)]
        pred = self.softmax(self.top(self._aggregate(emb)))
        return emb, pred

    def _aggregate(self, x):
        """Concatenate a list of tensors along dim 1."""
        # Note: x is a list of tensors.
        return torch.cat(x, dim=1)
训练的代码如下:
python
def train(self):
    """Put ``self.model`` into training mode and run the epoch loop.

    NOTE: assuming ``self.model`` is an ``nn.Module``, ``train()`` only
    recurses into registered sub-modules; modules the model keeps in a
    plain Python list are not reached by this single call.
    """
    # train entire model
    self.model.train()
    for epoch in range(self.args.epochs):
        ...  # per-epoch training step (elided in this snippet)
解决办法
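原因:`FC1` 中的 `self.bot` 是一个普通的 Python list,放在其中的子模块不会被注册到父模块上,因此 `self.model.train()` 只会递归到 `top`,而不会作用到各个 `bot`。需要把所有用到的模型都显式地变成训练模式,否则只有top模型在被训练。更根本的做法是用 `nn.ModuleList` 代替普通 list 来保存 `bot`,这样 `train()`、`eval()`、`parameters()`、`to()` 等调用都会自动作用到所有子模块。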
python
def train(self):
    """Put the model and every sub-network into training mode, then train.

    ``self.model.bot`` is switched explicitly, one module at a time, so
    the bottom networks enter training mode even when the parent's
    ``train()`` call does not reach them (e.g. when they are held in a
    plain Python list rather than registered as sub-modules).
    """
    # train entire model
    self.model.train()
    # Harmless if ``top`` is already covered by the call above.
    self.model.top.train()
    # Iterate the container itself instead of indexing by a separately
    # stored count, so no bottom network is skipped and no IndexError is
    # raised if that count and the container ever disagree.
    for bot in self.model.bot:
        bot.train()
    for epoch in range(self.args.epochs):
        ...  # per-epoch training step (elided in this snippet)