🧡💛💚TensorFlow2实战-系列教程 总目录
有任何问题欢迎在下面留言
本篇文章的代码运行界面均在Jupyter Notebook中进行
本篇文章配套的代码资源已经上传
8、压缩版本网络模型
python
class Model(tf.keras.Model):
    """Three-level bidirectional-LSTM classifier with max-pooling compression.

    After every recurrent layer the output is max-pooled over the time axis
    and regrouped into sequences of length 10, so each level processes a
    sequence ten times shorter than the previous one.

    Args:
        params: Configuration dict. Reads ``params['dropout_rate']`` and
            ``params['rnn_units']``.
    """

    def __init__(self, params):
        super().__init__()

        # Pretrained word vectors loaded from disk; frozen during training.
        self.embedding = tf.Variable(np.load('./vocab/word.npy'),
                                     dtype=tf.float32,
                                     name='pretrained_embedding',
                                     trainable=False,)

        self.drop1 = tf.keras.layers.Dropout(params['dropout_rate'])
        self.drop2 = tf.keras.layers.Dropout(params['dropout_rate'])
        self.drop3 = tf.keras.layers.Dropout(params['dropout_rate'])

        self.rnn1 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(params['rnn_units'], return_sequences=True))
        self.rnn2 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(params['rnn_units'], return_sequences=True))
        self.rnn3 = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(params['rnn_units'], return_sequences=True))

        self.drop_fc = tf.keras.layers.Dropout(params['dropout_rate'])
        self.fc = tf.keras.layers.Dense(2*params['rnn_units'], tf.nn.elu)

        self.out_linear = tf.keras.layers.Dense(2)

        # Feature width used when regrouping the recurrent outputs in call().
        # Kept on the instance so the forward pass uses the configuration this
        # model was built with instead of a module-level ``params`` global.
        self.rnn_out_dim = 2*params['rnn_units']

    def call(self, inputs, training=False):
        """Run the forward pass and return the unnormalised class logits.

        Args:
            inputs: Integer token ids; cast to ``tf.int32`` when needed.
                NOTE(review): the reshapes below assume 1000 tokens per
                sample and 50-dimensional embeddings - confirm against the
                data pipeline and ``word.npy``.
            training: Whether the dropout layers are active.

        Returns:
            The output of the final ``Dense(2)`` layer.
        """
        if inputs.dtype != tf.int32:
            inputs = tf.cast(inputs, tf.int32)
        batch_sz = tf.shape(inputs)[0]
        rnn_units = self.rnn_out_dim

        x = tf.nn.embedding_lookup(self.embedding, inputs)

        # Level 1: regroup into batch*100 segments of 10 steps each, then keep
        # the per-feature maximum over the 10 steps of every segment.
        x = tf.reshape(x, (batch_sz*10*10, 10, 50))
        x = self.drop1(x, training=training)
        x = self.rnn1(x)
        x = tf.reduce_max(x, 1)

        # Level 2: regroup the pooled segments into batch*10 sequences of 10.
        x = tf.reshape(x, (batch_sz*10, 10, rnn_units))
        x = self.drop2(x, training=training)
        x = self.rnn2(x)
        x = tf.reduce_max(x, 1)

        # Level 3: one sequence of 10 pooled vectors per sample.
        x = tf.reshape(x, (batch_sz, 10, rnn_units))
        x = self.drop3(x, training=training)
        x = self.rnn3(x)
        x = tf.reduce_max(x, 1)

        x = self.drop_fc(x, training=training)
        x = self.fc(x)

        x = self.out_linear(x)

        return x
这是另外一个版本的自定义网络,网络定义部分是一样的,只是在前向传播的过程中,对每一个rnn的输出做了特征压缩:每10个时间步为一组,用tf.reduce_max在这10个时间步上取最大值,序列长度因此变为原来的1/10,所以这个版本的训练速度会更快
9、模型训练参数
python
# Training configuration shared by the model, data pipeline and training loop.
params = dict(
    # Vocabulary file plus training / evaluation data files.
    vocab_path='./vocab/word.txt',
    train_path='./data/train.txt',
    test_path='./data/test.txt',
    # Dataset size, number of output classes and mini-batch size.
    num_samples=25000,
    num_labels=2,
    batch_size=32,
    # Maximum sentence length, LSTM hidden units and dropout ratio.
    max_len=1000,
    rnn_units=200,
    dropout_rate=0.2,
    # Global-norm gradient clipping threshold, early-stopping patience and
    # initial learning rate.
    clip_norm=10.0,
    num_patience=3,
    lr=3e-4,
)
语料表路径、训练数据路径、验证数据路径
句子数量、标签输出值个数、batch_size
句子最大长度、rnn_units隐层神经元个数、dropout比例
梯度截断(避免梯度剧烈变化,控制过拟合)、多少次损失没下降停止训练、学习率
python
def is_descending(history: list):
    """Return True when the most recent values are strictly decreasing.

    Only the last ``params['num_patience'] + 1`` entries of ``history`` are
    inspected. A window with fewer than two entries has no adjacent pair to
    compare and therefore yields True.
    """
    window = history[-(params['num_patience']+1):]
    # Any adjacent pair that fails to drop breaks the descending run.
    return not any(earlier <= later for earlier, later in zip(window, window[1:]))
根据历史准确率来判断效果有没有提升:取最近 num_patience+1 个值,如果它们严格递减(即连续 num_patience 次都在下降),函数返回 True,训练循环据此停止训练
python
# Map every vocabulary entry to its zero-based line number in the vocab file.
word2idx = {}
with open(params['vocab_path'], encoding='utf-8') as vocab_file:
    word2idx.update(
        (token.rstrip(), index) for index, token in enumerate(vocab_file)
    )

params['word2idx'] = word2idx
# One more than the number of distinct entries read from the file.
params['vocab_size'] = len(word2idx) + 1
读进语料表进行id映射
python
# Instantiate the network and build it for inputs with unspecified
# batch size and sequence length.
model = Model(params)
model.build(input_shape=(None, None))

# Exponential learning-rate schedule; the training loop evaluates it at the
# current step and assigns the result to the optimizer.
decay_lr = tf.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=params['lr'],
    decay_steps=1000,
    decay_rate=0.95,
)
optim = tf.optimizers.Adam(learning_rate=params['lr'])

# Bookkeeping for the training loop.
global_step = 0      # number of optimizer steps taken so far
history_acc = []     # evaluation accuracy recorded after every epoch
best_acc = 0.0       # best evaluation accuracy seen so far
t0 = time.time()     # reference point for the "Spent" timing in the logs

# Route progress messages through the 'tensorflow' logger at INFO level.
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)
- 构建模型
- 设置输入的大小,或者fit时候也能自动找到
- 学习率衰减
- 优化器
- 迭代次数计数变量
- 保存历史准确率
- 最佳准确率
- 获取当前时间
- 打印日志的设置参数
10、模型训练
python
# Train epoch after epoch until the early-stopping condition at the bottom
# of the loop fires.
while True:
    # ---- Training pass ----
    for texts, labels in dataset(is_training=True, params=params):
        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            logits = model(texts, training=True)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
            loss = tf.reduce_mean(loss)

        # Apply the decay schedule by hand. `learning_rate` is the canonical
        # attribute name; `lr` is only a legacy alias for it.
        optim.learning_rate.assign(decay_lr(global_step))
        grads = tape.gradient(loss, model.trainable_variables)
        # Rescale gradients so their global norm does not exceed clip_norm.
        grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
        optim.apply_gradients(zip(grads, model.trainable_variables))

        # Report progress every 50 steps and restart the timer.
        if global_step % 50 == 0:
            logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
                global_step, loss.numpy().item(), time.time()-t0, optim.learning_rate.numpy().item()))
            t0 = time.time()
        global_step += 1

    # ---- Evaluation pass ----
    m = tf.keras.metrics.Accuracy()

    for texts, labels in dataset(is_training=False, params=params):
        logits = model(texts, training=False)
        y_pred = tf.argmax(logits, axis=-1)
        m.update_state(y_true=labels, y_pred=y_pred)

    acc = m.result().numpy()
    logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))
    history_acc.append(acc)

    if acc > best_acc:
        best_acc = acc
    logger.info("Best Accuracy: {:.3f}".format(best_acc))

    # Early stop: requires more than num_patience recorded epochs and
    # is_descending() reporting a descending accuracy history.
    if len(history_acc) > params['num_patience'] and is_descending(history_acc):
        logger.info("Testing Accuracy not improved over {} epochs, Early Stop".format(params['num_patience']))
        break
- 按照batch取数据
- 梯度带,记录所有在上下文中的操作,并且通过调用.gradient()获得任何上下文中计算得出的张量的梯度
- 当前输入经过模型的输出结果
- 计算损失
- 计算平均损失
- 根据自定义的学习率更新策略 更新学习率
- 根据梯度带计算梯度值
- 将梯度限制一下(按全局范数裁剪),有的时候会更新太猛,防止梯度爆炸、让训练更稳定
- 更新梯度
- 每隔50次打印一下当前训练的结果
- 使用当前训练的网络对验证集的数据进行测试
- 准确率连续3次(num_patience)下降就停止训练
- 如果准确率超过阈值后停止训练
部分训练过程日志:
Reading ./data/train.txt
INFO:tensorflow:Step 0 | Loss: 0.6997 | Spent: 7.5 secs | LR: 0.000300
...
INFO:tensorflow:Evaluation: Testing Accuracy: 0.872
INFO:tensorflow:Best Accuracy: 0.879
Reading ./data/train.txt
INFO:tensorflow:Step 10200 | Loss: 0.2801 | Spent: 640.2 secs | LR: 0.000178
INFO:tensorflow:Step 10250 | Loss: 0.1747 | Spent: 77.9 secs | LR: 0.000177
INFO:tensorflow:Step 10300 | Loss: 0.2829 | Spent: 77.7 secs | LR: 0.000177
...
INFO:tensorflow:Step 10900 | Loss: 0.2204 | Spent: 77.7 secs | LR: 0.000172
Reading ./data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.863
INFO:tensorflow:Best Accuracy: 0.879
INFO:tensorflow:Testing Accuracy not improved over 3 epochs, Early Stop