Deep Learning Models

Embedding Layer
The embedding matrix is a trainable parameter; it is usually randomly initialized when the model is built.
It can also be initialized from pretrained word vectors, in which case the embedding parameters can optionally be frozen (not trained).
The input integer sequence may contain repeated values, but every index must be smaller than the number of rows of the embedding matrix (i.e. the vocabulary size).
Core value: maps discrete values to dense vectors.
Widely used in NLP tasks and in feature engineering of all kinds.
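A minimal PyTorch sketch of these points (the sizes and the random `pretrained` tensor here are illustrative placeholders, not from the notes):

```python
import torch
import torch.nn as nn

# A vocabulary of 6 tokens, each mapped to a 4-dimensional vector.
embedding = nn.Embedding(num_embeddings=6, embedding_dim=4)

# Indices may repeat, but each must be < num_embeddings (the row count).
ids = torch.LongTensor([[1, 2, 2, 5]])
print(embedding(ids).shape)  # torch.Size([1, 4, 4])

# Initializing from pretrained vectors; freeze=True leaves them untrained.
pretrained = torch.randn(6, 4)  # stand-in for real pretrained word vectors
frozen = nn.Embedding.from_pretrained(pretrained, freeze=True)
```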
2D Pooling Layer

1D Pooling Layer

RNN Layer

Normalization Layer
Dropout Layer

- How to understand its effect (see the sketch after this list):
1. It forces a neuron to work together with randomly chosen other neurons, weakening the co-adaptation between units and improving generalization.
2. It can be viewed as a form of model averaging: a different random subset of hidden units is dropped on each pass, so every training step effectively trains a different network, and each of these can be treated as a "new" model.
- Takeaway: a more complex computation is not necessarily better.
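A minimal sketch of the "different network each pass" view (p=0.5 is an illustrative choice):

```python
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(2, 8)

drop.train()    # training mode: a fresh random mask on every forward pass
print(drop(x))  # surviving entries are scaled by 1 / (1 - p) = 2.0
print(drop(x))  # a different "sub-network" from the call above

drop.eval()     # evaluation mode: dropout is disabled (identity mapping)
print(drop(x))  # unchanged input
```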
Example
```python
import torch
import torch.nn as nn
import numpy as np
import random
import json
'''
Build random strings that contain the character 'a', and train an RNN for
multi-class classification: the class label is the position where 'a' first
appears in the string.
'''
class TorchModel(nn.Module):
    def __init__(self, vector_dim, sentence_length, vocab):
        super(TorchModel, self).__init__()
        self.embedding = nn.Embedding(len(vocab), vector_dim, padding_idx=0)
        self.pool = nn.AvgPool1d(sentence_length)  # pooling layer (only used by the commented-out variant below)
        self.rnn_layer = nn.RNN(vector_dim, vector_dim, bias=False, batch_first=True)
        self.classify = nn.Linear(vector_dim, sentence_length)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, y=None):
        x = self.embedding(x)  # (batch_size, sentence_length) -> (batch_size, sentence_length, vector_dim)
        # Pooling alternative: average over the time dimension instead of using the RNN.
        # x = x.transpose(1, 2)  # (batch_size, sentence_length, vector_dim) -> (batch_size, vector_dim, sentence_length)
        # x = self.pool(x)       # (batch_size, vector_dim, sentence_length) -> (batch_size, vector_dim, 1)
        # x = x.squeeze()        # (batch_size, vector_dim, 1) -> (batch_size, vector_dim)
        output, h = self.rnn_layer(x)
        x = output[:, -1, :]  # last time step: (batch_size, vector_dim)
        y_pred = self.classify(x)  # (batch_size, sentence_length)
        if y is not None:
            return self.loss(y_pred, y)
        else:
            return torch.softmax(y_pred, dim=1)
# Assign an index to every character
def build_vocab():
    chars = 'abcdefghijklmnopqrstuvwxyz'
    vocab = {'[pad]': 0}
    for i, char in enumerate(chars):
        vocab[char] = i + 1
    vocab['[unk]'] = len(vocab)
    return vocab
# Generate one random sample:
# pick sentence_length characters at random (excluding the special tokens);
# the class label is the position where 'a' first appears
def build_sample(vocab, sentence_length):
    chars = [c for c in vocab if c not in ('[pad]', '[unk]')]
    x = [random.choice(chars) for _ in range(sentence_length)]
    if 'a' not in x:
        x[random.randint(0, sentence_length - 1)] = 'a'
    y = x.index('a')
    x = [vocab.get(char, vocab['[unk]']) for char in x]
    return x, y
# Build a dataset of batch_size samples
def build_dataset(batch_size, vocab, sentence_length):
    dataset_x = []
    dataset_y = []
    for i in range(batch_size):
        x, y = build_sample(vocab, sentence_length)
        dataset_x.append(x)
        dataset_y.append(y)
    return torch.LongTensor(dataset_x), torch.LongTensor(dataset_y)
# Build the model
def build_model(vocab, char_dim, sentence_length):
    model = TorchModel(char_dim, sentence_length, vocab)
    return model
# Measure accuracy after each epoch
def evaluate(model, vocab, sentence_length):
    model.eval()
    x, y = build_dataset(200, vocab, sentence_length)
    correct, wrong = 0, 0
    with torch.no_grad():
        y_pred = model(x)
        for y_p, y_t in zip(y_pred, y):
            if torch.argmax(y_p) == y_t:
                correct += 1
            else:
                wrong += 1
    print(f'Correct predictions: {correct}, accuracy: {correct / (correct + wrong)}')
    return correct / (correct + wrong)
def main():
    # Hyperparameters
    epoch_num = 20           # number of epochs
    batch_size = 50          # samples per batch
    train_sample_num = 500   # samples per epoch
    char_dim = 20            # embedding dimension per character
    sentence_length = 10     # string length
    learning_rate = 0.005
    # Build the vocabulary
    vocab = build_vocab()
    # Build the model
    model = build_model(vocab, char_dim, sentence_length)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # optimizer
    log = []
    for epoch in range(epoch_num):
        model.train()
        loss_list = []
        for i in range(train_sample_num // batch_size):
            x, y = build_dataset(batch_size, vocab, sentence_length)
            optimizer.zero_grad()
            loss = model(x, y)
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        print(f'Epoch {epoch + 1}, mean loss: {np.mean(loss_list)}')
        acc = evaluate(model, vocab, sentence_length)
        log.append([acc, np.mean(loss_list)])
    # Save the model weights
    torch.save(model.state_dict(), 'model.bin')
    # Save the vocabulary
    with open('vocab1.json', 'w', encoding='utf-8') as writer:
        writer.write(json.dumps(vocab, ensure_ascii=False))
# Run predictions with the trained model
def predict(model_path, vocab_path, input_strings):
    char_dim = 20         # embedding dimension per character
    sentence_length = 10  # string length
    with open(vocab_path, 'r', encoding='utf8') as f:
        vocab = json.load(f)  # load the vocabulary
    model = build_model(vocab, char_dim, sentence_length)  # build the model
    model.load_state_dict(torch.load(model_path))  # load the trained weights
    x = []
    for input_string in input_strings:
        x.append([vocab.get(char, vocab['[unk]']) for char in input_string])
    model.eval()  # evaluation mode
    with torch.no_grad():
        result = model(torch.LongTensor(x))
    for i, input_string in enumerate(input_strings):
        print(f'Input: {input_string}, predicted class: {torch.argmax(result[i])}, probabilities: {result[i]}')
if __name__ == '__main__':
    main()
    test_strings = ["fnvfeeaiok", "wztafgulko", "arqwdegthj", "ntawwwpijn"]
    predict("model.bin", "vocab1.json", test_strings)
```