PyTorch深度学习(六)【循环神经网络-基础】

RNN Cell

h0和x1生成h1,把h1作为输出送到下一次的RNN Cell里面。(h1=linear(h0,x1))

RNN计算过程:

输入x_t和上一时刻的隐层h_{t-1}先各自做线性变换并相加,再经过激活函数得到h_t;循环神经网络常用的激活函数是tanh(输出取值在-1到1之间),即 h_t = tanh(W_ih·x_t + b_ih + W_hh·h_{t-1} + b_hh)。

构造RNN Cell:

代码:

import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

# Construction of RNNCell: one cell is reused for every time step.
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Wrap the sequence as (seq_len, batch_size, input_size) -> (3, 1, 4).
dataset = torch.randn(seq_len, batch_size, input_size)
# Initialize the hidden state to zeros, shape (batch_size, hidden_size) -> (1, 2).
hidden = torch.zeros(batch_size, hidden_size)

# Iterating over the first dimension yields one (batch_size, input_size)
# slice per time step, so the loop body runs seq_len times.
for idx, step_input in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)  # separator line
    print('Input size:', step_input.shape)  # (batch_size, input_size)
    # The returned hidden state is one of the inputs of the next step:
    # current input + previous hidden state -> new hidden state.
    hidden = cell(step_input, hidden)
    print('output size:', hidden.shape)  # (batch_size, hidden_size)
    print(hidden)

RNN Cell本质上是一个在所有时间步之间共享权重的线性层(线性变换之后再接tanh激活)。

用RNN首先要把维度搞清楚,数据集的维度多了序列这样一个维度。

每一层都有对应输出。同样颜色的RNN Cell都是一个线性层,也就是说一共只有3个线性层。

代码:

import torch

# Parameters
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1  # number of stacked RNN layers

# Construction of RNN
rnn = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size,
                   num_layers=num_layers)

# Wrap the sequence as (seq_len, batch_size, input_size) -> (3, 1, 4).
inputs = torch.randn(seq_len, batch_size, input_size)
# Initialize the hidden state to zeros,
# shape (num_layers, batch_size, hidden_size) -> (1, 1, 2).
hidden = torch.zeros(num_layers, batch_size, hidden_size)

# The RNN module loops over the time steps internally, so the whole
# sequence is passed in a single call.
output, hidden = rnn(inputs, hidden)

print('Output size:', output.shape)  # (seq_len, batch_size, hidden_size)
print('Output:', output)
print('Hidden size:', hidden.shape)  # (num_layers, batch_size, hidden_size)
print('Hidden:', hidden)

预测字符串(RNN Cell):

import torch

# 1. Parameters
input_size = 4
hidden_size = 4
batch_size = 1
num_epochs = 15

# 2. Data
index2char = ['e', 'h', 'l', 'o']  # vocabulary
x_data = [1, 0, 2, 2, 3]  # "hello" written as vocabulary indices
y_data = [3, 1, 2, 3, 2]  # labels: "ohlol"

# Lookup table used to turn x_data into one-hot vectors.
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
# (seq_len, batch_size, input_size)
inputs = torch.tensor(x_one_hot, dtype=torch.float32).view(-1, batch_size, input_size)
# (seq_len * batch_size, 1). CrossEntropyLoss takes class indices, so the
# labels are not one-hot encoded.
labels = torch.tensor(y_data, dtype=torch.long).view(-1, 1)


# 3. Model
class Model(torch.nn.Module):
    """Single RNNCell that is applied one time step at a time."""

    def __init__(self, input_size, hidden_size, batch_size):
        super().__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
                                        hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        """Return the next hidden state for one time step.

        ``input`` is (batch_size, input_size) and ``hidden`` is
        (batch_size, hidden_size); the result has the shape of ``hidden``.
        """
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):
        """Return an all-zero initial hidden state, (batch_size, hidden_size)."""
        return torch.zeros(self.batch_size, self.hidden_size)


net = Model(input_size, hidden_size, batch_size)

# 4. Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

# 5. Training
for epoch in range(num_epochs):
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print('Predicted string:', end='')
    # One character per iteration, in sequence order.
    for step_input, label in zip(inputs, labels):
        hidden = net(step_input, hidden)
        # Accumulate the tensor itself (no .item()) so that backward() can
        # propagate through every time step.
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1)  # index of the largest score
        print(index2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/%d] loss: %.4f' % (epoch + 1, num_epochs, loss.item()))

预测字符串(RNN)

import torch

# 1. Parameters
seq_len = 5
input_size = 4
hidden_size = 4
batch_size = 1
num_epochs = 15

# 2. Data
index2char = ['e', 'h', 'l', 'o']  # vocabulary: maps an index back to its letter
x_data = [1, 0, 2, 2, 3]  # "hello" written as vocabulary indices
y_data = [3, 1, 2, 3, 2]  # labels: "ohlol"

# Lookup table used to turn x_data into one-hot vectors.
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
# (seq_len, batch_size, input_size)
inputs = torch.tensor(x_one_hot, dtype=torch.float32).view(seq_len, batch_size, input_size)
labels = torch.tensor(y_data, dtype=torch.long)  # (seq_len * batch_size,)


# 3. Model
class Model(torch.nn.Module):
    """torch.nn.RNN wrapper that consumes the whole sequence in one call."""

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super().__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size,
                                hidden_size=self.hidden_size,
                                num_layers=num_layers)

    def forward(self, input):
        """Map (seq_len, batch_size, input_size) to (seq_len * batch_size, hidden_size)."""
        hidden = self.init_hidden()
        out, _ = self.rnn(input, hidden)  # out: (seq_len, batch_size, hidden_size)
        # Flatten to 2-D so the result can be passed to CrossEntropyLoss.
        return out.view(-1, self.hidden_size)

    def init_hidden(self):
        """Return the all-zero initial hidden state.

        torch.nn.RNN expects (num_layers, batch_size, hidden_size), so the
        layer dimension must be included.
        """
        return torch.zeros(self.num_layers, self.batch_size, self.hidden_size)


net = Model(input_size, hidden_size, batch_size)

# 4. Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# 5. Training
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.max(dim=1)  # predicted class per time step
    print('Predicted string: ', ''.join([index2char[i] for i in idx.tolist()]), end='')
    print(', Epoch [%d/%d] loss: %.4f' % (epoch + 1, num_epochs, loss.item()))

使用embedding and linear layer:

import torch

# 1. Parameters
num_class = 4       # number of output classes
input_size = 4      # vocabulary size, i.e. number of rows in the embedding table
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
num_epochs = 15

# 2. Data
index2char = ['e', 'h', 'l', 'o']  # vocabulary
x_data = [[1, 0, 2, 2, 3]]  # (batch_size, seq_len): "hello" as vocabulary indices
y_data = [3, 1, 2, 3, 2]  # (batch_size * seq_len): labels "ohlol"

inputs = torch.tensor(x_data, dtype=torch.long)  # (batch_size, seq_len)
labels = torch.tensor(y_data, dtype=torch.long)  # (batch_size * seq_len)


# 3. Model
class Model(torch.nn.Module):
    """Embedding -> multi-layer RNN (batch_first) -> Linear classifier."""

    def __init__(self):
        super().__init__()
        # The embedding table has one row per input symbol, so it is sized by
        # the vocabulary size rather than by the number of output classes.
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        self.rnn = torch.nn.RNN(input_size=embedding_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Map index tensor (batch_size, seq_len) to (batch_size * seq_len, num_class)."""
        x = self.emb(x)  # (batch_size, seq_len, embedding_size)
        # Allocate the initial state from the embedded tensor so that it
        # shares its dtype and device.
        hidden = x.new_zeros(num_layers, x.size(0), hidden_size)
        x, _ = self.rnn(x, hidden)  # (batch_size, seq_len, hidden_size)
        x = self.fc(x)  # (batch_size, seq_len, num_class)
        return x.view(-1, num_class)  # (batch_size * seq_len, num_class)


net = Model()

# 4. Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# 5. Training
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.max(dim=1)  # predicted class per time step
    print('Predicted string: ', ''.join([index2char[i] for i in idx.tolist()]), end='')
    print(', Epoch [%d/%d] loss: %.4f' % (epoch + 1, num_epochs, loss.item()))

番外:LSTM

相关推荐
好喜欢吃红柚子44 分钟前
万字长文解读空间、通道注意力机制机制和超详细代码逐行分析(SE,CBAM,SGE,CA,ECA,TA)
人工智能·pytorch·python·计算机视觉·cnn
羊小猪~~1 小时前
神经网络基础--什么是正向传播??什么是方向传播??
人工智能·pytorch·python·深度学习·神经网络·算法·机器学习
软工菜鸡2 小时前
预训练语言模型BERT——PaddleNLP中的预训练模型
大数据·人工智能·深度学习·算法·语言模型·自然语言处理·bert
哔哩哔哩技术3 小时前
B站S赛直播中的关键事件识别与应用
深度学习
deephub3 小时前
Tokenformer:基于参数标记化的高效可扩展Transformer架构
人工智能·python·深度学习·架构·transformer
___Dream3 小时前
【CTFN】基于耦合翻译融合网络的多模态情感分析的层次学习
人工智能·深度学习·机器学习·transformer·人机交互
极客代码3 小时前
【Python TensorFlow】入门到精通
开发语言·人工智能·python·深度学习·tensorflow
王哈哈^_^4 小时前
【数据集】【YOLO】【VOC】目标检测数据集,查找数据集,yolo目标检测算法详细实战训练步骤!
人工智能·深度学习·算法·yolo·目标检测·计算机视觉·pyqt
写代码的小阿帆5 小时前
pytorch实现深度神经网络DNN与卷积神经网络CNN
pytorch·cnn·dnn
是瑶瑶子啦5 小时前
【深度学习】论文笔记:空间变换网络(Spatial Transformer Networks)
论文阅读·人工智能·深度学习·视觉检测·空间变换