基于RNN的股票市场时间序列预测(Python)

Step 1: Loading the data

复制代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import torch.nn as nn
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
# Importing the training set
dataset = pd.read_csv('HistoricalData_1719412320530.csv')
dataset.head(10)
复制代码
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Date        2516 non-null   object
 1   Close/Last  2516 non-null   object
 2   Volume      2516 non-null   int64 
 3   Open        2516 non-null   object
 4   High        2516 non-null   object
 5   Low         2516 non-null   object
dtypes: int64(1), object(5)
memory usage: 118.1+ KB
# change time order


dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')


# Sort the DataFrame in ascending order
dataset = dataset.sort_values(by='Date', ascending=True)


# Reset index if necessary
dataset = dataset.reset_index(drop=True)
dataset.head(5)
复制代码
dataset['Close/Last'] = dataset['Close/Last'].str.replace('$', '').astype(float)
dataset.head(5)
复制代码
dataset_cl = dataset['Close/Last'].values
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler


sc = MinMaxScaler(feature_range = (0, 1))


# scale the data
dataset_cl = dataset_cl.reshape(dataset_cl.shape[0], 1)
dataset_cl = sc.fit_transform(dataset_cl)
dataset_cl
array([[2.91505500e-04],
       [2.95204809e-04],
       [3.24799276e-04],
       ...,
       [9.33338463e-01],
       [8.70746165e-01],
       [9.29787127e-01]])

Step 2: Cutting time series into sequences (Sliding Window)

复制代码
input_size = 7


# Create a function to process the data into 7 day look back slices
# lb is window size
def processData(data, lb):
    X, y = [], [] # X is input vector, Y is output vector
    for i in range(len(data) - lb - 1):
        X.append(data[i: (i + lb), 0])
        y.append(data[(i + lb), 0])
    return np.array(X), np.array(y)


X, y = processData(dataset_cl, input_size)

Step 3: Split training and testing sets

复制代码
X_train, X_test = X[:int(X.shape[0]*0.80)], X[int(X.shape[0]*0.80):]
y_train, y_test = y[:int(y.shape[0]*0.80)], y[int(y.shape[0]*0.80):]
print(X_train.shape[0])
print(X_test.shape[0])
print(y_train.shape[0])
print(y_test.shape[0])


# reshaping
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
2006
502
2006
502

Step 4: Build and run an RNN regression model

复制代码
class RNN(nn.Module):
    def __init__(self, i_size, h_size, n_layers, o_size, dropout=0.1, bidirectional=False):
        super().__init__()
        # super(RNN, self).__init__()


        self.num_directions = bidirectional + 1


        # LSTM module
        self.rnn = nn.LSTM(
            input_size = i_size,
            hidden_size = h_size,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional
        )


        # self.relu = nn.ReLU()


        # Output layer
        self.out = nn.Linear(h_size, o_size)


    def forward(self, x, h_state):
      # r_out contains the LSTM output at each time step, and hidden_state
      # contains the hidden and cell states after processing the entire sequence.
        r_out, hidden_state = self.rnn(x, h_state)


        hidden_size = hidden_state[-1].size(-1)


        # Convert dimension of r_out (-1 denotes it depends on other parameters)
        r_out = r_out.view(-1, self.num_directions, hidden_size)


        # r_out = self.relu(r_out)


        outs = self.out(r_out)


        return outs, hidden_state
# Global setting
INPUT_SIZE = input_size # LSTM input size


HIDDEN_SIZE = 256


NUM_LAYERS = 3 # LSTM 'stack' layer


OUTPUT_SIZE = 1




# Hyper parameters
learning_rate = 0.001
num_epochs = 300


rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE, bidirectional=False)
rnn.cuda()


optimiser = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()


hidden_state = None
rnn
RNN(
  (rnn): LSTM(7, 256, num_layers=3, dropout=0.1)
  (out): Linear(in_features=256, out_features=1, bias=True)
)
history = [] # save loss in each epoch
# .cuda() copies element to the GPU memory
X_test_cuda = torch.tensor(X_test).float().cuda()
y_test_cuda = torch.tensor(y_test).float().cuda()


# Use all the data in one batch
inputs_cuda = torch.tensor(X_train).float().cuda()
labels_cuda = torch.tensor(y_train).float().cuda()


# training
for epoch in range(num_epochs):


    # Train mode
    rnn.train()


    output, _ = rnn(inputs_cuda, hidden_state)
    # print(output.size())


    loss = criterion(output[:,0,:].view(-1), labels_cuda)
    optimiser.zero_grad()
    loss.backward()   # back propagation
    optimiser.step()   # update the parameters


    if epoch % 20 == 0:
        # Convert train mode to evaluation mode (disable dropout)
        rnn.eval()


        test_output, _ = rnn(X_test_cuda, hidden_state)
        test_loss = criterion(test_output.view(-1), y_test_cuda)
        print('epoch {}, loss {}, eval loss {}'.format(epoch, loss.item(), test_loss.item()))
    else:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
    history.append(loss.item())
# iterate over all the learnable parameters in the model, which include the
# weights and biases of all layers in the model
# (both the LSTM layers and the final linear layer)
for param in rnn.parameters():
    print(param.data)

Step 5: Checking model performance

复制代码
plt.plot(history)
# dplt.plot(history.history['val_loss'])
复制代码
# X_train_X_test = np.concatenate((X_train, X_test),axis=0)
# hidden_state = None
rnn.eval()
# test_inputs = torch.tensor(X_test).float().cuda()
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()
plt.plot(sc.inverse_transform(y_test.reshape(-1,1)))
plt.plot(sc.inverse_transform(test_predict_cpu.reshape(-1,1)))
plt.legend(['y_test','test_predict_cpu'], loc='center left', bbox_to_anchor=(1, 0.5))
复制代码
# plot original data
plt.plot(sc.inverse_transform(y.reshape(-1,1)), color='k')


# train_inputs = torch.tensor(X_train).float().cuda()
train_pred, hidden_state = rnn(inputs_cuda, None)
train_pred_cpu = train_pred.cpu().detach().numpy()


# use hidden state from previous training data
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()


# plt.plot(scl.inverse_transform(y_test.reshape(-1,1)))
split_pt = int(X.shape[0] * 0.80) + 7 # window_size
plt.plot(np.arange(7, split_pt, 1), sc.inverse_transform(train_pred_cpu.reshape(-1,1)), color='b')
plt.plot(np.arange(split_pt, split_pt + len(test_predict_cpu), 1), sc.inverse_transform(test_predict_cpu.reshape(-1,1)), color='r')


# pretty up graph
plt.xlabel('day')
plt.ylabel('price of Nvidia stock')
plt.legend(['original series','training fit','testing fit'], loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
复制代码
MMSE = np.sum((test_predict_cpu.reshape(1,X_test.shape[0])-y[2006:])**2)/X_test.shape[0]
print(MMSE)
0.0018420128176938062



担任《Mechanical System and Signal Processing》审稿专家,担任《中国电机工程学报》,《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

 
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1
相关推荐
Java后端的Ai之路2 小时前
【Python 教程15】-Python和Web
python
那个村的李富贵2 小时前
光影魔术师:CANN加速实时图像风格迁移,让每张照片秒变大师画作
人工智能·aigc·cann
冬奇Lab4 小时前
一天一个开源项目(第15篇):MapToPoster - 用代码将城市地图转换为精美的海报设计
python·开源
腾讯云开发者4 小时前
“痛点”到“通点”!一份让 AI 真正落地产生真金白银的实战指南
人工智能
CareyWYR4 小时前
每周AI论文速递(260202-260206)
人工智能
hopsky5 小时前
大模型生成PPT的技术原理
人工智能
禁默5 小时前
打通 AI 与信号处理的“任督二脉”:Ascend SIP Boost 加速库深度实战
人工智能·信号处理·cann
池央5 小时前
CANN SIP 信号处理算子库深度解析:FFT 硬件加速、复数运算的向量化实现与端到端数据流优化
信号处理
灰子学技术5 小时前
go response.Body.close()导致连接异常处理
开发语言·后端·golang
心疼你的一切6 小时前
昇腾CANN实战落地:从智慧城市到AIGC,解锁五大行业AI应用的算力密码
数据仓库·人工智能·深度学习·aigc·智慧城市·cann