基于RNN的股票市场时间序列预测(Python)

Step 1: Loading the data

复制代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import torch.nn as nn
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
# Importing the training set
dataset = pd.read_csv('HistoricalData_1719412320530.csv')
dataset.head(10)
复制代码
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Date        2516 non-null   object
 1   Close/Last  2516 non-null   object
 2   Volume      2516 non-null   int64 
 3   Open        2516 non-null   object
 4   High        2516 non-null   object
 5   Low         2516 non-null   object
dtypes: int64(1), object(5)
memory usage: 118.1+ KB
# change time order


dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')


# Sort the DataFrame in ascending order
dataset = dataset.sort_values(by='Date', ascending=True)


# Reset index if necessary
dataset = dataset.reset_index(drop=True)
dataset.head(5)
复制代码
dataset['Close/Last'] = dataset['Close/Last'].str.replace('$', '').astype(float)
dataset.head(5)
复制代码
dataset_cl = dataset['Close/Last'].values
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler


sc = MinMaxScaler(feature_range = (0, 1))


# scale the data
dataset_cl = dataset_cl.reshape(dataset_cl.shape[0], 1)
dataset_cl = sc.fit_transform(dataset_cl)
dataset_cl
array([[2.91505500e-04],
       [2.95204809e-04],
       [3.24799276e-04],
       ...,
       [9.33338463e-01],
       [8.70746165e-01],
       [9.29787127e-01]])

Step 2: Cutting time series into sequences (Sliding Window)

复制代码
input_size = 7


# Create a function to process the data into 7 day look back slices
# lb is window size
def processData(data, lb):
    X, y = [], [] # X is input vector, Y is output vector
    for i in range(len(data) - lb - 1):
        X.append(data[i: (i + lb), 0])
        y.append(data[(i + lb), 0])
    return np.array(X), np.array(y)


X, y = processData(dataset_cl, input_size)

Step 3: Split training and testing sets

复制代码
X_train, X_test = X[:int(X.shape[0]*0.80)], X[int(X.shape[0]*0.80):]
y_train, y_test = y[:int(y.shape[0]*0.80)], y[int(y.shape[0]*0.80):]
print(X_train.shape[0])
print(X_test.shape[0])
print(y_train.shape[0])
print(y_test.shape[0])


# reshaping
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
2006
502
2006
502

Step 4: Build and run an RNN regression model

复制代码
class RNN(nn.Module):
    def __init__(self, i_size, h_size, n_layers, o_size, dropout=0.1, bidirectional=False):
        super().__init__()
        # super(RNN, self).__init__()


        self.num_directions = bidirectional + 1


        # LSTM module
        self.rnn = nn.LSTM(
            input_size = i_size,
            hidden_size = h_size,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional
        )


        # self.relu = nn.ReLU()


        # Output layer
        self.out = nn.Linear(h_size, o_size)


    def forward(self, x, h_state):
      # r_out contains the LSTM output at each time step, and hidden_state
      # contains the hidden and cell states after processing the entire sequence.
        r_out, hidden_state = self.rnn(x, h_state)


        hidden_size = hidden_state[-1].size(-1)


        # Convert dimension of r_out (-1 denotes it depends on other parameters)
        r_out = r_out.view(-1, self.num_directions, hidden_size)


        # r_out = self.relu(r_out)


        outs = self.out(r_out)


        return outs, hidden_state
# Global setting
INPUT_SIZE = input_size # LSTM input size


HIDDEN_SIZE = 256


NUM_LAYERS = 3 # LSTM 'stack' layer


OUTPUT_SIZE = 1




# Hyper parameters
learning_rate = 0.001
num_epochs = 300


rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE, bidirectional=False)
rnn.cuda()


optimiser = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()


hidden_state = None
rnn
RNN(
  (rnn): LSTM(7, 256, num_layers=3, dropout=0.1)
  (out): Linear(in_features=256, out_features=1, bias=True)
)
history = [] # save loss in each epoch
# .cuda() copies element to the GPU memory
X_test_cuda = torch.tensor(X_test).float().cuda()
y_test_cuda = torch.tensor(y_test).float().cuda()


# Use all the data in one batch
inputs_cuda = torch.tensor(X_train).float().cuda()
labels_cuda = torch.tensor(y_train).float().cuda()


# training
for epoch in range(num_epochs):


    # Train mode
    rnn.train()


    output, _ = rnn(inputs_cuda, hidden_state)
    # print(output.size())


    loss = criterion(output[:,0,:].view(-1), labels_cuda)
    optimiser.zero_grad()
    loss.backward()   # back propagation
    optimiser.step()   # update the parameters


    if epoch % 20 == 0:
        # Convert train mode to evaluation mode (disable dropout)
        rnn.eval()


        test_output, _ = rnn(X_test_cuda, hidden_state)
        test_loss = criterion(test_output.view(-1), y_test_cuda)
        print('epoch {}, loss {}, eval loss {}'.format(epoch, loss.item(), test_loss.item()))
    else:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
    history.append(loss.item())
# iterate over all the learnable parameters in the model, which include the
# weights and biases of all layers in the model
# (both the LSTM layers and the final linear layer)
for param in rnn.parameters():
    print(param.data)

Step 5: Checking model performance

复制代码
plt.plot(history)
# dplt.plot(history.history['val_loss'])
复制代码
# X_train_X_test = np.concatenate((X_train, X_test),axis=0)
# hidden_state = None
rnn.eval()
# test_inputs = torch.tensor(X_test).float().cuda()
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()
plt.plot(sc.inverse_transform(y_test.reshape(-1,1)))
plt.plot(sc.inverse_transform(test_predict_cpu.reshape(-1,1)))
plt.legend(['y_test','test_predict_cpu'], loc='center left', bbox_to_anchor=(1, 0.5))
复制代码
# plot original data
plt.plot(sc.inverse_transform(y.reshape(-1,1)), color='k')


# train_inputs = torch.tensor(X_train).float().cuda()
train_pred, hidden_state = rnn(inputs_cuda, None)
train_pred_cpu = train_pred.cpu().detach().numpy()


# use hidden state from previous training data
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()


# plt.plot(scl.inverse_transform(y_test.reshape(-1,1)))
split_pt = int(X.shape[0] * 0.80) + 7 # window_size
plt.plot(np.arange(7, split_pt, 1), sc.inverse_transform(train_pred_cpu.reshape(-1,1)), color='b')
plt.plot(np.arange(split_pt, split_pt + len(test_predict_cpu), 1), sc.inverse_transform(test_predict_cpu.reshape(-1,1)), color='r')


# pretty up graph
plt.xlabel('day')
plt.ylabel('price of Nvidia stock')
plt.legend(['original series','training fit','testing fit'], loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
复制代码
MMSE = np.sum((test_predict_cpu.reshape(1,X_test.shape[0])-y[2006:])**2)/X_test.shape[0]
print(MMSE)
0.0018420128176938062



担任《Mechanical System and Signal Processing》审稿专家,担任《中国电机工程学报》,《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

 
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1
相关推荐
pele21 小时前
PHP源码运行受主板供电影响吗_供电相数重要性说明【技巧】
jvm·数据库·python
sinat_3834373621 小时前
CSS如何实现元素悬浮在页面底部_利用fixed定位与底部间距
jvm·数据库·python
techdashen21 小时前
Cloudflare 如何把一个大型代理拆成三个小服务来提升可靠性
开发语言·rust
geovindu21 小时前
go: Chain of Responsibility Pattern
开发语言·设计模式·golang·责任链模式
cyyt21 小时前
深度学习周报(4.20~4.26)
人工智能·深度学习
gmaajt21 小时前
mysql如何备份与恢复函数定义_mysql mysqldump导出存储对象
jvm·数据库·python
十五年专注C++开发21 小时前
WaitingSpinnerWidget: 一个高度可配置的自定义Qt等待加载动画组件
开发语言·c++·qt·waitingspinner
Java小生不才21 小时前
spring AI文生图
java·人工智能·spring ai
罗西的思考21 小时前
【OpenClaw】通过 Nanobot 源码学习架构---(10)Heartbeat
人工智能·机器学习
qq_4609784021 小时前
Python爬虫怎么模拟手机端抓取_设置手机型号User-Agent字符串
jvm·数据库·python