基于RNN的股票市场时间序列预测(Python)

Step 1: Loading the data

复制代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import torch.nn as nn
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
# Importing the training set
dataset = pd.read_csv('HistoricalData_1719412320530.csv')
dataset.head(10)
复制代码
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Date        2516 non-null   object
 1   Close/Last  2516 non-null   object
 2   Volume      2516 non-null   int64 
 3   Open        2516 non-null   object
 4   High        2516 non-null   object
 5   Low         2516 non-null   object
dtypes: int64(1), object(5)
memory usage: 118.1+ KB
# change time order


dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')


# Sort the DataFrame in ascending order
dataset = dataset.sort_values(by='Date', ascending=True)


# Reset index if necessary
dataset = dataset.reset_index(drop=True)
dataset.head(5)
复制代码
dataset['Close/Last'] = dataset['Close/Last'].str.replace('$', '').astype(float)
dataset.head(5)
复制代码
dataset_cl = dataset['Close/Last'].values
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler


sc = MinMaxScaler(feature_range = (0, 1))


# scale the data
dataset_cl = dataset_cl.reshape(dataset_cl.shape[0], 1)
dataset_cl = sc.fit_transform(dataset_cl)
dataset_cl
array([[2.91505500e-04],
       [2.95204809e-04],
       [3.24799276e-04],
       ...,
       [9.33338463e-01],
       [8.70746165e-01],
       [9.29787127e-01]])

Step 2: Cutting time series into sequences (Sliding Window)

复制代码
input_size = 7


# Create a function to process the data into 7 day look back slices
# lb is window size
def processData(data, lb):
    X, y = [], [] # X is input vector, Y is output vector
    for i in range(len(data) - lb - 1):
        X.append(data[i: (i + lb), 0])
        y.append(data[(i + lb), 0])
    return np.array(X), np.array(y)


X, y = processData(dataset_cl, input_size)

Step 3: Split training and testing sets

复制代码
X_train, X_test = X[:int(X.shape[0]*0.80)], X[int(X.shape[0]*0.80):]
y_train, y_test = y[:int(y.shape[0]*0.80)], y[int(y.shape[0]*0.80):]
print(X_train.shape[0])
print(X_test.shape[0])
print(y_train.shape[0])
print(y_test.shape[0])


# reshaping
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
2006
502
2006
502

Step 4: Build and run an RNN regression model

复制代码
class RNN(nn.Module):
    def __init__(self, i_size, h_size, n_layers, o_size, dropout=0.1, bidirectional=False):
        super().__init__()
        # super(RNN, self).__init__()


        self.num_directions = bidirectional + 1


        # LSTM module
        self.rnn = nn.LSTM(
            input_size = i_size,
            hidden_size = h_size,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional
        )


        # self.relu = nn.ReLU()


        # Output layer
        self.out = nn.Linear(h_size, o_size)


    def forward(self, x, h_state):
      # r_out contains the LSTM output at each time step, and hidden_state
      # contains the hidden and cell states after processing the entire sequence.
        r_out, hidden_state = self.rnn(x, h_state)


        hidden_size = hidden_state[-1].size(-1)


        # Convert dimension of r_out (-1 denotes it depends on other parameters)
        r_out = r_out.view(-1, self.num_directions, hidden_size)


        # r_out = self.relu(r_out)


        outs = self.out(r_out)


        return outs, hidden_state
# Global setting
INPUT_SIZE = input_size # LSTM input size


HIDDEN_SIZE = 256


NUM_LAYERS = 3 # LSTM 'stack' layer


OUTPUT_SIZE = 1




# Hyper parameters
learning_rate = 0.001
num_epochs = 300


rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE, bidirectional=False)
rnn.cuda()


optimiser = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()


hidden_state = None
rnn
RNN(
  (rnn): LSTM(7, 256, num_layers=3, dropout=0.1)
  (out): Linear(in_features=256, out_features=1, bias=True)
)
history = [] # save loss in each epoch
# .cuda() copies element to the GPU memory
X_test_cuda = torch.tensor(X_test).float().cuda()
y_test_cuda = torch.tensor(y_test).float().cuda()


# Use all the data in one batch
inputs_cuda = torch.tensor(X_train).float().cuda()
labels_cuda = torch.tensor(y_train).float().cuda()


# training
for epoch in range(num_epochs):


    # Train mode
    rnn.train()


    output, _ = rnn(inputs_cuda, hidden_state)
    # print(output.size())


    loss = criterion(output[:,0,:].view(-1), labels_cuda)
    optimiser.zero_grad()
    loss.backward()   # back propagation
    optimiser.step()   # update the parameters


    if epoch % 20 == 0:
        # Convert train mode to evaluation mode (disable dropout)
        rnn.eval()


        test_output, _ = rnn(X_test_cuda, hidden_state)
        test_loss = criterion(test_output.view(-1), y_test_cuda)
        print('epoch {}, loss {}, eval loss {}'.format(epoch, loss.item(), test_loss.item()))
    else:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
    history.append(loss.item())
# iterate over all the learnable parameters in the model, which include the
# weights and biases of all layers in the model
# (both the LSTM layers and the final linear layer)
for param in rnn.parameters():
    print(param.data)

Step 5: Checking model performance

复制代码
plt.plot(history)
# dplt.plot(history.history['val_loss'])
复制代码
# X_train_X_test = np.concatenate((X_train, X_test),axis=0)
# hidden_state = None
rnn.eval()
# test_inputs = torch.tensor(X_test).float().cuda()
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()
plt.plot(sc.inverse_transform(y_test.reshape(-1,1)))
plt.plot(sc.inverse_transform(test_predict_cpu.reshape(-1,1)))
plt.legend(['y_test','test_predict_cpu'], loc='center left', bbox_to_anchor=(1, 0.5))
复制代码
# plot original data
plt.plot(sc.inverse_transform(y.reshape(-1,1)), color='k')


# train_inputs = torch.tensor(X_train).float().cuda()
train_pred, hidden_state = rnn(inputs_cuda, None)
train_pred_cpu = train_pred.cpu().detach().numpy()


# use hidden state from previous training data
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()


# plt.plot(scl.inverse_transform(y_test.reshape(-1,1)))
split_pt = int(X.shape[0] * 0.80) + 7 # window_size
plt.plot(np.arange(7, split_pt, 1), sc.inverse_transform(train_pred_cpu.reshape(-1,1)), color='b')
plt.plot(np.arange(split_pt, split_pt + len(test_predict_cpu), 1), sc.inverse_transform(test_predict_cpu.reshape(-1,1)), color='r')


# pretty up graph
plt.xlabel('day')
plt.ylabel('price of Nvidia stock')
plt.legend(['original series','training fit','testing fit'], loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
复制代码
MMSE = np.sum((test_predict_cpu.reshape(1,X_test.shape[0])-y[2006:])**2)/X_test.shape[0]
print(MMSE)
0.0018420128176938062



担任《Mechanical System and Signal Processing》审稿专家,担任《中国电机工程学报》,《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

 
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1
相关推荐
SccTsAxR几秒前
[C语言]常见排序算法①
c语言·开发语言·经验分享·笔记·其他·排序算法
折翼的恶魔10 分钟前
数据分析:合并二
python·数据分析·pandas
剪一朵云爱着21 分钟前
一文入门:机器学习
人工智能·机器学习
hi0_622 分钟前
机器学习实战(一): 什么是机器学习
人工智能·机器学习·机器人·机器学习实战
ChinaRainbowSea23 分钟前
9. LangChain4j + 整合 Spring Boot
java·人工智能·spring boot·后端·spring·langchain·ai编程
有Li35 分钟前
基于联邦学习与神经架构搜索的可泛化重建:用于加速磁共振成像|文献速递-最新医学人工智能文献
论文阅读·人工智能·文献·医学生
桃花键神44 分钟前
从传统到智能:3D 建模流程的演进与 AI 趋势 —— 以 Blender 为例
人工智能·3d·blender
星期天要睡觉1 小时前
计算机视觉(opencv)实战十七——图像直方图均衡化
人工智能·opencv·计算机视觉
大视码垛机1 小时前
速度与安全双突破:大视码垛机重构工业自动化新范式
大数据·数据库·人工智能·机器人·自动化·制造
feifeigo1231 小时前
星座SAR动目标检测(GMTI)
人工智能·算法·目标跟踪