基于RNN的股票市场时间序列预测(Python)

Step 1: Loading the data

复制代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import torch.nn as nn
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
# Importing the training set
dataset = pd.read_csv('HistoricalData_1719412320530.csv')
dataset.head(10)
复制代码
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Date        2516 non-null   object
 1   Close/Last  2516 non-null   object
 2   Volume      2516 non-null   int64 
 3   Open        2516 non-null   object
 4   High        2516 non-null   object
 5   Low         2516 non-null   object
dtypes: int64(1), object(5)
memory usage: 118.1+ KB
# change time order


dataset['Date'] = pd.to_datetime(dataset['Date'], format='%m/%d/%Y')


# Sort the DataFrame in ascending order
dataset = dataset.sort_values(by='Date', ascending=True)


# Reset index if necessary
dataset = dataset.reset_index(drop=True)
dataset.head(5)
复制代码
dataset['Close/Last'] = dataset['Close/Last'].str.replace('$', '').astype(float)
dataset.head(5)
复制代码
dataset_cl = dataset['Close/Last'].values
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler


sc = MinMaxScaler(feature_range = (0, 1))


# scale the data
dataset_cl = dataset_cl.reshape(dataset_cl.shape[0], 1)
dataset_cl = sc.fit_transform(dataset_cl)
dataset_cl
array([[2.91505500e-04],
       [2.95204809e-04],
       [3.24799276e-04],
       ...,
       [9.33338463e-01],
       [8.70746165e-01],
       [9.29787127e-01]])

Step 2: Cutting time series into sequences (Sliding Window)

复制代码
input_size = 7


# Create a function to process the data into 7 day look back slices
# lb is window size
def processData(data, lb):
    X, y = [], [] # X is input vector, Y is output vector
    for i in range(len(data) - lb - 1):
        X.append(data[i: (i + lb), 0])
        y.append(data[(i + lb), 0])
    return np.array(X), np.array(y)


X, y = processData(dataset_cl, input_size)

Step 3: Split training and testing sets

复制代码
X_train, X_test = X[:int(X.shape[0]*0.80)], X[int(X.shape[0]*0.80):]
y_train, y_test = y[:int(y.shape[0]*0.80)], y[int(y.shape[0]*0.80):]
print(X_train.shape[0])
print(X_test.shape[0])
print(y_train.shape[0])
print(y_test.shape[0])


# reshaping
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
2006
502
2006
502

Step 4: Build and run an RNN regression model

复制代码
class RNN(nn.Module):
    def __init__(self, i_size, h_size, n_layers, o_size, dropout=0.1, bidirectional=False):
        super().__init__()
        # super(RNN, self).__init__()


        self.num_directions = bidirectional + 1


        # LSTM module
        self.rnn = nn.LSTM(
            input_size = i_size,
            hidden_size = h_size,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional
        )


        # self.relu = nn.ReLU()


        # Output layer
        self.out = nn.Linear(h_size, o_size)


    def forward(self, x, h_state):
      # r_out contains the LSTM output at each time step, and hidden_state
      # contains the hidden and cell states after processing the entire sequence.
        r_out, hidden_state = self.rnn(x, h_state)


        hidden_size = hidden_state[-1].size(-1)


        # Convert dimension of r_out (-1 denotes it depends on other parameters)
        r_out = r_out.view(-1, self.num_directions, hidden_size)


        # r_out = self.relu(r_out)


        outs = self.out(r_out)


        return outs, hidden_state
# Global setting
INPUT_SIZE = input_size # LSTM input size


HIDDEN_SIZE = 256


NUM_LAYERS = 3 # LSTM 'stack' layer


OUTPUT_SIZE = 1




# Hyper parameters
learning_rate = 0.001
num_epochs = 300


rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE, bidirectional=False)
rnn.cuda()


optimiser = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()


hidden_state = None
rnn
RNN(
  (rnn): LSTM(7, 256, num_layers=3, dropout=0.1)
  (out): Linear(in_features=256, out_features=1, bias=True)
)
history = [] # save loss in each epoch
# .cuda() copies element to the GPU memory
X_test_cuda = torch.tensor(X_test).float().cuda()
y_test_cuda = torch.tensor(y_test).float().cuda()


# Use all the data in one batch
inputs_cuda = torch.tensor(X_train).float().cuda()
labels_cuda = torch.tensor(y_train).float().cuda()


# training
for epoch in range(num_epochs):


    # Train mode
    rnn.train()


    output, _ = rnn(inputs_cuda, hidden_state)
    # print(output.size())


    loss = criterion(output[:,0,:].view(-1), labels_cuda)
    optimiser.zero_grad()
    loss.backward()   # back propagation
    optimiser.step()   # update the parameters


    if epoch % 20 == 0:
        # Convert train mode to evaluation mode (disable dropout)
        rnn.eval()


        test_output, _ = rnn(X_test_cuda, hidden_state)
        test_loss = criterion(test_output.view(-1), y_test_cuda)
        print('epoch {}, loss {}, eval loss {}'.format(epoch, loss.item(), test_loss.item()))
    else:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
    history.append(loss.item())
# iterate over all the learnable parameters in the model, which include the
# weights and biases of all layers in the model
# (both the LSTM layers and the final linear layer)
for param in rnn.parameters():
    print(param.data)

Step 5: Checking model performance

复制代码
plt.plot(history)
# dplt.plot(history.history['val_loss'])
复制代码
# X_train_X_test = np.concatenate((X_train, X_test),axis=0)
# hidden_state = None
rnn.eval()
# test_inputs = torch.tensor(X_test).float().cuda()
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()
plt.plot(sc.inverse_transform(y_test.reshape(-1,1)))
plt.plot(sc.inverse_transform(test_predict_cpu.reshape(-1,1)))
plt.legend(['y_test','test_predict_cpu'], loc='center left', bbox_to_anchor=(1, 0.5))
复制代码
# plot original data
plt.plot(sc.inverse_transform(y.reshape(-1,1)), color='k')


# train_inputs = torch.tensor(X_train).float().cuda()
train_pred, hidden_state = rnn(inputs_cuda, None)
train_pred_cpu = train_pred.cpu().detach().numpy()


# use hidden state from previous training data
test_predict, _ = rnn(X_test_cuda, hidden_state)
test_predict_cpu = test_predict.cpu().detach().numpy()


# plt.plot(scl.inverse_transform(y_test.reshape(-1,1)))
split_pt = int(X.shape[0] * 0.80) + 7 # window_size
plt.plot(np.arange(7, split_pt, 1), sc.inverse_transform(train_pred_cpu.reshape(-1,1)), color='b')
plt.plot(np.arange(split_pt, split_pt + len(test_predict_cpu), 1), sc.inverse_transform(test_predict_cpu.reshape(-1,1)), color='r')


# pretty up graph
plt.xlabel('day')
plt.ylabel('price of Nvidia stock')
plt.legend(['original series','training fit','testing fit'], loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
复制代码
MMSE = np.sum((test_predict_cpu.reshape(1,X_test.shape[0])-y[2006:])**2)/X_test.shape[0]
print(MMSE)
0.0018420128176938062



担任《Mechanical System and Signal Processing》审稿专家,担任《中国电机工程学报》,《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

 
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1
相关推荐
im_AMBER2 分钟前
杂记 15
java·开发语言·算法
通信小呆呆3 分钟前
分布式雷达 vs 多基地雷达:同频共振的“合唱团”和“乐队”
分布式·目标检测·信息与通信·信号处理·计算成像
麦麦大数据7 分钟前
F024 CNN+vue+flask电影推荐系统vue+python+mysql+CNN实现
vue.js·python·cnn·flask·推荐算法
Zzz 小生13 分钟前
编程基础学习(一)-Python基础语法+数据结构+面向对象全解析
开发语言·python
white-persist13 分钟前
JWT 漏洞全解析:从原理到实战
前端·网络·python·安全·web安全·网络安全·系统安全
算法打盹中30 分钟前
计算机视觉:卷积神经网络(CNN)图像分类从像素与色彩通道基础到特征提取、池化及预测
图像处理·神经网络·计算机视觉·cnn·图像分类
搞科研的小刘选手32 分钟前
【早稻田大学主办】2026年第三届人工智能与未来教育国际学术会议(AIFE 2026)
人工智能·机器学习·数据挖掘·机器人·未来教育·远程教育·移动学习
沐知全栈开发36 分钟前
Bootstrap4 表格详解
开发语言
数据与人工智能律师40 分钟前
解码Web3:DeFi、GameFi、SocialFi的法律风险警示与合规路径
大数据·网络·人工智能·云计算·区块链
Best_Me0741 分钟前
理解AUROC,AP,F1-scroe,PRO
人工智能·机器学习