简单的基于LSTM的黄金价格预测（Python）

复制代码

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout, LSTM
df = pd.read_csv('Gold Price (2013-2023).csv' )
df

复制代码

df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2583 entries, 0 to 2582
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Date      2583 non-null   object
 1   Price     2583 non-null   object
 2   Open      2583 non-null   object
 3   High      2583 non-null   object
 4   Low       2583 non-null   object
 5   Vol.      2578 non-null   object
 6   Change %  2583 non-null   object
dtypes: object(7)
memory usage: 141.4+ KB
df.drop(['Vol.', 'Change %'], axis=1, inplace=True)
df

复制代码

# Convert the 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date'])


# Sort the DataFrame by the 'Date' column in ascending order
df.sort_values(by='Date', ascending=True, inplace=True)


# Reset the index of the DataFrame
df.reset_index(drop=True, inplace=True)
numCols = df.columns.drop('Date')
df[numCols] = df[numCols].replace({',': ''}, regex=True)
df[numCols] = df[numCols].astype('float64')
df.head()
df.duplicated().sum()
df.isnull().sum()
Date     0
Price    0
Open     0
High     0
Low      0
dtype: int64
import plotly.express as px


fig = px.line(y=df['Price'], x=df['Date'])
fig.update_traces(line_color='black')
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Price',
    title={
        'text': 'Gold Price Data',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='rgba(255,223,0,0.9)'
)
fig.show()
test_size = df[df.Date.dt.year == 2022].shape[0]
print(test_size)

260

复制代码

import matplotlib.pyplot as plt


plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')


plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
plt.plot(df.Date[-test_size:], df.Price[-test_size:], color='red', lw=2)


plt.title('Gold Price Train and Test', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Train Set', 'Test Set'], loc='upper left', prop={'size': 15})
plt.grid(color='white')


plt.show()

复制代码

scaler = MinMaxScaler()
scaler.fit(df.Price.values.reshape(-1, 1))
MinMaxScaler()
window_size = 60
train_data = df.Price[:-test_size]
train_data = scaler.fit_transform(train_data.values.reshape(-1, 1))
window_size = 60
X_train = []
y_train = []


for i in range(window_size, len(train_data)):
    X_train.append(train_data[i-window_size:i, 0])
    y_train.append(train_data[i, 0])
test_data = df.Price[-test_size-window_size:]
test_data = scaler.transform(test_data.values.reshape(-1, 1))
X_test = []
y_test = []


for i in range(window_size, len(test_data)):
    X_test.append(test_data[i-window_size:i, 0])
    y_test.append(test_data[i, 0])
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)


X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
y_train = np.reshape(y_train, (-1, 1))
y_test = np.reshape(y_test, (-1, 1))
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
X_train shape: (2263, 60, 1)
y_train shape: (2263, 1)
X_test shape: (260, 60, 1)
y_test shape: (260, 1)
import tensorflow as tf


def define_model():
    input1 = Input(shape=(window_size, 1))
    x = tf.keras.layers.LSTM(units=64, return_sequences=True)(input1)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.LSTM(units=64, return_sequences=True)(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.LSTM(units=64)(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(32, activation='softmax')(x)
    dnn_output = tf.keras.layers.Dense(1)(x)
    model = tf.keras.models.Model(inputs=input1, outputs=dnn_output)
    # Import and use the Nadam optimizer
    model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Nadam())
    model.summary()
    return model
model = define_model()


history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.1, verbose=1)
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_4 (InputLayer)        [(None, 60, 1)]           0         
                                                                 
 lstm_9 (LSTM)               (None, 60, 64)            16896     
                                                                 
 dropout_9 (Dropout)         (None, 60, 64)            0         
                                                                 
 lstm_10 (LSTM)              (None, 60, 64)            33024     
                                                                 
 dropout_10 (Dropout)        (None, 60, 64)            0         
                                                                 
 lstm_11 (LSTM)              (None, 64)                33024     
                                                                 
 dropout_11 (Dropout)        (None, 64)                0         
                                                                 
 dense_6 (Dense)             (None, 32)                2080      
                                                                 
 dense_7 (Dense)             (None, 1)                 33        
                                                                 
=================================================================
Total params: 85057 (332.25 KB)
Trainable params: 85057 (332.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
result = model.evaluate(X_test, y_test)


y_pred = model.predict(X_test)
MAPE = mean_absolute_percentage_error(y_test, y_pred)
Accuracy = 1 - MAPE
print('Test Loss:', result)
print('Test MAPE:', MAPE)
print('Test Accuracy:', Accuracy)
Test Loss: 0.0008509838371537626
Test MAPE: 0.0319030650799213
Test Accuracy: 0.9680969349200788
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_test_pred = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()
plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')


plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
plt.plot(df.Date[-test_size:], df.Price[-test_size:], color='red', lw=2)


plt.title('Gold Price Train and Test', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Train Set', 'Test Set'], loc='upper left', prop={'size': 15})
plt.grid(color='white')


plt.show()

复制代码

知乎学术咨询：https://www.zhihu.com/consult/people/792359672131756032?isMe=1

担任《Mechanical System and Signal Processing》审稿专家，担任《中国电机工程学报》，《控制与决策》等EI期刊审稿专家，擅长领域：现代信号处理，机器学习，深度学习，数字孪生，时间序列分析，设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。