import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout, LSTM
# Load the raw gold-price export; the relative filename is resolved against
# the current working directory.
df = pd.read_csv(filepath_or_buffer='Gold Price (2013-2023).csv')
# Notebook-style display of the frame, followed by a column/dtype summary
# written to stdout.
df
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2583 entries, 0 to 2582
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 2583 non-null object
1 Price 2583 non-null object
2 Open 2583 non-null object
3 High 2583 non-null object
4 Low 2583 non-null object
5 Vol. 2578 non-null object
6 Change % 2583 non-null object
dtypes: object(7)
memory usage: 141.4+ KB
# Discard the two columns that the rest of the script never uses.
df = df.drop(columns=['Vol.', 'Change %'])
df

# Parse 'Date' into datetimes, order the rows oldest-first and renumber the
# index from zero so positional slices follow chronological order.
df = (
    df.assign(Date=pd.to_datetime(df['Date']))
    .sort_values('Date')
    .reset_index(drop=True)
)

# Every non-date column is loaded as strings (object dtype); strip the commas
# from the values and cast them to float64.
numCols = df.columns.drop('Date')
df[numCols] = df[numCols].replace(',', '', regex=True).astype('float64')

# Quick sanity checks: first rows, count of duplicated rows, nulls per column.
df.head()
df.duplicated().sum()
df.isnull().sum()
Date 0
Price 0
Open 0
High 0
Low 0
dtype: int64
import plotly.express as px

# Interactive line chart of the 'Price' column against 'Date'. The plotly
# update_* methods return the figure itself, so the styling calls are chained.
fig = (
    px.line(x=df['Date'], y=df['Price'])
    .update_traces(line_color='black')
    .update_layout(
        xaxis_title='Date',
        yaxis_title='Price',
        # Centred title anchored near the top edge of the figure.
        title=dict(
            text='Gold Price Data',
            y=0.95,
            x=0.5,
            xanchor='center',
            yanchor='top',
        ),
        plot_bgcolor='rgba(255,223,0,0.9)',
    )
)
fig.show()
# Number of rows dated in calendar year 2022; used as the hold-out size.
# NOTE(review): later code takes the *last* test_size rows of the sorted
# frame, which are the 2022 rows only if no rows after 2022 exist -- confirm
# against the data.
in_test_year = df['Date'].dt.year == 2022
test_size = int(in_test_year.sum())
print(test_size)
260
import matplotlib.pyplot as plt

# Axes styling goes through rcParams before the first plot call creates the
# axes. These settings are global and stay in effect for later figures.
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')
plt.figure(figsize=(15, 6), dpi=150)

# The last test_size rows form the test span; everything before is training.
train_rows = df.iloc[:-test_size]
test_rows = df.iloc[-test_size:]
plt.plot(train_rows['Date'], train_rows['Price'], color='black', lw=2, label='Train Set')
plt.plot(test_rows['Date'], test_rows['Price'], color='red', lw=2, label='Test Set')

plt.title('Gold Price Train and Test', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(loc='upper left', prop={'size': 15})
plt.grid(color='white')
plt.show()
# Min-max scaler shared by the training and test pipelines.
# Fit on the training span only (everything before the last test_size rows):
# fitting on the full series would let the test period's min/max leak into the
# scaling, and would disagree with the train-only fit performed when the
# training data is transformed.
scaler = MinMaxScaler()
scaler.fit(df.Price[:-test_size].values.reshape(-1, 1))
MinMaxScaler()
# Number of consecutive past values fed to the model for each prediction.
window_size = 60


def _make_windows(scaled, width):
    """Turn a scaled ``(n, 1)`` series into supervised samples.

    Each sample pairs ``width`` consecutive values with the single value that
    immediately follows them.

    Returns:
        ``(X, y)`` with shapes ``(n - width, width, 1)`` and ``(n - width, 1)``.
    """
    ends = range(width, len(scaled))
    windows = np.array([scaled[end - width:end, 0] for end in ends])
    targets = np.array([scaled[end, 0] for end in ends])
    return (
        np.reshape(windows, (windows.shape[0], windows.shape[1], 1)),
        np.reshape(targets, (-1, 1)),
    )


# Training series: everything before the held-out tail. The scaler is
# (re)fitted here, on the training values only.
train_data = df.Price[:-test_size]
train_data = scaler.fit_transform(train_data.values.reshape(-1, 1))
X_train, y_train = _make_windows(train_data, window_size)

# Test series: the held-out tail plus the window_size rows before it, so the
# first test target still has a full window of history. It is scaled with the
# parameters learned from the training data.
test_data = df.Price[-test_size - window_size:]
test_data = scaler.transform(test_data.values.reshape(-1, 1))
X_test, y_test = _make_windows(test_data, window_size)

print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
X_train shape: (2263, 60, 1)
y_train shape: (2263, 1)
X_test shape: (260, 60, 1)
y_test shape: (260, 1)
import tensorflow as tf
def define_model():
    """Build and compile the stacked-LSTM regressor.

    The network takes windows of shape ``(window_size, 1)`` (``window_size``
    is the module-level constant) and produces one value per window. It has
    three 64-unit LSTM layers, each followed by 20% dropout, then a 32-unit
    dense layer and a single-unit output layer. It is compiled with
    mean-squared-error loss and the Nadam optimizer.

    Returns:
        The compiled ``tf.keras`` model. The layer summary is printed as a
        side effect.
    """
    # Every layer, including the input, comes from tf.keras: mixing the
    # standalone ``keras`` package with ``tf.keras`` objects can fail when the
    # two resolve to different Keras implementations.
    inputs = tf.keras.layers.Input(shape=(window_size, 1))

    # The first two LSTM layers return the full sequence so the next LSTM
    # receives one vector per time step; the last one returns only its final
    # output.
    x = tf.keras.layers.LSTM(units=64, return_sequences=True)(inputs)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.LSTM(units=64, return_sequences=True)(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.LSTM(units=64)(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    # NOTE(review): softmax on a hidden layer is unusual for a regression
    # head ('relu' is the conventional choice). Kept as-is so trained results
    # are unchanged -- confirm the intent.
    x = tf.keras.layers.Dense(32, activation='softmax')(x)
    outputs = tf.keras.layers.Dense(1)(x)

    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss='mean_squared_error',
        optimizer=tf.keras.optimizers.Nadam(),
    )
    model.summary()
    return model
# Build the network, then train it on the windowed training set.
model = define_model()
history = model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=32,
    # A tenth of the training samples is set aside for validation.
    validation_split=0.1,
    verbose=1,
)
Model: "model_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 60, 1)] 0
lstm_9 (LSTM) (None, 60, 64) 16896
dropout_9 (Dropout) (None, 60, 64) 0
lstm_10 (LSTM) (None, 60, 64) 33024
dropout_10 (Dropout) (None, 60, 64) 0
lstm_11 (LSTM) (None, 64) 33024
dropout_11 (Dropout) (None, 64) 0
dense_6 (Dense) (None, 32) 2080
dense_7 (Dense) (None, 1) 33
=================================================================
Total params: 85057 (332.25 KB)
Trainable params: 85057 (332.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
# Loss on the held-out windows (the model is compiled with mean squared
# error), then the raw predictions for the same windows.
result = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)

# NOTE(review): y_test and y_pred are still in the scaler's units here, so
# this MAPE (and the derived "accuracy") describes the scaled series, not
# prices. Consider inverse-transforming both first if a price-level error is
# intended.
MAPE = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)
Accuracy = 1 - MAPE

for label, value in (
    ('Test Loss:', result),
    ('Test MAPE:', MAPE),
    ('Test Accuracy:', Accuracy),
):
    print(label, value)
Test Loss: 0.0008509838371537626
Test MAPE: 0.0319030650799213
Test Accuracy: 0.9680969349200788
# Map the scaled targets and predictions back to the original price units.
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_test_pred = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

# Plot the training history, the actual test prices and the model's
# predictions. Previously the two arrays above were computed but never drawn,
# and the figure merely repeated the earlier train/test plot.
plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')
plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
# One prediction exists per test row, so the last test_size dates line up
# with both test arrays.
plt.plot(df.Date[-test_size:], y_test_true, color='red', lw=2)
plt.plot(df.Date[-test_size:], y_test_pred, color='blue', lw=2)
plt.title('Gold Price Actual vs Predicted', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(
    ['Train Set', 'Test Set', 'Predicted Test Set'],
    loc='upper left',
    prop={'size': 15},
)
plt.grid(color='white')
plt.show()
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1
担任《Mechanical Systems and Signal Processing》审稿专家,担任《中国电机工程学报》,《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。