简单的基于LSTM的黄金价格预测(Python)

复制代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout, LSTM
# Load the raw gold price history from the CSV file into a DataFrame.
df = pd.read_csv('Gold Price (2013-2023).csv' )
# Notebook-style bare expression: shows the frame when run as a cell,
# has no effect when run as a plain script.
df
复制代码
# Print the column names, non-null counts and dtypes of the raw data.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2583 entries, 0 to 2582
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Date      2583 non-null   object
 1   Price     2583 non-null   object
 2   Open      2583 non-null   object
 3   High      2583 non-null   object
 4   Low       2583 non-null   object
 5   Vol.      2578 non-null   object
 6   Change %  2583 non-null   object
dtypes: object(7)
memory usage: 141.4+ KB
# Discard the volume and percentage-change columns in place; the date and
# the four price columns remain.
df.drop(columns=['Vol.', 'Change %'], inplace=True)
df
复制代码
# Parse the date strings into datetime values.
df['Date'] = pd.to_datetime(df['Date'])

# Order the rows chronologically (oldest first) and renumber the index
# 0..n-1 in the same call.
df.sort_values('Date', ascending=True, inplace=True, ignore_index=True)

# Every column except 'Date' holds a number stored as text with thousands
# separators: strip the commas, then cast to float64.
numCols = df.columns.drop('Date')
df[numCols] = df[numCols].replace({',': ''}, regex=True).astype('float64')

# Notebook-style sanity checks: first rows, duplicate-row count and
# per-column missing-value counts.
df.head()
df.duplicated().sum()
df.isnull().sum()
Date     0
Price    0
Open     0
High     0
Low      0
dtype: int64
import plotly.express as px


# Interactive line chart of the price over the full date range.
fig = px.line(x=df['Date'], y=df['Price'])
fig.update_traces(line_color='black')
fig.update_layout(
    # Centered title near the top of the figure.
    title=dict(
        text='Gold Price Data',
        x=0.5,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
    xaxis_title='Date',
    yaxis_title='Price',
    plot_bgcolor='rgba(255,223,0,0.9)',
)
fig.show()
# Number of rows dated in 2022; the code below uses it as the length of the
# test split taken from the end of the frame.
test_size = len(df[df['Date'].dt.year == 2022])
print(test_size)

260

复制代码
import matplotlib.pyplot as plt


# Draw the price series with the training part in black and the final
# test_size rows (the test part) in red.
plt.figure(figsize=(15, 6), dpi=150)
# Axes styling must be set before the first plot call creates the axes.
plt.rc('axes', facecolor='cyan', edgecolor='white')

plt.plot(df['Date'].iloc[:-test_size], df['Price'].iloc[:-test_size], color='black', lw=2)
plt.plot(df['Date'].iloc[-test_size:], df['Price'].iloc[-test_size:], color='red', lw=2)

plt.title('Gold Price Train and Test', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Train Set', 'Test Set'], loc='upper left', prop={'size': 15})
plt.grid(color='white')

plt.show()
复制代码
# Fit the scaler on the training rows only (everything before the last
# test_size rows) so the min/max used for normalisation never depend on
# prices from the held-out test period.
scaler = MinMaxScaler()
scaler.fit(df.Price[:-test_size].values.reshape(-1, 1))
MinMaxScaler()
# Look-back length: every sample is made of 60 consecutive scaled prices.
window_size = 60

# Scale the training prices (all rows except the last test_size); this call
# also refits the scaler on exactly this training slice.
train_data = scaler.fit_transform(df.Price[:-test_size].values.reshape(-1, 1))

# One sample per position: the window_size scaled prices before `end` are the
# input, and the scaled price at `end` is the target.
X_train = [
    train_data[end - window_size:end, 0]
    for end in range(window_size, len(train_data))
]
y_train = [train_data[end, 0] for end in range(window_size, len(train_data))]
# Take the test rows plus the window_size rows before them, so the very first
# test target already has a full look-back window, and scale them with the
# already-fitted scaler (transform only, no refit).
test_data = scaler.transform(df.Price[-test_size - window_size:].values.reshape(-1, 1))

# Same windowing as for the training set: window_size inputs, next value as target.
X_test = [
    test_data[end - window_size:end, 0]
    for end in range(window_size, len(test_data))
]
y_test = [test_data[end, 0] for end in range(window_size, len(test_data))]
# Stack the windows into arrays and add a trailing feature axis of size 1,
# giving inputs shaped (samples, window, 1).
X_train = np.array(X_train)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = np.array(X_test)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Targets become column vectors shaped (samples, 1).
y_train = np.array(y_train).reshape(-1, 1)
y_test = np.array(y_test).reshape(-1, 1)

print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
X_train shape: (2263, 60, 1)
y_train shape: (2263, 1)
X_test shape: (260, 60, 1)
y_test shape: (260, 1)
import tensorflow as tf


def define_model():
    """Build and compile the stacked-LSTM regression model.

    The network takes windows of ``window_size`` time steps with one feature
    each (``window_size`` is the module-level look-back length) and outputs a
    single value per window. Three 64-unit LSTM layers, each followed by 20%
    dropout, feed a 32-unit dense layer and a 1-unit linear output.

    Returns:
        The compiled Keras model (mean-squared-error loss, Nadam optimizer).
        The layer summary is printed as a side effect.
    """
    layers = tf.keras.layers

    # Take every layer, including Input, from tf.keras so the graph never
    # mixes objects from the standalone `keras` package with tf.keras ones.
    input1 = layers.Input(shape=(window_size, 1))

    # The first two LSTM layers return the full sequence so the next LSTM
    # layer receives one vector per time step.
    x = layers.LSTM(units=64, return_sequences=True)(input1)
    x = layers.Dropout(0.2)(x)
    x = layers.LSTM(units=64, return_sequences=True)(x)
    x = layers.Dropout(0.2)(x)
    # The last LSTM layer returns only its final state.
    x = layers.LSTM(units=64)(x)
    x = layers.Dropout(0.2)(x)

    # ReLU instead of softmax: softmax forces the 32 hidden activations to
    # sum to 1, which suits class probabilities but needlessly constrains
    # the hidden representation of a regression network.
    x = layers.Dense(32, activation='relu')(x)
    dnn_output = layers.Dense(1)(x)

    model = tf.keras.models.Model(inputs=input1, outputs=dnn_output)
    model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Nadam())
    model.summary()
    return model
# Build the network, then train it on the training windows; the object
# returned by fit() is kept in `history`.
model = define_model()

history = model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=32,
    validation_split=0.1,  # 10% of the training samples are used for validation
    verbose=1,
)
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_4 (InputLayer)        [(None, 60, 1)]           0         
                                                                 
 lstm_9 (LSTM)               (None, 60, 64)            16896     
                                                                 
 dropout_9 (Dropout)         (None, 60, 64)            0         
                                                                 
 lstm_10 (LSTM)              (None, 60, 64)            33024     
                                                                 
 dropout_10 (Dropout)        (None, 60, 64)            0         
                                                                 
 lstm_11 (LSTM)              (None, 64)                33024     
                                                                 
 dropout_11 (Dropout)        (None, 64)                0         
                                                                 
 dense_6 (Dense)             (None, 32)                2080      
                                                                 
 dense_7 (Dense)             (None, 1)                 33        
                                                                 
=================================================================
Total params: 85057 (332.25 KB)
Trainable params: 85057 (332.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
# Loss (mean squared error) on the test windows, still in scaled units.
result = model.evaluate(X_test, y_test)

y_pred = model.predict(X_test)

# Compute MAPE on real prices, not on the min-max scaled values: scaling
# shifts the data towards zero, so a percentage error taken in scaled space
# is not the percentage error of the price and blows up for targets near
# the scaled minimum.
MAPE = mean_absolute_percentage_error(
    scaler.inverse_transform(y_test.reshape(-1, 1)),
    scaler.inverse_transform(y_pred.reshape(-1, 1)),
)
Accuracy = 1 - MAPE
print('Test Loss:', result)
print('Test MAPE:', MAPE)
print('Test Accuracy:', Accuracy)
Test Loss: 0.0008509838371537626
Test MAPE: 0.0319030650799213
Test Accuracy: 0.9680969349200788
# Map the scaled targets and predictions back to real prices.
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_test_pred = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

plt.figure(figsize=(15, 6), dpi=150)
# Axes styling must be set before the first plot call creates the axes.
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')

# Training history in black, then actual and predicted prices over the test
# period, so the figure actually shows the model's predictions.
plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
plt.plot(df.Date[-test_size:], y_test_true, color='blue', lw=2)
plt.plot(df.Date[-test_size:], y_test_pred, color='red', lw=2)

plt.title('Model Performance on Gold Price Prediction', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(
    ['Training Data', 'Actual Test Data', 'Predicted Test Data'],
    loc='upper left',
    prop={'size': 15},
)
plt.grid(color='white')

plt.show()
复制代码
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1

担任《Mechanical Systems and Signal Processing》审稿专家,担任《中国电机工程学报》,《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

相关推荐
谦行3 分钟前
工欲善其事,必先利其器—— PyTorch 深度学习基础操作
pytorch·深度学习·ai编程
夜夜敲码17 分钟前
C语言教程(十六): C 语言字符串详解
c语言·开发语言
宋康24 分钟前
C语言结构体和union内存对齐
c语言·开发语言
逢生博客29 分钟前
使用 Python 项目管理工具 uv 快速创建 MCP 服务(Cherry Studio、Trae 添加 MCP 服务)
python·sqlite·uv·deepseek·trae·cherry studio·mcp服务
xwz小王子32 分钟前
Nature Communications 面向形状可编程磁性软材料的数据驱动设计方法—基于随机设计探索与神经网络的协同优化框架
深度学习
居然是阿宋34 分钟前
Kotlin高阶函数 vs Lambda表达式:关键区别与协作关系
android·开发语言·kotlin
堕落似梦35 分钟前
Pydantic增强SQLALchemy序列化(FastAPI直接输出SQLALchemy查询集)
python
白熊18839 分钟前
【计算机视觉】CV实战项目 - 基于YOLOv5的人脸检测与关键点定位系统深度解析
人工智能·yolo·计算机视觉
nenchoumi311941 分钟前
VLA 论文精读(十六)FP3: A 3D Foundation Policy for Robotic Manipulation
论文阅读·人工智能·笔记·学习·vln
后端小肥肠1 小时前
文案号搞钱潜规则:日入四位数的Coze工作流我跑通了
人工智能·coze