功能与作用说明
本代码实现LSTM量化交易策略的系统化回测框架,核心功能包含:1) 时间序列数据预处理管道;2) LSTM超参数空间构建;3) 蒙特卡洛随机搜索优化;4) 多维度绩效评估矩阵;5) 统计显著性检验模块。该工具用于验证LSTM输入特征、网络结构、正则化系数等关键参数在特定市场环境下的预测有效性,为实盘部署提供量化依据。主要风险包括过拟合历史数据、幸存者偏差导致的虚假信号,以及未考虑交易成本带来的收益高估。
数据准备与预处理流程
python
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
def prepare_time_series(data, lookback=60):
"""构造监督学习数据集"""
X, y = [], []
for i in range(len(data)-lookback):
X.append(data[i:(i+lookback)].values)
y.append(data[i+lookback, 3]) # 假设第4列是收盘价
return np.array(X), np.array(y)
# 示例数据处理
df = pd.read_csv('BTC-USD.csv', parse_dates=True, index_col=0)
price_data = df[['Open','High','Low','Close']].values
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(price_data)
X, y = prepare_time_series(scaled_data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
LSTM架构设计原则
python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.regularizers import l2
def build_lstm_model(units=[50,50], dropout_rate=0.2, l2_lambda=0.001, bidirectional=False):
"""可配置的LSTM模型工厂函数"""
model = Sequential()
for i, u in enumerate(units):
if bidirectional:
layer = Bidirectional(LSTM(u, return_sequences=(i<len(units)-1),
kernel_regularizer=l2(l2_lambda)))
else:
layer = LSTM(u, return_sequences=(i<len(units)-1),
kernel_regularizer=l2(l2_lambda))
model.add(layer)
model.add(Dropout(dropout_rate))
model.add(Dense(1, activation='sigmoid'))
return model
参数空间探索方法论
python
from scipy.stats import randint, uniform
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# 定义超参数搜索空间
param_dist = {
'units': [32, 64, (32,32), (64,32)],
'dropout_rate': uniform(0.1, 0.4),
'l2_lambda': uniform(1e-4, 1e-2),
'bidirectional': [True, False],
'batch_size': randint(16, 128),
'epochs': [50, 100]
}
# 创建Keras分类器包装器
model = KerasClassifier(build_fn=lambda: build_lstm_model(), verbose=0)
# 执行随机搜索
random_search = RandomizedSearchCV(
estimator=model,
param_distributions=param_dist,
n_iter=50,
cv=TimeSeriesSplit(n_splits=5),
scoring='neg_log_loss',
n_jobs=-1
)
random_search.fit(X_train, y_train)
回测引擎核心实现
python
class BacktestEngine:
def __init__(self, model, data, initial_capital=10000):
self.model = model
self.data = data
self.cash = initial_capital
self.position = 0
self.trades = []
def run_backtest(self, X_test, y_test):
"""模拟逐日交易决策"""
predictions = self.model.predict(X_test)
returns = np.diff(self.data[-len(predictions):, 3]) / self.data[-len(predictions):-1, 3]
for i, (pred, ret) in enumerate(zip(predictions, returns)):
signal = 1 if pred > 0.5 else -1
prev_pos = self.position
# 执行交易逻辑
if prev_pos == 0 and signal != 0:
shares = self.cash / self.data[-len(predictions)+i, 3]
self.position = shares
self.cash = 0
elif prev_pos != 0 and signal == 0:
self.cash = self.position * self.data[-len(predictions)+i, 3]
self.position = 0
# 记录持仓价值
self.portfolio_value = self.cash + self.position * self.data[-len(predictions)+i, 3]
self.trades.append({
'date': self.data.index[-len(predictions)+i],
'signal': signal,
'return': ret,
'position': self.position
})
return self._calculate_metrics()
def _calculate_metrics(self):
"""计算夏普比率、最大回撤等关键指标"""
returns = pd.DataFrame(self.trades)['return']
cumulative_returns = (1 + returns).cumprod()
sharpe_ratio = np.sqrt(252) * returns.mean() / returns.std()
max_drawdown = (cumulative_returns / cumulative_returns.cummax() - 1).min()
return {'Sharpe': sharpe_ratio, 'MaxDD': max_drawdown}
统计显著性检验方案
python
from statsmodels.tsa.stattools import adfuller
from scipy.stats import ttest_ind
def stationarity_test(series, threshold=0.05):
"""ADF检验判断序列平稳性"""
result = adfuller(series.dropna())
return result[1] < threshold
def strategy_comparison(strategy_returns, buy_hold_returns):
"""配对样本t检验比较策略优劣"""
t_stat, p_value = ttest_ind(strategy_returns, buy_hold_returns, equal_var=False)
return {'t_statistic': t_stat, 'p_value': p_value}
# 应用示例
btc_returns = df['Close'].pct_change().dropna()
lstm_returns = pd.Series(np.random.normal(0.001, 0.02, len(btc_returns))) # 模拟策略收益
print(stationarity_test(btc_returns)) # 检查价格序列是否适合LSTM建模
print(strategy_comparison(lstm_returns, btc_returns)) # 对比策略与基准表现
结果可视化与解释
python
import matplotlib.pyplot as plt
import seaborn as sns
def plot_learning_curves(history):
"""绘制训练/验证损失曲线"""
plt.figure(figsize=(12,6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Learning Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
def visualize_parameter_importance(param_results):
"""热力图展示参数组合效果"""
params_df = pd.DataFrame(param_results)
corr_matrix = params_df.corr()
plt.figure(figsize=(10,8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Parameter Correlation Heatmap')
plt.show()
结论
有效的LSTM参数验证需满足以下条件:1) 在样本外测试中保持夏普比率>1.5;2) 最大回撤控制在20%以内;3) ADF检验p值<0.05确保残差平稳;4) 策略收益相对于买入持有具有统计显著性(p<0.05)。建议采用滚动窗口验证机制,每季度重新校准参数以适应市场状态变化。