1. 策略功能与核心逻辑说明
本策略基于HMM(隐马尔可夫模型)构建,旨在通过识别金融市场中隐藏的状态转换规律,捕捉指数期权与外汇市场间的双币种套利机会。其核心功能是通过多维度市场数据训练HMM,推断当前市场所处的隐含状态,并结合状态特征制定跨市场的价差交易决策。
1.1 策略作用机制
该策略主要作用于同时存在价格联动关系的两类资产:目标指数期权(如沪深300指数期权)与对应汇率期货(如USD/CNY)。当HMM检测到系统从"均衡态"向"套利态"转换时,触发双向挂单策略,利用两个市场对同一经济事件的反应速度差异获取价差收益(理论上接近无风险,实际仍受1.2节所列风险约束)。
1.2 潜在风险特性
- 状态误判风险:模型可能将噪声波动识别为有效状态转换,导致错误入场
- 流动性风险:套利窗口期过短时,实际成交价格可能偏离预期
- 参数敏感性风险:HMM的观测序列长度、状态数量等超参数设置直接影响策略稳定性
- 尾部风险:极端行情下两市场联动关系失效可能导致套利失败
2. HMM基础理论框架
2.1 数学模型构成要素
HMM由五元组λ=(Q,O,A,B,π)定义:
- Q={q₁,q₂,...,qₙ}:N个隐藏状态集合
- O={o₁,o₂,...,oₘ}:M个观测值集合
- A=[aᵢⱼ]:N×N状态转移概率矩阵
- B=[bⱼ(k)]:N×M观测概率矩阵
- π=[πᵢ]:N维初始状态概率分布
2.2 关键算法原理
- 前向-后向算法:处理评估问题,计算给定模型下的观测序列概率
- Viterbi算法:解决解码问题,寻找最可能的状态序列
- Baum-Welch算法:解决学习问题,通过EM迭代优化模型参数
python
import numpy as np
from hmmlearn import hmm
class MarketStateHMM:
    """Wrap an hmmlearn HMM and translate decoded state indices into labels.

    The observations used elsewhere in this file are continuous,
    standardised features, so a Gaussian-emission HMM is used; hmmlearn's
    ``MultinomialHMM`` expects non-negative integer observations.

    NOTE(review): EM assigns hidden-state indices arbitrarily, so the
    positional mapping index -> label below is a naming convention only.
    Confirm the mapping against the fitted state means before trading on it.
    """

    # Names for the first three state indices.
    _BASE_LABELS = ("Normal", "Arbitrage", "Volatility")

    def __init__(self, n_states=3):
        """Create an unfitted model with ``n_states`` hidden states."""
        self.model = hmm.GaussianHMM(n_components=n_states)
        self.state_labels = self._build_labels(n_states)

    @staticmethod
    def _build_labels(n_states):
        """Return a label list covering at least ``n_states`` state indices.

        The three base labels are always present; states beyond the third
        get generic ``State<i>`` names so that ``predict_state`` never indexes
        past the end of the list when more than three states are requested.
        """
        labels = list(MarketStateHMM._BASE_LABELS)
        labels.extend(f"State{i}" for i in range(len(labels), n_states))
        return labels

    def train(self, X, lengths):
        """Fit the model with ``model.fit(X, lengths)``.

        Returns:
            The ``converged`` flag of the model's convergence monitor.
        """
        self.model.fit(X, lengths)
        return self.model.monitor_.converged

    def predict_state(self, obs_seq):
        """Decode ``obs_seq`` and return the label of the last decoded state."""
        _logprob, state_path = self.model.decode(obs_seq)
        return self.state_labels[state_path[-1]]
3. 套利时机识别方法论
3.1 观测变量设计
选取以下4类特征构建观测序列:
- 相对价值指标:(期权隐含波动率 - 历史波动率)/历史波动率
- 基差水平:现货指数与期货合约的价差比率
- 量价协同度:期权成交量/标的证券成交量比值
- 汇率偏离度:即期汇率与NDF远期汇率的差异百分比
3.2 状态空间划分标准
| 状态类型 | 转移概率阈值 | 持续时间要求 | 套利信号强度 |
|---|---|---|---|
| 正常态(Normal) | <5% | >5周期 | ±0.8σ以内 |
| 套利态(Arbit) | >15% | 1-3周期 | 突破±1.5σ |
| 波动态(Vol) | 5-15% | ≤5周期 | 持续震荡 |
3.3 套利条件判定规则
当且仅当满足以下全部条件时触发套利:
- Viterbi解码结果为套利态(上表记为Arbit,对应代码中的状态标签"Arbitrage")
- 过去3个周期未出现连续套利信号
- 期权市场买卖价差小于当日均值的70%
- 汇率市场深度高于近20日深度分布的80%分位数
python
def detect_arbitrage(hmm_model, market_data, recent_signals=None,
                     avg_spread=None, depth_quantile=None):
    """Decide whether the current tick qualifies as an arbitrage entry.

    Args:
        hmm_model: Object exposing ``predict_state(obs_seq)`` that returns a
            state label string.
        market_data: Mapping with the keys ``iv_deviation``, ``basis_ratio``,
            ``volume_ratio``, ``fx_deviation``, ``spread`` and ``depth``.
        recent_signals: Optional mapping; a truthy ``'consecutive'`` entry
            blocks the trade. Falls back to ``market_data['recent_signals']``
            and then to "no recent signals".
        avg_spread: Reference spread; falls back to
            ``market_data['avg_spread']``.
        depth_quantile: Object indexable by ``0.8`` giving the depth
            threshold; falls back to ``market_data['depth_quantile']``.

    Returns:
        ``True`` only when the decoded state is the arbitrage state and all
        three market-quality filters pass, otherwise ``False``.

    Raises:
        ValueError: If the state is the arbitrage state but ``avg_spread`` or
            ``depth_quantile`` cannot be resolved.
    """
    # One observation made of four features: shape (1, 4). Reshaping to
    # (-1, 1) would present the features as four consecutive time steps.
    obs_seq = np.array([[
        market_data['iv_deviation'],
        market_data['basis_ratio'],
        market_data['volume_ratio'],
        market_data['fx_deviation'],
    ]], dtype=float)

    current_state = hmm_model.predict_state(obs_seq)
    # "Arbitrage" is the label MarketStateHMM emits; the short form is still
    # accepted for models that use it.
    if current_state not in ("Arbitrage", "Arbit"):
        return False

    def _resolve(value, key):
        # Explicit argument wins, then the entry carried in market_data.
        return market_data.get(key) if value is None else value

    recent_signals = _resolve(recent_signals, 'recent_signals') or {}
    avg_spread = _resolve(avg_spread, 'avg_spread')
    depth_quantile = _resolve(depth_quantile, 'depth_quantile')
    if avg_spread is None or depth_quantile is None:
        raise ValueError(
            "avg_spread and depth_quantile are required to validate an "
            "arbitrage signal"
        )

    return bool(
        not recent_signals.get('consecutive', False)
        and market_data['spread'] < avg_spread * 0.7
        and market_data['depth'] > depth_quantile[0.8]
    )
4. Python实现完整流程
4.1 数据采集与预处理模块
python
import pandas as pd
from sklearn.preprocessing import StandardScaler
class DataProcessor:
    """Load, align and featurise option and FX-futures data."""

    def __init__(self, lookback_window=60):
        """Store the rolling-window length and create an unfitted scaler."""
        self.scaler = StandardScaler()
        self.lookback = lookback_window

    def load_data(self, file_path):
        """Read ``options.csv`` and ``fx_futures.csv`` from ``file_path``.

        Returns:
            The two tables merged on ``timestamp`` by ``_align_timeseries``.
        """
        opt_df = pd.read_csv(f"{file_path}/options.csv")
        fx_df = pd.read_csv(f"{file_path}/fx_futures.csv")
        return self._align_timeseries(opt_df, fx_df)

    def _align_timeseries(self, df1, df2):
        """Outer-join two frames on ``timestamp`` and fill numeric gaps.

        Only numeric columns are interpolated, so a string or otherwise
        non-numeric ``timestamp`` column is left untouched.

        NOTE(review): linear interpolation uses the next observed value, which
        is look-ahead information in a backtest; confirm this is acceptable.
        """
        merged = pd.merge(df1, df2, on='timestamp', how='outer')
        merged = merged.sort_values('timestamp')
        numeric_cols = merged.select_dtypes(include='number').columns
        if len(numeric_cols) > 0:
            merged[numeric_cols] = merged[numeric_cols].interpolate(method='linear')
        return merged

    def _build_feature_frame(self, raw_data):
        """Return the unscaled feature table with incomplete rows dropped."""
        features = pd.DataFrame(index=raw_data.index)
        features['iv_deviation'] = (
            (raw_data['implied_vol'] - raw_data['hist_vol']) / raw_data['hist_vol']
        )
        features['basis_ratio'] = raw_data['spot_price'] / raw_data['future_price'] - 1
        features['volume_ratio'] = raw_data['option_volume'] / raw_data['stock_volume']
        features['fx_deviation'] = raw_data['spot_rate'] / raw_data['forward_rate'] - 1

        # A zero denominator yields +/-inf; turn it into NaN so the affected
        # rows (and the rolling windows containing them) are dropped below.
        infinity = float('inf')
        features = features.replace([infinity, -infinity], float('nan'))

        # Snapshot the base column names: the loop adds columns as it goes.
        for col in list(features.columns):
            window = features[col].rolling(self.lookback)
            features[f'{col}_ma{self.lookback}'] = window.mean()
            features[f'{col}_std{self.lookback}'] = window.std()
        return features.dropna()

    def create_features(self, raw_data, fit=True):
        """Build the scaled feature matrix.

        Args:
            raw_data: Frame with the columns ``implied_vol``, ``hist_vol``,
                ``spot_price``, ``future_price``, ``option_volume``,
                ``stock_volume``, ``spot_rate`` and ``forward_rate``.
            fit: When true (default) the scaler is re-fitted on this data.
                Pass ``False`` to reuse the statistics of an earlier fit,
                e.g. for validation or live data.

        Returns:
            The output of the scaler for the four base features plus their
            rolling mean and standard deviation.

        Raises:
            ValueError: If no complete row remains after the rolling window
                and missing-value filtering.
        """
        features = self._build_feature_frame(raw_data)
        if features.empty:
            raise ValueError(
                "no complete feature rows; need more than "
                f"{self.lookback} valid observations"
            )
        if fit:
            return self.scaler.fit_transform(features)
        return self.scaler.transform(features)
4.2 模型训练与验证模块
python
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.stats.diagnostic import acorr_ljungbox
class ModelTrainer:
    """Select the number of hidden states by BIC and cross-validate the model."""

    def __init__(self, n_states=3, max_iter=100):
        """Create the initial model wrapper.

        NOTE(review): ``max_iter`` is stored but not forwarded to the
        underlying model anywhere in this class; confirm the intended wiring.
        """
        self.hmm_model = MarketStateHMM(n_states)
        self.max_iter = max_iter
        self.best_bic = float('inf')

    def grid_search(self, X_train, y_train):
        """Try 2 to 5 hidden states and keep the model with the lowest BIC.

        Args:
            X_train: Observation matrix passed to ``MarketStateHMM.train``.
            y_train: Passed as the ``lengths`` argument of
                ``MarketStateHMM.train`` (sequence lengths, not labels).

        Returns:
            The best model found so far (``self.hmm_model``).
        """
        for n_states in range(2, 6):
            candidate = MarketStateHMM(n_states)
            if not candidate.train(X_train, y_train):
                continue
            bic = self._calculate_bic(candidate.model, X_train)
            if bic < self.best_bic:
                self.best_bic = bic
                self.hmm_model = candidate
        return self.hmm_model

    def _calculate_bic(self, model, data):
        """Return ``-2 * logL + k * ln(n)`` for a fitted model.

        ``k`` is approximated as ``n_components**2 + n_components * n_features``
        and ``n`` is ``len(data)``. ``model.score`` is taken to return the
        total log-likelihood as a scalar.
        """
        log_likelihood = float(model.score(data))
        n_params = model.n_components ** 2 + model.n_components * model.n_features
        return -2 * log_likelihood + n_params * np.log(len(data))

    def cross_validate(self, X, y):
        """Score the current state count with a 5-fold time-series split.

        A fresh model is trained per fold so the model held in
        ``self.hmm_model`` is not overwritten by a fit on a training subset.
        ``y`` is accepted for interface compatibility and is not used.

        Returns:
            ``(mean, std)`` of the held-out log-likelihoods, or ``(nan, nan)``
            when no fold converged.
        """
        n_states = self.hmm_model.model.n_components
        scores = []
        for train_idx, test_idx in TimeSeriesSplit(n_splits=5).split(X):
            fold_model = MarketStateHMM(n_states)
            if fold_model.train(X[train_idx], [len(train_idx)]):
                scores.append(fold_model.model.score(X[test_idx]))
        if not scores:
            return float('nan'), float('nan')
        return np.mean(scores), np.std(scores)
4.3 实盘交易执行模块
python
import threading
from queue import Queue
class ArbitrageExecutor:
    """Run the tick -> signal -> order loop on a background thread.

    ``_prepare_features`` and ``_execute_order`` are hooks that a concrete
    executor must provide; this class does not implement them.
    """

    def __init__(self, broker_api, hmm_model):
        self.broker = broker_api
        self.model = hmm_model
        self.order_queue = Queue()
        self.running = False
        self._worker = None  # thread started by start_trading, if any

    def start_trading(self, data_feed):
        """Start processing ``data_feed`` on a new thread.

        Raises:
            RuntimeError: If a previously started worker is still alive; two
                workers must not consume the same feed.
        """
        if self._worker is not None and self._worker.is_alive():
            raise RuntimeError("trading loop is already running")
        self.running = True
        self._worker = threading.Thread(
            target=self._process_stream, args=(data_feed,)
        )
        self._worker.start()

    def _process_stream(self, data_feed):
        """Consume ticks until stopped or the feed is exhausted."""
        feed = iter(data_feed)
        try:
            while self.running:
                try:
                    new_tick = next(feed)
                except StopIteration:
                    break  # finite feed: finish cleanly instead of crashing
                feature_vec = self._prepare_features(new_tick)
                if self._should_trade(feature_vec):
                    self.order_queue.put(self._generate_order(new_tick))
                    self._drain_orders()
        finally:
            # Reflect that the loop has ended, whatever the reason.
            self.running = False

    def _should_trade(self, feature_vec):
        """Delegate the entry decision to the module-level ``detect_arbitrage``."""
        return detect_arbitrage(self.model, feature_vec)

    def _drain_orders(self):
        """Execute every queued order and mark it done.

        Each ``put`` must be matched by ``task_done``; otherwise
        ``order_queue.join()`` in ``stop_trading`` never returns.
        """
        from queue import Empty

        while True:
            try:
                order = self.order_queue.get_nowait()
            except Empty:
                return
            try:
                self._execute_order(order)
            finally:
                self.order_queue.task_done()

    def _prepare_features(self, tick_data):
        """Hook: turn a raw tick into the input expected by the signal check."""
        raise NotImplementedError("_prepare_features must be provided")

    def _execute_order(self, order):
        """Hook: send ``order`` to the broker."""
        raise NotImplementedError("_execute_order must be provided")

    def _generate_order(self, tick_data):
        """Build the two-leg order for one tick.

        Leg 1 is a limit order on ``tick_data['index_option']`` and leg 2 a
        market order on ``tick_data['currency_pair']`` with the opposite side.

        NOTE(review): the limit price is ``strike_price * 0.995`` for both BUY
        and SELL; confirm the margin direction is intended for the SELL side.
        """
        going_up = tick_data['direction'] == 'up'
        option_leg = {
            'symbol': tick_data['index_option'],
            'side': 'BUY' if going_up else 'SELL',
            'quantity': int(tick_data['position_size']),
            'type': 'LIMIT',
            'price': tick_data['strike_price'] * 0.995,
        }
        fx_leg = {
            'symbol': tick_data['currency_pair'],
            'side': 'SELL' if going_up else 'BUY',
            'quantity': int(tick_data['position_size'] * tick_data['exchange_rate']),
            'type': 'MARKET',
            'price': tick_data['current_rate'],
        }
        return {'legs': [option_leg, fx_leg], 'tif': 'IOC'}

    def stop_trading(self):
        """Ask the loop to stop and wait for queued orders to finish.

        The loop checks the flag between ticks, so a feed blocked in ``next``
        only notices the request once its next tick arrives.
        """
        self.running = False
        self.order_queue.join()
HMM能够有效刻画金融市场中的非线性状态转换特征,其在指数期权双币种套利中的应用显著优于传统阈值方法。重点关注状态转移矩阵的稳定性以及观测概率分布的变化,保留最核心的3个状态和4个观测变量,既能保证策略有效性,又可降低运维复杂度。