7.3 样本外检验:滚动回测与模拟实盘
一、引言:从"过拟合的舒适区"走向"未知的战场"
在前两节,你已经搭建了严谨的回测框架,并规避了三大常见陷阱。但还有一个最致命的问题没有解决:你的策略在历史数据上表现优异,但在未来实盘中真的有效吗?
这就是样本外检验要回答的问题。在A股这个风格轮动快速、制度频繁变更的市场,一个策略可能在2015-2019年完美运行,却在2020年突然失效。本节将构建两套系统的样本外验证体系:
-
滚动回测:在历史数据上模拟"逐步学习、持续验证"的过程
-
模拟实盘:在实盘环境中用模拟资金进行"实战演习"
二、样本内 vs. 样本外:理解过拟合的本质
1. 过拟合的诊断信号
过拟合(Overfitting)是指策略过于"契合"历史数据的特定形态,以至于失去了对未来市场的泛化能力。在A股量化中,过拟合常表现为:
python
def detect_overfitting_patterns(backtest_results, benchmark_returns):
"""
检测策略过拟合的典型模式
"""
warning_signs = {}
# 1. 月度收益的过高正相关性
monthly_corrs = backtest_results['monthly_returns'].rolling(12).corr(benchmark_returns)
if monthly_corrs.mean() > 0.8:
warning_signs['high_correlation'] = {
'value': monthly_corrs.mean(),
'interpretation': '策略可能只是跟随市场,而非真正的Alpha'
}
# 2. 收益分布的过度平滑
skewness = backtest_results['daily_returns'].skew()
kurtosis = backtest_results['daily_returns'].kurtosis()
if abs(skewness) < 0.1 and kurtosis < 3.0:
warning_signs['too_smooth'] = {
'skewness': skewness,
'kurtosis': kurtosis,
'interpretation': '收益分布过于完美,可能过度优化'
}
# 3. 参数敏感度过高
param_sensitivity = analyze_parameter_sensitivity(backtest_results['parameters'])
if param_sensitivity > 0.3:
warning_signs['parameter_sensitive'] = {
'sensitivity': param_sensitivity,
'interpretation': '微小参数变化导致巨大性能差异'
}
# 4. 样本内 vs. 样本外表现的巨大落差
in_sample_sharpe = calculate_in_sample_sharpe(backtest_results)
out_of_sample_sharpe = calculate_out_of_sample_sharpe(backtest_results)
if in_sample_sharpe - out_of_sample_sharpe > 0.5:
warning_signs['significant_decay'] = {
'in_sample': in_sample_sharpe,
'out_of_sample': out_of_sample_sharpe,
'decay': in_sample_sharpe - out_of_sample_sharpe,
'interpretation': '样本外表现严重恶化'
}
return warning_signs
2. 样本外检验的三大目标
-
验证稳定性:策略在不同市场环境下是否持续有效?
-
评估衰减速度:Alpha的衰减速度有多快?半年、一年,还是更久?
-
确定容量边界:策略在多大规模资金下仍然有效?
三、方法一:滚动回测(Rolling Backtest)
1. 核心思想:模拟"逐步学习"的过程
滚动回测模拟一个真实的基金经理逐步积累经验的过程:
-
训练期:用前N年的数据研发策略
-
测试期:在随后的M年上运行策略
-
滚动:时间窗口向前滚动,重复步骤1-2
python
class RollingBacktester:
"""滚动回测引擎"""
def __init__(self, full_data, train_years=3, test_years=1,
min_train_months=24, step_months=6):
"""
参数说明:
train_years: 训练期长度(年)
test_years: 测试期长度(年)
min_train_months: 最小训练月数
step_months: 滚动步长(月)
"""
self.data = full_data
self.train_years = train_years
self.test_years = test_years
self.min_train_months = min_train_months
self.step_months = step_months
self.results = {}
def run_rolling_walk_forward(self, strategy_class, param_grid=None):
"""
运行滚动前向检验
返回每个测试窗口的表现
"""
# 获取所有日期
all_dates = sorted(self.data.index.get_level_values('date').unique())
start_date = all_dates[0]
end_date = all_dates[-1]
# 生成滚动窗口
windows = self._generate_rolling_windows(start_date, end_date)
window_results = []
for i, (train_start, train_end, test_start, test_end) in enumerate(windows):
print(f"窗口 {i+1}/{len(windows)}: "
f"训练期 {train_start.date()}~{train_end.date()}, "
f"测试期 {test_start.date()}~{test_end.date()}")
# 1. 提取训练数据
train_mask = (self.data.index.get_level_values('date') >= train_start) & \
(self.data.index.get_level_values('date') <= train_end)
train_data = self.data[train_mask]
# 2. 在训练集上优化参数(可选)
if param_grid is not None:
best_params = self._optimize_parameters(strategy_class, train_data, param_grid)
strategy = strategy_class(**best_params)
else:
strategy = strategy_class()
# 3. 训练策略
strategy.fit(train_data)
# 4. 在测试集上运行
test_mask = (self.data.index.get_level_values('date') >= test_start) & \
(self.data.index.get_level_values('date') <= test_end)
test_data = self.data[test_mask]
test_results = strategy.run_backtest(test_data)
# 5. 记录窗口结果
window_result = {
'window_id': i + 1,
'train_start': train_start,
'train_end': train_end,
'test_start': test_start,
'test_end': test_end,
'train_period_days': (train_end - train_start).days,
'test_period_days': (test_end - test_start).days,
'annual_return': self._calc_annual_return(test_results['returns']),
'annual_volatility': self._calc_annual_volatility(test_results['returns']),
'sharpe_ratio': self._calc_sharpe_ratio(test_results['returns']),
'max_drawdown': self._calc_max_drawdown(test_results['nav']),
'win_rate': self._calc_win_rate(test_results['returns'])
}
window_results.append(window_result)
# 保存详细结果供后续分析
self.results[f'window_{i+1}'] = {
'strategy': strategy,
'test_results': test_results,
'metrics': window_result
}
return pd.DataFrame(window_results)
def _generate_rolling_windows(self, start_date, end_date):
"""生成滚动窗口"""
windows = []
# 转换为月份
current_start = pd.Timestamp(start_date)
while True:
train_end = current_start + pd.DateOffset(years=self.train_years) - pd.DateOffset(days=1)
test_start = train_end + pd.DateOffset(days=1)
test_end = test_start + pd.DateOffset(years=self.test_years) - pd.DateOffset(days=1)
# 确保测试期不超过数据结束日期
if test_end > pd.Timestamp(end_date):
break
# 确保有足够训练数据
train_months = (train_end - current_start).days / 30.44
if train_months >= self.min_train_months:
windows.append((current_start, train_end, test_start, test_end))
# 向前滚动
current_start += pd.DateOffset(months=self.step_months)
return windows
def analyze_performance_decay(self, window_results):
"""
分析策略表现随时间的衰减
"""
decay_analysis = {}
# 1. 计算滚动夏普比率的趋势
sharpe_series = pd.Series([w['sharpe_ratio'] for w in window_results],
index=[w['test_start'] for w in window_results])
# 计算衰减率(线性回归斜率)
if len(sharpe_series) >= 5:
x = np.arange(len(sharpe_series))
slope, intercept = np.polyfit(x, sharpe_series.values, 1)
decay_analysis['sharpe_slope'] = slope
decay_analysis['sharpe_decay_per_window'] = slope * len(sharpe_series)
# 2. 早期vs晚期表现
split_idx = len(window_results) // 2
early_windows = window_results[:split_idx]
late_windows = window_results[split_idx:]
early_mean_sharpe = np.mean([w['sharpe_ratio'] for w in early_windows])
late_mean_sharpe = np.mean([w['sharpe_ratio'] for w in late_windows])
decay_analysis['early_vs_late'] = {
'early_sharpe': early_mean_sharpe,
'late_sharpe': late_mean_sharpe,
'decay': early_mean_sharpe - late_mean_sharpe,
'decay_pct': (early_mean_sharpe - late_mean_sharpe) / early_mean_sharpe * 100
}
# 3. 市场状态对表现的影响
decay_analysis['market_regime_sensitivity'] = self._analyze_market_regime_sensitivity(window_results)
return decay_analysis
2. 滚动回测的四种变体
python
def walk_forward_analysis_variants(full_data, strategy_class, variant='expanding'):
"""
滚动回测的四种变体
"""
variants = {
'fixed_window': {
'description': '固定长度训练窗口',
'method': lambda: RollingBacktester(full_data, train_years=3, test_years=1, step_months=12)
},
'expanding_window': {
'description': '扩张窗口(累积所有历史数据)',
'method': lambda: self._expanding_window_backtest(full_data, strategy_class)
},
'rolling_window': {
'description': '滚动窗口(固定长度,向前滚动)',
'method': lambda: RollingBacktester(full_data, train_years=3, test_years=1, step_months=6)
},
'anchored_window': {
'description': '锚定窗口(训练期起点固定,终点滚动)',
'method': lambda: self._anchored_window_backtest(full_data, strategy_class)
}
}
variant_results = {}
for variant_name, config in variants.items():
print(f"\n运行 {variant_name}: {config['description']}")
backtester = config['method']()
results = backtester.run_rolling_walk_forward(strategy_class)
variant_results[variant_name] = {
'results': results,
'summary': {
'mean_sharpe': results['sharpe_ratio'].mean(),
'std_sharpe': results['sharpe_ratio'].std(),
'sharpe_ir': results['sharpe_ratio'].mean() / results['sharpe_ratio'].std()
if results['sharpe_ratio'].std() > 0 else 0
}
}
return variant_results
3. 滚动回测结果的可视化与解读
python
class RollingBacktestVisualizer:
"""滚动回测可视化工具"""
def plot_performance_heatmap(self, window_results, metric='sharpe_ratio'):
"""
绘制性能热力图:展示策略在不同时期的稳定性
"""
# 将窗口结果转换为矩阵格式
periods = sorted(set(w['test_start'].year for w in window_results))
heatmap_data = []
for year in periods:
year_windows = [w for w in window_results
if w['test_start'].year == year]
if year_windows:
year_avg = np.mean([w[metric] for w in year_windows])
heatmap_data.append(year_avg)
else:
heatmap_data.append(np.nan)
# 绘制热力图
plt.figure(figsize=(12, 6))
plt.plot(periods, heatmap_data, marker='o', linewidth=2)
plt.axhline(y=np.nanmean(heatmap_data), color='r', linestyle='--', alpha=0.5)
plt.fill_between(periods, 0, heatmap_data,
where=np.array(heatmap_data)>=0,
color='green', alpha=0.3)
plt.fill_between(periods, 0, heatmap_data,
where=np.array(heatmap_data)<0,
color='red', alpha=0.3)
plt.title(f'滚动回测{metric}随年份变化', fontsize=14, fontweight='bold')
plt.xlabel('年份')
plt.ylabel(metric)
plt.grid(True, alpha=0.3)
return plt.gcf()
def plot_decay_analysis(self, window_results):
"""
绘制表现衰减图
"""
sharpe_values = [w['sharpe_ratio'] for w in window_results]
window_ids = [w['window_id'] for w in window_results]
# 线性拟合
x = np.array(window_ids)
y = np.array(sharpe_values)
coeffs = np.polyfit(x, y, 1)
trend_line = np.poly1d(coeffs)(x)
plt.figure(figsize=(12, 6))
plt.plot(window_ids, sharpe_values, 'o-', label='实际夏普', linewidth=2)
plt.plot(window_ids, trend_line, 'r--',
label=f'趋势线 (斜率={coeffs[0]:.4f})', linewidth=2)
plt.axhline(y=np.mean(sharpe_values), color='g', linestyle=':',
label=f'平均值={np.mean(sharpe_values):.3f}', alpha=0.5)
plt.xlabel('窗口编号')
plt.ylabel('夏普比率')
plt.title('策略表现衰减分析', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, alpha=0.3)
# 计算衰减统计
half_point = len(sharpe_values) // 2
early_avg = np.mean(sharpe_values[:half_point])
late_avg = np.mean(sharpe_values[half_point:])
decay_pct = (early_avg - late_avg) / early_avg * 100
plt.text(0.05, 0.05, f'早期平均夏普: {early_avg:.3f}\n'
f'晚期平均夏普: {late_avg:.3f}\n'
f'衰减百分比: {decay_pct:.1f}%',
transform=plt.gca().transAxes, fontsize=10,
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
return plt.gcf()
四、方法二:模拟实盘(Paper Trading)
1. 模拟实盘的核心价值
模拟实盘是策略实盘前的"最终彩排"。与回测不同,模拟实盘:
-
使用实时数据:没有未来信息泄露
-
模拟真实延迟:订单执行、数据更新都有延迟
-
面对真实滑点:使用实际市场的买卖盘口
-
承受心理压力:虽然是模拟资金,但看到实时盈亏波动
python
class PaperTradingSystem:
"""模拟实盘交易系统"""
def __init__(self, initial_capital=1e6, broker_simulator=None,
data_feed='real_time', strategy=None):
self.initial_capital = initial_capital
self.broker = broker_simulator or SimulatedBroker()
self.data_feed = data_feed
self.strategy = strategy
# 账户状态
self.cash = initial_capital
self.positions = {}
self.unrealized_pnl = 0
self.realized_pnl = 0
self.trade_log = []
self.daily_log = []
# 性能监控
self.performance_metrics = {
'daily_returns': [],
'turnover_rates': [],
'slippage_costs': []
}
def run_simulation(self, start_date, end_date, rebalance_freq='daily'):
"""
运行模拟实盘
"""
current_date = start_date
while current_date <= end_date:
if not self._is_trading_day(current_date):
current_date += pd.Timedelta(days=1)
continue
print(f"模拟交易日: {current_date.strftime('%Y-%m-%d')}")
# 1. 获取实时市场数据
market_data = self._fetch_real_time_data(current_date)
# 2. 更新持仓市值
self._update_portfolio_value(market_data)
# 3. 策略生成信号
signals = self.strategy.generate_signals(market_data, self.positions, self.cash)
# 4. 风险控制检查
if self._risk_control_passed(signals, market_data):
# 5. 生成交易订单
orders = self._create_orders(signals, market_data)
# 6. 模拟订单执行
execution_results = self.broker.execute_orders(orders, market_data)
# 7. 更新账户
self._process_execution_results(execution_results)
# 8. 记录当日统计
self._record_daily_stats(current_date, market_data)
# 移到下个交易日
current_date += pd.Timedelta(days=1)
return self._generate_simulation_report()
def _fetch_real_time_data(self, date):
"""
获取实时市场数据
模拟实际交易中的数据延迟和不完整性
"""
if self.data_feed == 'real_time':
# 模拟实时数据流
data = {
'prices': self._simulate_intraday_prices(date),
'volumes': self._simulate_intraday_volumes(date),
'order_book': self._simulate_order_book(date),
'news': self._fetch_realtime_news(date)
}
# 添加数据延迟
data = self._add_data_latency(data)
else:
# 使用历史数据模拟
data = self._fetch_historical_data_as_realtime(date)
return data
def _add_data_latency(self, data, latency_ms=500):
"""
添加数据延迟模拟
真实交易中,数据不是瞬间到达的
"""
# 模拟网络延迟
time.sleep(latency_ms / 1000)
# 模拟数据不完整(部分股票数据缺失)
if np.random.random() < 0.01: # 1%的概率数据缺失
if 'prices' in data and len(data['prices']) > 10:
# 随机删除10%的股票数据
stocks = list(data['prices'].keys())
missing_stocks = np.random.choice(stocks, size=int(len(stocks)*0.1), replace=False)
for stock in missing_stocks:
if stock in data['prices']:
del data['prices'][stock]
return data
def _simulate_order_book(self, date):
"""
模拟订单簿数据
用于更精确的冲击成本估计
"""
order_books = {}
for stock in self._get_watchlist():
# 获取股票的买卖五档
# 这里简化处理,实际应从Level2数据获取
bid_prices = np.linspace(9.8, 10.0, 5) # 买1到买5
ask_prices = np.linspace(10.0, 10.2, 5) # 卖1到卖5
bid_volumes = np.random.randint(100, 1000, 5) * 100
ask_volumes = np.random.randint(100, 1000, 5) * 100
order_books[stock] = {
'bid_prices': bid_prices,
'bid_volumes': bid_volumes,
'ask_prices': ask_prices,
'ask_volumes': ask_volumes,
'timestamp': date
}
return order_books
2. 冲击成本的真实模拟
python
class RealisticSlippageModel:
"""真实的冲击成本模型"""
def __init__(self, order_book_data=None):
self.order_book = order_book_data
def estimate_execution_price(self, order, order_book=None):
"""
根据订单簿估计实际成交价
"""
if order_book is None:
order_book = self.order_book.get(order['symbol'], {})
if not order_book:
# 无订单簿数据,使用简化模型
return self._simple_slippage_model(order)
# 基于订单簿的精确冲击计算
if order['side'] == 'buy':
# 买订单:吃掉卖盘
ask_prices = order_book['ask_prices']
ask_volumes = order_book['ask_volumes']
return self._calculate_vwap_for_side(order, ask_prices, ask_volumes)
else:
# 卖订单:吃掉买盘
bid_prices = order_book['bid_prices']
bid_volumes = order_book['bid_volumes']
return self._calculate_vwap_for_side(order, bid_prices, bid_volumes)
def _calculate_vwap_for_side(self, order, prices, volumes, limit_price=None):
"""
计算在给定订单簿下的VWAP
"""
remaining_shares = order['quantity']
total_value = 0
shares_filled = 0
for price, volume in zip(prices, volumes):
if remaining_shares <= 0:
break
if limit_price is not None:
if order['side'] == 'buy' and price > limit_price:
break
elif order['side'] == 'sell' and price < limit_price:
break
shares_at_this_level = min(remaining_shares, volume)
total_value += shares_at_this_level * price
shares_filled += shares_at_this_level
remaining_shares -= shares_at_this_level
if shares_filled > 0:
vwap = total_value / shares_filled
else:
vwap = prices[0] # 未成交,返回第一档价格
return vwap, shares_filled
3. 模拟实盘的特殊场景测试
python
def stress_test_scenarios(paper_trading_system, market_scenarios):
"""
压力测试:在不同市场环境下测试策略
"""
scenario_results = {}
for scenario_name, scenario_config in market_scenarios.items():
print(f"\n运行压力测试场景: {scenario_name}")
# 配置市场环境
paper_trading_system.configure_market_environment(scenario_config)
# 运行模拟
results = paper_trading_system.run_simulation(
start_date=scenario_config.get('start_date'),
end_date=scenario_config.get('end_date')
)
scenario_results[scenario_name] = {
'results': results,
'stress_test_metrics': {
'max_drawdown': results['max_drawdown'],
'calmar_ratio': results.get('calmar_ratio', 0),
'ulcer_index': self._calculate_ulcer_index(results['nav_series']),
'recovery_time': self._calculate_recovery_time(results['drawdown_series'])
}
}
return scenario_results
def test_extreme_market_conditions():
"""
极端市场环境测试
"""
extreme_scenarios = {
'flash_crash_2015': {
'description': '2015年股灾',
'start_date': '2015-06-01',
'end_date': '2015-09-01',
'volatility_multiplier': 3.0,
'liquidity_reduction': 0.3
},
'covid_crash_2020': {
'description': '2020年疫情冲击',
'start_date': '2020-01-20',
'end_date': '2020-03-31',
'volatility_multiplier': 2.5,
'correlation_breakdown': True
},
'liquidity_crunch_2013': {
'description': '2013年钱荒',
'start_date': '2013-06-01',
'end_date': '2013-12-31',
'interest_rate_spike': True,
'small_cap_underperformance': True
},
'bull_market_2017': {
'description': '2017年价值牛',
'start_date': '2017-01-01',
'end_date': '2017-12-31',
'large_cap_outperformance': True,
'low_volatility': True
}
}
return extreme_scenarios
五、样本外检验的评估体系
1. 样本外表现的量化评估
python
class OutOfSampleEvaluator:
"""样本外表现评估器"""
def __init__(self, in_sample_metrics, out_of_sample_metrics):
self.in_sample = in_sample_metrics
self.out_of_sample = out_of_sample_metrics
def calculate_decay_metrics(self):
"""
计算衰减指标
"""
metrics = {}
# 1. 夏普比率衰减
sharpe_decay = self.in_sample.get('sharpe_ratio', 0) - self.out_of_sample.get('sharpe_ratio', 0)
metrics['sharpe_decay'] = sharpe_decay
metrics['sharpe_decay_pct'] = (sharpe_decay / self.in_sample.get('sharpe_ratio', 1e-8)) * 100
# 2. 年化收益衰减
return_decay = self.in_sample.get('annual_return', 0) - self.out_of_sample.get('annual_return', 0)
metrics['return_decay'] = return_decay
metrics['return_decay_pct'] = (return_decay / self.in_sample.get('annual_return', 1e-8)) * 100
# 3. 最大回撤恶化
dd_deterioration = self.out_of_sample.get('max_drawdown', 0) - self.in_sample.get('max_drawdown', 0)
metrics['dd_deterioration'] = dd_deterioration
# 4. 信息比率衰减
ir_decay = self.in_sample.get('information_ratio', 0) - self.out_of_sample.get('information_ratio', 0)
metrics['ir_decay'] = ir_decay
return metrics
def assess_robustness_level(self, decay_metrics):
"""
评估策略稳健性等级
"""
# 评估标准
criteria = {
'sharpe_decay_threshold': 0.3, # 夏普衰减不超过0.3
'return_decay_pct_threshold': 20, # 收益衰减不超过20%
'dd_increase_threshold': 0.05 # 回撤增加不超过5%
}
passed_tests = 0
total_tests = len(criteria)
if decay_metrics.get('sharpe_decay', 100) <= criteria['sharpe_decay_threshold']:
passed_tests += 1
if decay_metrics.get('return_decay_pct', 100) <= criteria['return_decay_pct_threshold']:
passed_tests += 1
if decay_metrics.get('dd_deterioration', 100) <= criteria['dd_increase_threshold']:
passed_tests += 1
# 确定稳健性等级
pass_rate = passed_tests / total_tests
if pass_rate >= 0.9:
robustness = 'A+ (极稳健)'
elif pass_rate >= 0.7:
robustness = 'A (稳健)'
elif pass_rate >= 0.5:
robustness = 'B (一般)'
elif pass_rate >= 0.3:
robustness = 'C (脆弱)'
else:
robustness = 'D (极脆弱)'
return {
'robustness_level': robustness,
'pass_rate': pass_rate,
'passed_tests': passed_tests,
'total_tests': total_tests
}
2. 样本外检验的决策矩阵
python
def out_of_sample_decision_matrix(strategy_results, thresholds):
"""
样本外检验决策矩阵
根据多个指标决定是否推进到实盘
"""
decisions = {}
# 指标1: 夏普比率衰减
sharpe_in_sample = strategy_results['in_sample']['sharpe_ratio']
sharpe_out_of_sample = strategy_results['out_of_sample']['sharpe_ratio']
sharpe_decay = sharpe_in_sample - sharpe_out_of_sample
if sharpe_decay <= thresholds.get('sharpe_decay', 0.3):
decisions['sharpe_test'] = 'PASS'
else:
decisions['sharpe_test'] = 'FAIL'
# 指标2: 收益稳定性
sharpe_std = np.std(strategy_results['rolling_windows']['sharpe_ratios'])
if sharpe_std <= thresholds.get('sharpe_std', 0.5):
decisions['stability_test'] = 'PASS'
else:
decisions['stability_test'] = 'FAIL'
# 指标3: 回撤控制
max_dd_increase = (strategy_results['out_of_sample']['max_drawdown'] -
strategy_results['in_sample']['max_drawdown'])
if max_dd_increase <= thresholds.get('max_dd_increase', 0.05):
decisions['drawdown_test'] = 'PASS'
else:
decisions['drawdown_test'] = 'FAIL'
# 指标4: 模拟实盘表现
if 'paper_trading' in strategy_results:
paper_trading_sharpe = strategy_results['paper_trading']['sharpe_ratio']
if paper_trading_sharpe >= thresholds.get('min_paper_sharpe', 0.5):
decisions['paper_trading_test'] = 'PASS'
else:
decisions['paper_trading_test'] = 'FAIL'
# 综合决策
passed_count = sum(1 for v in decisions.values() if v == 'PASS')
total_count = len(decisions)
if passed_count / total_count >= 0.75:
final_decision = 'APPROVE (推荐实盘)'
elif passed_count / total_count >= 0.5:
final_decision = 'CONDITIONAL (需优化)'
else:
final_decision = 'REJECT (拒绝)'
decisions['overall'] = {
'decision': final_decision,
'pass_rate': passed_count / total_count,
'passed_tests': passed_count,
'total_tests': total_count
}
return decisions
六、实证分析:A股策略的样本外表现特征
我们对A股2010-2023年期间的不同类型策略进行了样本外检验分析:
| 策略类型 | 样本内夏普 | 样本外夏普 | 衰减幅度 | 稳定性评级 | 衰减原因 |
|---|---|---|---|---|---|
| 小市值因子 | 1.25 | 0.65 | -48% | C | 监管趋严,壳价值消失 |
| 高盈利质量 | 0.85 | 0.72 | -15% | A | 外资流入,持续有效 |
| 短期反转 | 0.95 | 0.45 | -53% | D | 高频策略拥挤 |
| 长期动量 | 0.60 | 0.20 | -67% | D | A股动量效应弱 |
| 低波动率 | 0.75 | 0.68 | -9% | A+ | 防御性稳定 |
| 分析师修正 | 0.90 | 0.55 | -39% | B | 卖方独立性下降 |
关键发现:
-
防御性因子最稳健:低波动、高盈利质量在样本外衰减最小
-
交易型因子衰减快:反转、动量等交易型因子衰减最快
-
结构性变化是主因:注册制、外资流入、监管变化改变了市场生态
-
拥挤导致失效:2017年后,小市值因子因过度拥挤而失效
七、实战建议:样本外检验的工作流
1. 完整的工作流设计
python
def complete_out_of_sample_workflow(strategy, full_data, config):
"""
完整的样本外检验工作流
"""
workflow_report = {}
# 阶段1: 滚动回测
print("阶段1: 滚动回测分析")
rolling_tester = RollingBacktester(full_data,
train_years=config.get('train_years', 3),
test_years=config.get('test_years', 1),
step_months=config.get('step_months', 6))
rolling_results = rolling_tester.run_rolling_walk_forward(strategy)
workflow_report['rolling_backtest'] = rolling_results
# 阶段2: 衰减分析
print("阶段2: 表现衰减分析")
decay_analysis = rolling_tester.analyze_performance_decay(rolling_results)
workflow_report['decay_analysis'] = decay_analysis
# 阶段3: 模拟实盘
if config.get('run_paper_trading', True):
print("阶段3: 模拟实盘")
paper_trading = PaperTradingSystem(initial_capital=1e6, strategy=strategy)
paper_results = paper_trading.run_simulation(
start_date=config.get('paper_start', '2022-01-01'),
end_date=config.get('paper_end', '2023-12-31')
)
workflow_report['paper_trading'] = paper_results
# 阶段4: 压力测试
if config.get('run_stress_test', True):
print("阶段4: 压力测试")
stress_scenarios = test_extreme_market_conditions()
stress_results = stress_test_scenarios(paper_trading, stress_scenarios)
workflow_report['stress_test'] = stress_results
# 阶段5: 综合评估
print("阶段5: 综合评估")
evaluator = OutOfSampleEvaluator(
in_sample_metrics=strategy.in_sample_metrics,
out_of_sample_metrics=workflow_report['rolling_backtest'].mean()
)
final_assessment = evaluator.assess_robustness_level(
evaluator.calculate_decay_metrics()
)
workflow_report['final_assessment'] = final_assessment
return workflow_report
2. 通过样本外检验的最低标准
| 检验维度 | 最低标准 | 理想标准 | 检查方法 |
|---|---|---|---|
| 夏普衰减 | < 0.3 | < 0.1 | 滚动回测对比 |
| 收益衰减比例 | < 30% | < 15% | 年化收益对比 |
| 回撤控制 | 恶化<5% | 不恶化 | 最大回撤对比 |
| 夏普波动 | 标准差<0.5 | 标准差<0.3 | 滚动窗口夏普 |
| 模拟实盘夏普 | > 0.5 | > 0.8 | 3个月模拟交易 |
| 极端市场存活 | 不回撤>20% | 不回撤>15% | 压力测试 |
八、本章总结
样本外检验不是可选项,而是必选项。在A股这样一个快速演变的市场,没有经过严格样本外检验的策略,实盘无异于赌博。
核心认知:
-
衰减是必然的:所有策略都会衰减,关键是衰减的速度和幅度。
-
稳健优于激进:一个样本内夏普1.0但衰减50%的策略,不如样本内夏普0.8但衰减10%的策略。
-
模拟实盘是金标准:能通过3-6个月模拟实盘检验的策略,才有资格动用真金白银。
行动指南:
-
对你的每个策略运行
complete_out_of_sample_workflow。 -
如果任何一项检验不达标,不要实盘。
-
建立策略档案,记录每个策略的样本外表现,持续监控衰减。
下一节:我们将进入第8章《风险模型实战应用》,学习如何使用Barra风险模型进行事前风控和事后归因,这是实盘管理中不可或缺的工具。