核心原理图
┌─────────────────────────────────────────────────────────────────────┐
│ Main Thread RunLoop │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ Entry │───▶│ Timers │───▶│ Sources │───▶│ Observer │ │
│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │
│ │ │ │
│ │ ┌─────────────────────────────────────┘ │
│ │ ▼ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ │BeforeWait│───▶│ Waiting │───▶│AfterWait │ │
│ │ └──────────┘ └──────────┘ └──────────┘ │
│ │ │ │
│ └─────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
│
│ 监控
▼
┌─────────────────────────────────────────────────────────────────────┐
│ Monitor Thread │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ ┌────────────────┐ ┌────────────────┐ ┌────────────────┐ │
│ │ 等待信号量 │────▶│ 检查状态变化 │────▶│ 超时?采样堆栈 │ │
│ │ (带超时) │ │ │ │ │ │
│ └────────────────┘ └────────────────┘ └────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
完整实现代码
1. 头文件定义
objc
// LagMonitor.h
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Severity of a detected stall, bucketed by how long the stall has lasted.
/// The numeric values are explicit because they are forwarded as-is in lag reports.
typedef NS_ENUM(NSUInteger, LagLevel) {
LagLevelNormal = 0, // Normal: below the mild-stall range
LagLevelMild = 1, // Mild stall (50-100ms)
LagLevelModerate = 2, // Moderate stall (100-250ms)
LagLevelSevere = 3, // Severe stall (250-500ms)
LagLevelFatal = 4 // Fatal stall (>500ms)
};
/// Value object describing one detected stall. A fresh instance is created
/// for every report and handed to the LagMonitorCallback.
@interface LagInfo : NSObject
/// Severity bucket derived from `duration`.
@property (nonatomic, assign) LagLevel level;
@property (nonatomic, assign) NSTimeInterval duration; // Stall duration in MILLISECONDS (not seconds, despite the NSTimeInterval type)
@property (nonatomic, copy) NSArray<NSString *> *callStack; // Sampled call stack, one formatted frame per element
@property (nonatomic, copy) NSString *symbolStack; // The same frames joined into a single newline-separated string
@property (nonatomic, strong) NSDate *timestamp; // Wall-clock time at which the stall was reported
@property (nonatomic, copy) NSDictionary *context; // Extra context attached by the monitor (string keys)
@end
/// Block invoked once per detected stall.
/// @param lagInfo Description of the stall (level, duration, stack, context).
typedef void(^LagMonitorCallback)(LagInfo *lagInfo);
/// Detects stalls by observing run-loop activity and sampling a call stack
/// when an activity lasts longer than `threshold`.
@interface LagMonitor : NSObject
/// Process-wide shared instance.
@property (nonatomic, class, readonly) LagMonitor *shared;
/// Stall threshold in milliseconds. Defaults to 100ms.
@property (nonatomic, assign) NSTimeInterval threshold;
/// Sampling interval in milliseconds (the timeout of each monitor wait). Defaults to 50ms.
@property (nonatomic, assign) NSTimeInterval sampleInterval;
/// YES between a call to -startWithCallback: and the matching -stop.
@property (nonatomic, assign, readonly) BOOL isMonitoring;
/// Starts monitoring. Does nothing if monitoring is already active.
/// @param callback Invoked on the main queue for every detected stall.
- (void)startWithCallback:(LagMonitorCallback)callback;
/// Stops monitoring. Does nothing if monitoring is not active.
- (void)stop;
/// Manually samples the monitored thread's stack (intended for debugging).
/// @return Formatted frames, innermost first; empty if the stack could not be captured.
- (NSArray<NSString *> *)captureMainThreadStack;
@end
NS_ASSUME_NONNULL_END
2. 核心实现
objc
// LagMonitor.m
#import "LagMonitor.h"
#import <mach/mach.h>
#import <pthread.h>
#import <execinfo.h>
#import <dlfcn.h>
// Maximum number of frames collected per stack sample; also sizes the
// on-stack address buffer used while sampling.
static const int kMaxStackDepth = 128;
// Most recent run-loop activity reported by the observer, and when it was reported.
typedef struct {
CFRunLoopActivity activity; // Last activity delivered to the observer
uint64_t timestamp; // mach_absolute_time() reading taken when `activity` was recorded
} RunLoopState;
// LagInfo is a plain data holder: it defines no methods of its own, so all
// property accessors are the compiler-synthesized ones.
@implementation LagInfo
@end
// Private state of LagMonitor.
@interface LagMonitor () {
// Run-loop observer installed on the main run loop while monitoring; NULL otherwise.
CFRunLoopObserverRef _observer;
// Signalled on every observed run-loop activity change and on -stop;
// the monitor loop waits on it with a timeout.
dispatch_semaphore_t _semaphore;
// Latest activity + timestamp written by the observer, read by the monitor loop.
RunLoopState _currentState;
// Guards _currentState, which is shared between the observer and the monitor loop.
pthread_mutex_t _stateMutex;
// Mach port of the thread whose stack is sampled (intended to be the main thread).
thread_t _mainThread;
// Backing storage for the public read-only `isMonitoring` property.
BOOL _isMonitoring;
}
// Serial queue on which the blocking monitor loop runs.
@property (nonatomic, strong) dispatch_queue_t monitorQueue;
// Client callback supplied to -startWithCallback:.
@property (nonatomic, copy) LagMonitorCallback callback;
// Number of consecutive over-threshold checks; reset when a semaphore signal arrives.
@property (nonatomic, assign) NSInteger consecutiveTimeouts;
@end
@implementation LagMonitor

#pragma mark - Suspension-safe C helpers

// IMPORTANT: everything that runs between thread_suspend() and thread_resume()
// must be plain C working on stack memory only. The suspended thread may be
// holding the malloc lock, the dyld lock or the Objective-C runtime lock, so
// sending a message, allocating, or calling dladdr() in that window can
// deadlock the whole process. That is why the stack walker lives in static C
// functions instead of methods.

/// Cheap plausibility check for a frame-pointer value.
/// This is only a range heuristic; it does not prove the address is mapped.
static BOOL LagMonitorIsPlausibleAddress(uintptr_t address) {
    if (address == 0) return NO;
#if defined(__arm64__)
    return (address > 0x100000000ULL && address < 0x800000000000ULL);
#else
    return (address > 0x1000 && address < 0x7FFFFFFFFFFFULL);
#endif
}

/// Copies one frame record (saved frame pointer, return address) out of the
/// target stack without dereferencing it directly, so an unmapped or corrupt
/// frame pointer makes the walk stop instead of crashing the process.
/// @return YES when both words were read.
static BOOL LagMonitorReadFrameRecord(uintptr_t frameAddress, uintptr_t record[2]) {
    if (!LagMonitorIsPlausibleAddress(frameAddress)) return NO;

    const vm_size_t wanted = (vm_size_t)(2 * sizeof(uintptr_t));
    vm_size_t copied = 0;
    kern_return_t kr = vm_read_overwrite(mach_task_self(),
                                         (vm_address_t)frameAddress,
                                         wanted,
                                         (vm_address_t)record,
                                         &copied);
    return (kr == KERN_SUCCESS && copied == wanted);
}

/// Follows the frame-pointer chain starting at `framePointer`, appending each
/// return address to `buffer`.
/// @param count Number of entries already present in `buffer`.
/// @return The new number of entries in `buffer` (never more than `maxDepth`).
static int LagMonitorWalkFrames(uintptr_t framePointer,
                                uintptr_t *buffer,
                                int count,
                                int maxDepth) {
    uintptr_t frame = framePointer;
    while (frame != 0 && count < maxDepth) {
        uintptr_t record[2] = {0, 0};
        if (!LagMonitorReadFrameRecord(frame, record)) break;

        const uintptr_t savedFramePointer = record[0];
        const uintptr_t returnAddress = record[1];
        if (returnAddress != 0) {
            buffer[count++] = returnAddress;
        }
        // The stack grows downwards, so a caller's frame must sit at a higher
        // address; anything else means a corrupt chain or a loop.
        if (savedFramePointer <= frame) break;
        frame = savedFramePointer;
    }
    return count;
}

/// Builds a list of code addresses from a captured register context.
/// @return Number of addresses written to `buffer`.
static int LagMonitorBacktraceFromContext(_STRUCT_MCONTEXT *context,
                                          uintptr_t *buffer,
                                          int maxDepth) {
    int count = 0;
#if defined(__arm64__)
    // NOTE(review): direct field access assumes the non-opaque thread-state
    // layout; arm64e builds would need the arm_thread_state64_get_* accessors
    // and pointer-authentication stripping - confirm the supported slices.
    const uintptr_t pc = context->__ss.__pc;
    const uintptr_t lr = context->__ss.__lr;
    const uintptr_t fp = context->__ss.__fp;
    // Frame 0 is the program counter, frame 1 the link register.
    if (pc && count < maxDepth) buffer[count++] = pc;
    if (lr && count < maxDepth) buffer[count++] = lr;
    count = LagMonitorWalkFrames(fp, buffer, count, maxDepth);
#elif defined(__x86_64__)
    const uintptr_t rip = context->__ss.__rip;
    const uintptr_t rbp = context->__ss.__rbp;
    if (rip && count < maxDepth) buffer[count++] = rip;
    count = LagMonitorWalkFrames(rbp, buffer, count, maxDepth);
#endif
    return count;
}

#pragma mark - Singleton

/// Returns the process-wide monitor, creating it on first use.
+ (LagMonitor *)shared {
    static LagMonitor *instance;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        instance = [[LagMonitor alloc] init];
    });
    return instance;
}

/// Sets the defaults (100ms threshold, 50ms sample interval) and creates the
/// synchronisation primitives.
- (instancetype)init {
    if (self = [super init]) {
        _threshold = 100;      // 100ms
        _sampleInterval = 50;  // 50ms
        _semaphore = dispatch_semaphore_create(0);
        _monitorQueue = dispatch_queue_create("com.lagmonitor.queue", DISPATCH_QUEUE_SERIAL);
        pthread_mutex_init(&_stateMutex, NULL);
        // Only record the thread port when we really are on the main thread.
        // mach_thread_self() would record whichever thread happened to touch
        // +shared first (and leak a port right); pthread_mach_thread_np() adds
        // no reference. When created off the main thread, the port is filled
        // in later by -setupRunLoopObserver, which runs on the main queue.
        _mainThread = pthread_main_np() ? pthread_mach_thread_np(pthread_self())
                                        : MACH_PORT_NULL;
    }
    return self;
}

- (void)dealloc {
    [self stop];
    pthread_mutex_destroy(&_stateMutex);
}

#pragma mark - Public Methods

/// Starts monitoring; a second call while already monitoring is ignored.
/// @param callback Invoked on the main queue for each detected stall.
- (void)startWithCallback:(LagMonitorCallback)callback {
    pthread_mutex_lock(&_stateMutex);
    const BOOL alreadyMonitoring = _isMonitoring;
    if (!alreadyMonitoring) {
        _isMonitoring = YES;
        _callback = [callback copy];
        _consecutiveTimeouts = 0;
        // Forget the state left over from a previous session; otherwise the
        // first timeout after a restart could report a stall measured from a
        // timestamp recorded before -stop.
        _currentState.activity = 0;
        _currentState.timestamp = 0;
    }
    pthread_mutex_unlock(&_stateMutex);
    if (alreadyMonitoring) return;

    // The observer must be installed from the main thread.
    dispatch_async(dispatch_get_main_queue(), ^{
        [self setupRunLoopObserver];
    });
    // Start the blocking monitor loop on its own serial queue.
    [self startMonitorThread];
}

/// Stops monitoring, removes the run-loop observer and wakes the monitor loop
/// so that it can exit. Ignored when monitoring is not active.
- (void)stop {
    pthread_mutex_lock(&_stateMutex);
    const BOOL wasMonitoring = _isMonitoring;
    _isMonitoring = NO;
    CFRunLoopObserverRef observer = _observer;
    _observer = NULL;
    pthread_mutex_unlock(&_stateMutex);

    // Removal happens outside the mutex so the run-loop lock is never taken
    // while _stateMutex is held by this thread.
    if (observer) {
        CFRunLoopRemoveObserver(CFRunLoopGetMain(), observer, kCFRunLoopCommonModes);
        CFRelease(observer);
    }
    if (!wasMonitoring) return;

    // Wake the monitor loop so it notices _isMonitoring == NO.
    dispatch_semaphore_signal(_semaphore);
}

/// Samples the monitored thread's stack on demand.
/// @return Formatted frames; empty when the main thread's port is not known
///         yet (monitor created off the main thread and never started) or the
///         capture failed.
- (NSArray<NSString *> *)captureMainThreadStack {
    return [self captureStackForThread:_mainThread];
}

#pragma mark - RunLoop Observer

/// Installs an observer for every run-loop activity on the main run loop.
/// Runs on the main queue. If -stop was called before this block got to run,
/// nothing is installed, so a quick start/stop cannot leave an orphaned
/// observer behind.
- (void)setupRunLoopObserver {
    __weak typeof(self) weakSelf = self;

    pthread_mutex_lock(&_stateMutex);
    if (_isMonitoring && _observer == NULL) {
        if (_mainThread == MACH_PORT_NULL && pthread_main_np()) {
            _mainThread = pthread_mach_thread_np(pthread_self());
        }
        CFRunLoopObserverRef observer = CFRunLoopObserverCreateWithHandler(
            kCFAllocatorDefault,
            kCFRunLoopAllActivities,
            YES,  // repeats
            0,    // order
            ^(CFRunLoopObserverRef obs, CFRunLoopActivity activity) {
                [weakSelf runLoopActivityChanged:activity];
            });
        if (observer) {
            CFRunLoopAddObserver(CFRunLoopGetMain(), observer, kCFRunLoopCommonModes);
            _observer = observer;
        }
    }
    pthread_mutex_unlock(&_stateMutex);
}

/// Observer callback: records the new activity with a timestamp and signals
/// the monitor loop that the run loop made progress.
- (void)runLoopActivityChanged:(CFRunLoopActivity)activity {
    pthread_mutex_lock(&_stateMutex);
    _currentState.activity = activity;
    _currentState.timestamp = mach_absolute_time();
    pthread_mutex_unlock(&_stateMutex);

    dispatch_semaphore_signal(_semaphore);
}

#pragma mark - Monitor Thread

/// Schedules the monitor loop on the serial monitor queue.
- (void)startMonitorThread {
    dispatch_async(_monitorQueue, ^{
        [self monitorLoop];
    });
}

/// Blocks on the semaphore with a timeout of `sampleInterval` milliseconds.
/// A signal means the run loop moved on; a timeout triggers a stall check.
- (void)monitorLoop {
    while (_isMonitoring) {
        @autoreleasepool {
            // A zero or negative interval would turn this loop into a busy
            // spin, so wait for at least one millisecond.
            const NSTimeInterval intervalMs = MAX(_sampleInterval, 1.0);
            long waitResult = dispatch_semaphore_wait(
                _semaphore,
                dispatch_time(DISPATCH_TIME_NOW, (int64_t)(intervalMs * NSEC_PER_MSEC)));
            if (!_isMonitoring) break;

            if (waitResult != 0) {
                // Timed out: no activity change during the interval.
                [self checkLag];
            } else {
                // Signalled: the run loop changed state normally.
                _consecutiveTimeouts = 0;
            }
        }
    }
}

/// Called after a timed-out wait. If the run loop has been in BeforeSources
/// or AfterWaiting (the two states in which it is processing events) for at
/// least `threshold` milliseconds, samples the stack, logs it, and delivers
/// a LagInfo to the callback on the main queue.
- (void)checkLag {
    pthread_mutex_lock(&_stateMutex);
    const CFRunLoopActivity activity = _currentState.activity;
    const uint64_t stateTimestamp = _currentState.timestamp;
    // Snapshot the callback now so the block below never invokes a nil or
    // replaced block.
    LagMonitorCallback callback = _callback;
    pthread_mutex_unlock(&_stateMutex);

    if (activity != kCFRunLoopBeforeSources &&
        activity != kCFRunLoopAfterWaiting) {
        return;
    }

    const uint64_t now = mach_absolute_time();
    const NSTimeInterval duration = [self machTimeToMs:now - stateTimestamp];
    if (duration < _threshold) return;

    _consecutiveTimeouts++;

    NSArray<NSString *> *stack = [self captureStackForThread:_mainThread];
    const LagLevel level = [self lagLevelForDuration:duration];

    LagInfo *info = [[LagInfo alloc] init];
    info.level = level;
    info.duration = duration;
    info.callStack = stack;
    info.symbolStack = [stack componentsJoinedByString:@"\n"];
    info.timestamp = [NSDate date];
    info.context = @{
        @"activity": [self activityName:activity],
        @"consecutiveTimeouts": @(_consecutiveTimeouts)
    };

    if (callback) {
        dispatch_async(dispatch_get_main_queue(), ^{
            callback(info);
        });
    }

    NSLog(@"[LagMonitor] 检测到卡顿 - 级别:%@ 时长:%.2fms\n%@",
          [self lagLevelName:level], duration, info.symbolStack);
}

#pragma mark - Stack Capture

/// Captures and symbolizes the call stack of `thread`.
///
/// For another thread: the thread is suspended, its registers are read, the
/// frame-pointer chain is walked into a stack buffer, and the thread is
/// resumed BEFORE any symbolication or allocation takes place.
/// For the calling thread itself: backtrace() is used, because suspending
/// one's own thread would never return.
///
/// @return Formatted frames, innermost first; empty on failure.
- (NSArray<NSString *> *)captureStackForThread:(thread_t)thread {
    if (thread == MACH_PORT_NULL) return @[];

    uintptr_t addresses[kMaxStackDepth];
    int frameCount = 0;

    if (thread == pthread_mach_thread_np(pthread_self())) {
        void *frames[kMaxStackDepth];
        const int captured = backtrace(frames, kMaxStackDepth);
        for (int i = 0; i < captured; i++) {
            addresses[frameCount++] = (uintptr_t)frames[i];
        }
    } else {
        if (thread_suspend(thread) != KERN_SUCCESS) {
            return @[];
        }

        // ---- suspended window: C only, no allocation, no messaging ----
        _STRUCT_MCONTEXT machineContext;
#if defined(__arm64__)
        thread_state_flavor_t flavor = ARM_THREAD_STATE64;
        mach_msg_type_number_t stateCount = ARM_THREAD_STATE64_COUNT;
#elif defined(__x86_64__)
        thread_state_flavor_t flavor = x86_THREAD_STATE64;
        mach_msg_type_number_t stateCount = x86_THREAD_STATE64_COUNT;
#else
#error "Unsupported architecture"
#endif
        kern_return_t kr = thread_get_state(thread,
                                            flavor,
                                            (thread_state_t)&machineContext.__ss,
                                            &stateCount);
        if (kr == KERN_SUCCESS) {
            frameCount = LagMonitorBacktraceFromContext(&machineContext,
                                                        addresses,
                                                        kMaxStackDepth);
        }
        thread_resume(thread);
        // ---- end of suspended window ----
    }

    NSMutableArray<NSString *> *result = [NSMutableArray arrayWithCapacity:(NSUInteger)frameCount];
    for (int i = 0; i < frameCount; i++) {
        [result addObject:[self symbolForAddress:addresses[i]]];
    }
    return result;
}

/// Walks the call stack described by a register context.
/// Thin wrapper over the C implementation; prefer calling the C function
/// directly while a thread is suspended.
/// @return Number of addresses written to `buffer`.
- (int)backtraceFromContext:(_STRUCT_MCONTEXT *)context
                     buffer:(uintptr_t *)buffer
                   maxDepth:(int)maxDepth {
    return LagMonitorBacktraceFromContext(context, buffer, maxDepth);
}

/// Range-based plausibility check for an address (not a mapping check).
- (BOOL)isValidAddress:(uintptr_t)address {
    return LagMonitorIsPlausibleAddress(address);
}

/// Formats an address as "<image> <symbol> + <offset>" using dladdr().
/// When the image is known but no symbol covers the address, the offset is
/// taken from the image base so the frame can still be symbolicated offline.
/// Falls back to the raw hexadecimal address when dladdr() fails.
- (NSString *)symbolForAddress:(uintptr_t)address {
    Dl_info info;
    if (dladdr((void *)address, &info)) {
        NSString *symbolName = info.dli_sname ?
            [NSString stringWithUTF8String:info.dli_sname] : @"???";
        NSString *imageName = info.dli_fname ?
            [[NSString stringWithUTF8String:info.dli_fname] lastPathComponent] : @"???";
        const uintptr_t base = info.dli_saddr ? (uintptr_t)info.dli_saddr
                                              : (uintptr_t)info.dli_fbase;
        const uintptr_t offset = address - base;
        return [NSString stringWithFormat:@"%@ %@ + %lu",
                imageName, symbolName, (unsigned long)offset];
    }
    return [NSString stringWithFormat:@"0x%lx", (unsigned long)address];
}

#pragma mark - Utilities

/// Converts a mach_absolute_time() delta to milliseconds.
- (NSTimeInterval)machTimeToMs:(uint64_t)machTime {
    static mach_timebase_info_data_t timebase;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        mach_timebase_info(&timebase);
    });
    return (double)machTime * timebase.numer / timebase.denom / 1e6;
}

/// Maps a stall duration in milliseconds to its severity bucket.
- (LagLevel)lagLevelForDuration:(NSTimeInterval)duration {
    if (duration < 50) return LagLevelNormal;
    if (duration < 100) return LagLevelMild;
    if (duration < 250) return LagLevelModerate;
    if (duration < 500) return LagLevelSevere;
    return LagLevelFatal;
}

/// Display name of a severity level, used in the log line.
- (NSString *)lagLevelName:(LagLevel)level {
    switch (level) {
        case LagLevelNormal: return @"正常";
        case LagLevelMild: return @"轻微";
        case LagLevelModerate: return @"中度";
        case LagLevelSevere: return @"严重";
        case LagLevelFatal: return @"致命";
    }
    // Reached only for a value outside the enum; without this return the
    // method would fall off its end.
    return @"未知";
}

/// Name of a run-loop activity, stored in LagInfo.context.
- (NSString *)activityName:(CFRunLoopActivity)activity {
    switch (activity) {
        case kCFRunLoopEntry: return @"Entry";
        case kCFRunLoopBeforeTimers: return @"BeforeTimers";
        case kCFRunLoopBeforeSources: return @"BeforeSources";
        case kCFRunLoopBeforeWaiting: return @"BeforeWaiting";
        case kCFRunLoopAfterWaiting: return @"AfterWaiting";
        case kCFRunLoopExit: return @"Exit";
        default: return @"Unknown";
    }
}

@end
3. 增强版:多次采样聚合
objc
// LagStackAggregator.h - groups repeated stack samples

/// Collects stack samples and reports which stack was seen most often.
/// Two samples are considered the same stack when their leading frames
/// (up to five) are identical.
/// NOTE(review): no internal locking - presumably used from a single
/// thread/queue; confirm against callers.
@interface LagStackAggregator : NSObject
/// Adds one sampled stack. Empty stacks are ignored.
- (void)addSample:(NSArray<NSString *> *)stack;
/// Returns the most frequently sampled stack (the likely cause of the stall):
/// the first full sample recorded for the most frequent group, or an empty
/// array when nothing has been sampled.
- (NSArray<NSString *> *)getMostFrequentStack;
/// Returns counters describing all samples: "totalSamples", "uniqueStacks"
/// and "stackDistribution" (group key -> occurrence count).
- (NSDictionary *)getStatistics;
/// Discards every recorded sample and counter.
- (void)reset;
@end

// LagStackAggregator.m

/// Number of leading frames that identify a stack group.
static const NSUInteger kLagStackKeyFrameCount = 5;

/// Builds the grouping key of a stack: its leading frames joined with "|".
static NSString *LagStackGroupKey(NSArray<NSString *> *stack) {
    NSUInteger length = MIN(kLagStackKeyFrameCount, stack.count);
    return [[stack subarrayWithRange:NSMakeRange(0, length)]
            componentsJoinedByString:@"|"];
}

@implementation LagStackAggregator {
    NSMutableArray<NSArray<NSString *> *> *_samples;
    NSMutableDictionary<NSString *, NSNumber *> *_stackCounts;
    // First full sample seen for each group key, so the most frequent stack
    // can be returned without re-keying every stored sample.
    NSMutableDictionary<NSString *, NSArray<NSString *> *> *_firstStackByKey;
}

- (instancetype)init {
    if (self = [super init]) {
        _samples = [NSMutableArray array];
        _stackCounts = [NSMutableDictionary dictionary];
        _firstStackByKey = [NSMutableDictionary dictionary];
    }
    return self;
}

- (void)addSample:(NSArray<NSString *> *)stack {
    if (stack.count == 0) return;

    // Store an immutable snapshot: the caller may hand in a mutable array and
    // change it afterwards, which would silently alter recorded samples.
    NSArray<NSString *> *snapshot = [stack copy];
    [_samples addObject:snapshot];

    NSString *key = LagStackGroupKey(snapshot);
    _stackCounts[key] = @(_stackCounts[key].integerValue + 1);
    if (_firstStackByKey[key] == nil) {
        _firstStackByKey[key] = snapshot;
    }
}

- (NSArray<NSString *> *)getMostFrequentStack {
    if (_samples.count == 0) return @[];

    // Pick the group with the highest count. On a tie the winner depends on
    // dictionary enumeration order.
    __block NSString *maxKey = nil;
    __block NSInteger maxCount = 0;
    [_stackCounts enumerateKeysAndObjectsUsingBlock:^(NSString *key, NSNumber *count, BOOL *stop) {
        if (count.integerValue > maxCount) {
            maxCount = count.integerValue;
            maxKey = key;
        }
    }];

    NSArray<NSString *> *representative = maxKey ? _firstStackByKey[maxKey] : nil;
    return representative ?: (_samples.lastObject ?: @[]);
}

- (NSDictionary *)getStatistics {
    return @{
        @"totalSamples": @(_samples.count),
        @"uniqueStacks": @(_stackCounts.count),
        @"stackDistribution": [_stackCounts copy]
    };
}

- (void)reset {
    [_samples removeAllObjects];
    [_stackCounts removeAllObjects];
    [_firstStackByKey removeAllObjects];
}

@end
4. 使用示例
objc
// AppDelegate.m
/// Configures the shared LagMonitor and starts it at launch. Every detected
/// stall is logged and reported; in DEBUG builds severe stalls also raise an
/// alert.
- (BOOL)application:(UIApplication *)application
didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    LagMonitor *monitor = [LagMonitor shared];
    monitor.threshold = 100;      // stall threshold: 100ms
    monitor.sampleInterval = 50;  // sampling interval: 50ms

    [monitor startWithCallback:^(LagInfo *lagInfo) {
        NSLog(@"⚠️ 检测到卡顿!");
        // LagLevel is an NSUInteger, whose width depends on the architecture;
        // cast explicitly so the value always matches the %lu specifier.
        NSLog(@"级别: %lu, 时长: %.2fms", (unsigned long)lagInfo.level, lagInfo.duration);
        NSLog(@"堆栈:\n%@", lagInfo.symbolStack);

        // Upload to the monitoring backend.
        [self reportLagInfo:lagInfo];

        // During development, surface severe stalls immediately.
#if DEBUG
        if (lagInfo.level >= LagLevelSevere) {
            [self showLagAlert:lagInfo];
        }
#endif
    }];
    return YES;
}
/// Builds the payload that describes a stall for the monitoring backend.
/// The actual upload is left to the integrator.
/// @param info The stall to report.
- (void)reportLagInfo:(LagInfo *)info {
    // A dictionary literal throws on a nil value, so every entry that could
    // be nil gets a fallback - including the device info, like its siblings.
    NSDictionary *report = @{
        @"type": @"lag",
        @"level": @(info.level),
        @"duration": @(info.duration),
        @"stack": info.callStack ?: @[],
        @"timestamp": @([info.timestamp timeIntervalSince1970]),
        @"context": info.context ?: @{},
        @"deviceInfo": [self deviceInfo] ?: @{}
    };
    // Send `report` to the server...
}
/// Shows an alert with the stall duration and the innermost stack frame
/// (truncated to 100 characters). Intended for development builds.
/// @param info The stall to display.
- (void)showLagAlert:(LagInfo *)info {
    dispatch_async(dispatch_get_main_queue(), ^{
        // An empty stack must not show up as "(null)" in the message.
        NSString *topFrame = info.callStack.firstObject ?: @"";
        if (topFrame.length > 100) {
            topFrame = [topFrame substringToIndex:100];
        }
        NSString *message = [NSString stringWithFormat:@"时长: %.0fms\n\n%@",
                             info.duration, topFrame];

        UIAlertController *alert = [UIAlertController
            alertControllerWithTitle:@"检测到严重卡顿"
                             message:message
                      preferredStyle:UIAlertControllerStyleAlert];
        [alert addAction:[UIAlertAction actionWithTitle:@"查看详情"
                                                  style:UIAlertActionStyleDefault
                                                handler:^(UIAlertAction *action) {
            NSLog(@"完整堆栈:\n%@", info.symbolStack);
        }]];
        [alert addAction:[UIAlertAction actionWithTitle:@"忽略"
                                                  style:UIAlertActionStyleCancel
                                                handler:nil]];

        // NOTE(review): keyWindow is deprecated for multi-scene apps; kept
        // because the deployment target is not visible here.
        UIViewController *presenter = UIApplication.sharedApplication.keyWindow.rootViewController;
        // Present from the top-most controller: presenting from a controller
        // that is already presenting something fails.
        while (presenter.presentedViewController) {
            presenter = presenter.presentedViewController;
        }
        // Do not stack alerts when stalls are reported in quick succession.
        if ([presenter isKindOfClass:[UIAlertController class]]) {
            return;
        }
        [presenter presentViewController:alert animated:YES completion:nil];
    });
}
RunLoop 状态说明
┌────────────────────────────────────────────────────────────────────┐
│ RunLoop 一次循环 │
├────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────┐ │
│ │ kCFRunLoopEntry │ ← 进入 RunLoop │
│ └────────┬────────┘ │
│ ▼ │
│ ┌─────────────────────┐ │
│ │kCFRunLoopBeforeTimers│ ← 即将处理 Timer │
│ └────────┬────────────┘ │
│ ▼ │
│ ┌──────────────────────┐ │
│ │kCFRunLoopBeforeSources│ ← 即将处理 Source ⚠️ 监控点1 │
│ └────────┬─────────────┘ │
│ ▼ │
│ ┌─────────────────────────┐ │
│ │ 处理 Source0 / Source1 │ ← 主要耗时点! │
│ │ (UI事件、手势、网络等) │ │
│ └────────┬────────────────┘ │
│ ▼ │
│ ┌──────────────────────┐ │
│ │kCFRunLoopBeforeWaiting│ ← 即将休眠 │
│ └────────┬─────────────┘ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ 休眠等待 │ ← 等待唤醒(无耗时) │
│ └────────┬────────┘ │
│ ▼ │
│ ┌─────────────────────┐ │
│ │kCFRunLoopAfterWaiting│ ← 被唤醒后 ⚠️ 监控点2 │
│ └────────┬────────────┘ │
│ ▼ │
│ ┌─────────────────────────┐ │
│ │ 处理唤醒事件(Timer等) │ ← 另一个耗时点 │
│ └────────┬────────────────┘ │
│ ▼ │
│ ┌────────────────┐ │
│ │ kCFRunLoopExit │ ← 退出 │
│ └────────────────┘ │
│ │
└────────────────────────────────────────────────────────────────────┘
关键点总结
| 组件 | 说明 |
|---|---|
| RunLoop Observer | 监听全部 RunLoop 活动状态(kCFRunLoopAllActivities),但只在 BeforeSources 和 AfterWaiting 两个状态下判定卡顿 |
| 信号量 | 用于超时检测,状态变化时发送信号 |
| 堆栈采样 | 使用 thread_get_state 获取寄存器,手动回溯栈帧 |
| 符号化 | 使用 dladdr 将地址转换为符号 |
| 阈值设置 | 通常以 100ms 作为卡顿阈值;作为参照,60fps 下一帧约为 16.7ms |
这个实现可以检测主线程卡顿(检测粒度受采样间隔限制),并在卡顿发生时捕获主线程的调用堆栈,帮助定位性能问题。