iOS 卡顿监控实现:RunLoop + 堆栈采样

核心原理图

复制代码
┌─────────────────────────────────────────────────────────────────────┐
│                      Main Thread RunLoop                             │
├─────────────────────────────────────────────────────────────────────┤
│                                                                      │
│   ┌──────────┐    ┌──────────┐    ┌──────────┐    ┌──────────┐     │
│   │  Entry   │───▶│ Timers   │───▶│ Sources  │───▶│ Observer │     │
│   └──────────┘    └──────────┘    └──────────┘    └──────────┘     │
│        │                                               │            │
│        │         ┌─────────────────────────────────────┘            │
│        │         ▼                                                  │
│        │    ┌──────────┐    ┌──────────┐    ┌──────────┐           │
│        │    │BeforeWait│───▶│ Waiting  │───▶│AfterWait │           │
│        │    └──────────┘    └──────────┘    └──────────┘           │
│        │                                         │                  │
│        └─────────────────────────────────────────┘                  │
│                                                                      │
└─────────────────────────────────────────────────────────────────────┘
                              │
                              │ 监控
                              ▼
┌─────────────────────────────────────────────────────────────────────┐
│                     Monitor Thread                                   │
├─────────────────────────────────────────────────────────────────────┤
│                                                                      │
│   ┌────────────────┐     ┌────────────────┐     ┌────────────────┐  │
│   │ 等待信号量     │────▶│ 检查状态变化   │────▶│ 超时?采样堆栈  │  │
│   │ (带超时)       │     │                │     │                │  │
│   └────────────────┘     └────────────────┘     └────────────────┘  │
│                                                                      │
└─────────────────────────────────────────────────────────────────────┘

完整实现代码

1. 头文件定义

objc 复制代码
// LagMonitor.h
#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Severity buckets for a detected main-thread stall.
/// The millisecond ranges below match the cut-offs applied in LagMonitor.m.
typedef NS_ENUM(NSUInteger, LagLevel) {
    LagLevelNormal = 0,      // no stall (below 50 ms)
    LagLevelMild = 1,        // mild stall (50-100 ms)
    LagLevelModerate = 2,    // moderate stall (100-250 ms)
    LagLevelSevere = 3,      // severe stall (250-500 ms)
    LagLevelFatal = 4        // fatal stall (500 ms and above)
};

/// Snapshot of one detected stall; LagMonitor fills it in and passes it to the callback.
@interface LagInfo : NSObject
/// Severity bucket derived from `duration`.
@property (nonatomic, assign) LagLevel level;
@property (nonatomic, assign) NSTimeInterval duration;      // stall length in milliseconds (not seconds)
@property (nonatomic, copy) NSArray<NSString *> *callStack; // sampled call stack, one string per frame
@property (nonatomic, copy) NSString *symbolStack;          // the callStack frames joined with newlines
@property (nonatomic, strong) NSDate *timestamp;            // when the stall was reported
@property (nonatomic, copy) NSDictionary *context;          // extra context (run loop activity, consecutive count)
@end

/// Block invoked with the details of each detected stall.
/// LagMonitor dispatches it onto the main queue.
typedef void(^LagMonitorCallback)(LagInfo *lagInfo);

/// Detects main-thread stalls by observing the main run loop and sampling the
/// main thread's call stack when an activity lasts longer than `threshold`.
@interface LagMonitor : NSObject

/// Shared singleton instance.
@property (nonatomic, class, readonly) LagMonitor *shared;

/// Stall threshold in milliseconds; defaults to 100 ms.
@property (nonatomic, assign) NSTimeInterval threshold;

/// Sampling interval in milliseconds; defaults to 50 ms.
@property (nonatomic, assign) NSTimeInterval sampleInterval;

/// Whether monitoring is currently active.
@property (nonatomic, assign, readonly) BOOL isMonitoring;

/// Starts monitoring; `callback` is invoked for every detected stall.
/// Has no effect if monitoring is already active.
- (void)startWithCallback:(LagMonitorCallback)callback;

/// Stops monitoring. Has no effect if monitoring is not active.
- (void)stop;

/// Samples the main thread's call stack on demand (intended for debugging).
- (NSArray<NSString *> *)captureMainThreadStack;

@end

NS_ASSUME_NONNULL_END

2. 核心实现

objc 复制代码
// LagMonitor.m
#import "LagMonitor.h"
#import <mach/mach.h>
#import <pthread.h>
#import <execinfo.h>
#import <dlfcn.h>

// Upper bound on the number of frames collected for a single stack sample.
static const int kMaxStackDepth = 128;

// Most recent run loop activity reported by the observer, paired with the
// mach_absolute_time() tick count taken when it was recorded.
typedef struct {
    CFRunLoopActivity activity;
    uint64_t timestamp;
} RunLoopState;

// LagInfo is a plain data holder: it declares only properties and adds no methods here.
@implementation LagInfo
@end

// Private state of LagMonitor.
@interface LagMonitor () {
    // Observer attached to the main run loop; NULL while none is installed.
    CFRunLoopObserverRef _observer;
    // Signalled on every run loop activity change and once more by -stop.
    dispatch_semaphore_t _semaphore;
    // Latest activity seen by the observer; read and written under _stateMutex.
    RunLoopState _currentState;
    pthread_mutex_t _stateMutex;
    
    // Mach thread port that stack samples are taken from.
    thread_t _mainThread;
    // Monitoring flag toggled by -startWithCallback: and -stop.
    BOOL _isMonitoring;
}

// Serial queue on which -monitorLoop runs.
@property (nonatomic, strong) dispatch_queue_t monitorQueue;
// Block to notify for each detected stall.
@property (nonatomic, copy) LagMonitorCallback callback;
// Number of over-threshold checks counted since the last received signal.
@property (nonatomic, assign) NSInteger consecutiveTimeouts;

@end

@implementation LagMonitor

#pragma mark - Singleton

/// Returns the process-wide monitor, creating it on first use.
/// Creation is funnelled through dispatch_once, so it happens exactly once.
+ (LagMonitor *)shared {
    static dispatch_once_t once;
    static LagMonitor *sharedMonitor;

    dispatch_once(&once, ^{
        sharedMonitor = [[LagMonitor alloc] init];
    });

    return sharedMonitor;
}

/// Sets the default threshold (100 ms) and sampling interval (50 ms), creates
/// the semaphore, monitor queue and state mutex, and records the main thread's
/// Mach port.
///
/// The port is obtained with pthread_mach_thread_np() from code that is known
/// to run on the main thread. Calling mach_thread_self() here would record
/// whichever thread happened to touch +shared first, and would also return a
/// port right that is never deallocated.
- (instancetype)init {
    self = [super init];
    if (self) {
        _threshold = 100;      // milliseconds
        _sampleInterval = 50;  // milliseconds
        _semaphore = dispatch_semaphore_create(0);
        _monitorQueue = dispatch_queue_create("com.lagmonitor.queue", DISPATCH_QUEUE_SERIAL);
        pthread_mutex_init(&_stateMutex, NULL);

        if (pthread_main_np() != 0) {
            // Already on the main thread: its port can be read directly.
            _mainThread = pthread_mach_thread_np(pthread_self());
        } else {
            // Created off the main thread: resolve the port from the main queue,
            // whose blocks execute on the main thread. Until this block runs,
            // _mainThread stays MACH_PORT_NULL, so thread_suspend() is expected
            // to fail and a capture yields an empty stack instead of sampling
            // the wrong thread.
            dispatch_async(dispatch_get_main_queue(), ^{
                self->_mainThread = pthread_mach_thread_np(pthread_self());
            });
        }
    }
    return self;
}

// Tears the monitor down. -stop runs first so the run loop observer is removed
// and the monitor loop is signalled before the state mutex is destroyed.
- (void)dealloc {
    [self stop];
    pthread_mutex_destroy(&_stateMutex);
}

#pragma mark - Public Methods

/// Begins monitoring and reports every detected stall through `callback`.
/// Does nothing when monitoring is already active.
- (void)startWithCallback:(LagMonitorCallback)callback {
    if (_isMonitoring) {
        return;
    }

    _isMonitoring = YES;
    _callback = callback;
    _consecutiveTimeouts = 0;

    // Attach the run loop observer from the main queue.
    dispatch_block_t installObserver = ^{
        [self setupRunLoopObserver];
    };
    dispatch_async(dispatch_get_main_queue(), installObserver);

    // Kick off the background loop that waits for run loop activity signals.
    [self startMonitorThread];
}

/// Ends monitoring: clears the flag, detaches and releases the run loop
/// observer if one is installed, and signals the semaphore so the monitor
/// loop wakes up and notices the cleared flag.
/// Does nothing when monitoring is not active.
- (void)stop {
    if (_isMonitoring) {
        _isMonitoring = NO;

        CFRunLoopObserverRef installed = _observer;
        if (installed != NULL) {
            CFRunLoopRemoveObserver(CFRunLoopGetMain(), installed, kCFRunLoopCommonModes);
            CFRelease(installed);
            _observer = NULL;
        }

        dispatch_semaphore_signal(_semaphore);
    }
}

/// Samples the call stack of the thread recorded as the main thread and
/// returns it as one string per frame.
- (NSArray<NSString *> *)captureMainThreadStack {
    thread_t target = _mainThread;
    NSArray<NSString *> *frames = [self captureStackForThread:target];
    return frames;
}

#pragma mark - RunLoop Observer

/// Creates an observer for every run loop activity and attaches it to the main
/// run loop in the common modes. Each activity change is forwarded to
/// -runLoopActivityChanged:.
///
/// This method is invoked asynchronously from -startWithCallback:, so by the
/// time it runs -stop may already have been called, or an observer may already
/// be installed. Both cases return early: installing anyway would leave an
/// observer attached after monitoring ended, or overwrite (and leak) the
/// existing one.
- (void)setupRunLoopObserver {
    if (!_isMonitoring || _observer != NULL) {
        return;
    }

    // The handler is retained by the observer, which this object owns; a weak
    // reference avoids keeping the monitor alive through its own observer.
    __weak typeof(self) weakSelf = self;
    CFRunLoopObserverRef created = CFRunLoopObserverCreateWithHandler(
        kCFAllocatorDefault,
        kCFRunLoopAllActivities,
        YES,  // repeats
        0,    // order
        ^(CFRunLoopObserverRef observer, CFRunLoopActivity activity) {
            [weakSelf runLoopActivityChanged:activity];
        }
    );
    if (created == NULL) {
        // Creation failed; there is nothing to attach.
        return;
    }

    _observer = created;
    CFRunLoopAddObserver(CFRunLoopGetMain(), created, kCFRunLoopCommonModes);
}

/// Records the new run loop activity together with the current
/// mach_absolute_time() value, then signals the semaphore so the monitor loop
/// sees that the run loop made progress.
- (void)runLoopActivityChanged:(CFRunLoopActivity)activity {
    pthread_mutex_lock(&_stateMutex);
    _currentState = (RunLoopState){
        .activity = activity,
        .timestamp = mach_absolute_time(),
    };
    pthread_mutex_unlock(&_stateMutex);

    dispatch_semaphore_signal(_semaphore);
}

#pragma mark - Monitor Thread

/// Schedules -monitorLoop on the monitor queue.
- (void)startMonitorThread {
    dispatch_block_t runMonitorLoop = ^{
        [self monitorLoop];
    };
    dispatch_async(_monitorQueue, runMonitorLoop);
}

/// Body of the background monitor. While monitoring is active it waits on the
/// semaphore for at most one sampling interval:
///  - a signal means the run loop changed activity, so the consecutive counter
///    is reset;
///  - a timeout means no activity change arrived in time, so -checkLag decides
///    whether the current activity has exceeded the threshold.
///
/// A zero, negative or NaN sampleInterval would make every wait time out
/// immediately and turn this loop into a busy spin, so such values fall back
/// to the documented 50 ms default.
- (void)monitorLoop {
    static const NSTimeInterval kFallbackIntervalMs = 50;

    while (_isMonitoring) {
        @autoreleasepool {
            NSTimeInterval intervalMs = _sampleInterval;
            if (!(intervalMs > 0)) {  // written this way so NaN is rejected too
                intervalMs = kFallbackIntervalMs;
            }

            // Wait for an activity signal, giving up after one interval.
            long waitResult = dispatch_semaphore_wait(
                _semaphore,
                dispatch_time(DISPATCH_TIME_NOW, (int64_t)(intervalMs * NSEC_PER_MSEC))
            );

            // -stop clears the flag and then signals; leave without sampling.
            if (!_isMonitoring) break;

            if (waitResult != 0) {
                // Timed out: the run loop has not changed activity.
                [self checkLag];
            } else {
                // Signalled: the run loop is making progress.
                _consecutiveTimeouts = 0;
            }
        }
    }
}

/// Decides whether the main run loop is currently stalled and, if so, samples
/// the main thread's stack, builds a LagInfo, notifies the callback on the main
/// queue and logs the stall.
///
/// Only the BeforeSources and AfterWaiting activities are evaluated; the time
/// spent in the current activity is compared against `threshold` (milliseconds).
- (void)checkLag {
    // Take a consistent copy of the shared state under the mutex.
    pthread_mutex_lock(&_stateMutex);
    RunLoopState snapshot = _currentState;
    pthread_mutex_unlock(&_stateMutex);

    if (snapshot.activity != kCFRunLoopBeforeSources &&
        snapshot.activity != kCFRunLoopAfterWaiting) {
        return;
    }

    // Time spent in the current activity, in milliseconds.
    uint64_t now = mach_absolute_time();
    NSTimeInterval duration = [self machTimeToMs:now - snapshot.timestamp];
    if (!(duration >= _threshold)) {
        return;
    }

    _consecutiveTimeouts++;

    NSArray<NSString *> *stack = [self captureStackForThread:_mainThread];
    LagLevel level = [self lagLevelForDuration:duration];

    LagInfo *info = [[LagInfo alloc] init];
    info.level = level;
    info.duration = duration;
    info.callStack = stack;
    info.symbolStack = [stack componentsJoinedByString:@"\n"];
    info.timestamp = [NSDate date];
    info.context = @{
        @"activity": [self activityName:snapshot.activity],
        @"consecutiveTimeouts": @(_consecutiveTimeouts)
    };

    // Capture the block now rather than re-reading the property inside the
    // async block: invoking a nil block crashes, and the property could have
    // changed by the time the main queue gets to run this.
    LagMonitorCallback callback = _callback;
    if (callback) {
        dispatch_async(dispatch_get_main_queue(), ^{
            callback(info);
        });
    }

    NSLog(@"[LagMonitor] 检测到卡顿 - 级别:%@ 时长:%.2fms\n%@",
          [self lagLevelName:level], duration, info.symbolStack);
}

#pragma mark - Stack Capture

/// Samples the call stack of `thread` and returns it as one symbolized string
/// per frame (at most kMaxStackDepth frames). Returns an empty array when the
/// thread cannot be suspended or its register state cannot be read.
///
/// Two hazards are handled explicitly:
///  - Sampling the calling thread itself: suspending one's own thread would
///    never return, so that case is served by backtrace() instead. The result
///    then includes the frames of this method and its callers.
///  - Work done while another thread is suspended: the suspended thread may
///    hold the malloc or dyld lock, so no allocation or dladdr() lookup is
///    performed here until after thread_resume(). Only raw addresses are
///    gathered into a stack buffer inside the suspended window.
- (NSArray<NSString *> *)captureStackForThread:(thread_t)thread {
    uintptr_t frames[kMaxStackDepth];
    int frameCount = 0;

    if (thread == pthread_mach_thread_np(pthread_self())) {
        void *ownFrames[kMaxStackDepth];
        frameCount = backtrace(ownFrames, kMaxStackDepth);
        for (int i = 0; i < frameCount; i++) {
            frames[i] = (uintptr_t)ownFrames[i];
        }
    } else {
#if defined(__arm64__)
        thread_state_flavor_t flavor = ARM_THREAD_STATE64;
        mach_msg_type_number_t stateCount = ARM_THREAD_STATE64_COUNT;
#elif defined(__x86_64__)
        thread_state_flavor_t flavor = x86_THREAD_STATE64;
        mach_msg_type_number_t stateCount = x86_THREAD_STATE64_COUNT;
#else
        #error "Unsupported architecture"
#endif

        // Look the stack walker up before suspending, so this method does not
        // dispatch an Objective-C message to it inside the suspended window
        // (a dispatch that misses the method cache may need a runtime lock
        // that the suspended thread could be holding).
        typedef int (*BacktraceWalker)(id, SEL, _STRUCT_MCONTEXT *, uintptr_t *, int);
        SEL walkerSelector = @selector(backtraceFromContext:buffer:maxDepth:);
        BacktraceWalker walker = (BacktraceWalker)[self methodForSelector:walkerSelector];

        if (thread_suspend(thread) != KERN_SUCCESS) {
            return @[];
        }

        // Read the CPU registers and walk the frame chain into the buffer.
        _STRUCT_MCONTEXT machineContext;
        kern_return_t kr = thread_get_state(
            thread,
            flavor,
            (thread_state_t)&machineContext.__ss,
            &stateCount
        );
        if (kr == KERN_SUCCESS) {
            frameCount = walker(self, walkerSelector, &machineContext, frames, kMaxStackDepth);
        }

        thread_resume(thread);
    }

    // Symbolize only now, with no thread suspended.
    NSMutableArray<NSString *> *result =
        [NSMutableArray arrayWithCapacity:(NSUInteger)MAX(frameCount, 0)];
    for (int i = 0; i < frameCount; i++) {
        [result addObject:[self symbolForAddress:frames[i]]];
    }
    return result;
}

// Copies `length` bytes starting at `address` in this task into `destination`.
// The copy goes through vm_read_overwrite(), so an unmapped or unreadable
// address yields NO instead of a memory access fault.
static BOOL LagMonitorCopyMemory(uintptr_t address, void *destination, size_t length) {
    vm_size_t copied = 0;
    kern_return_t kr = vm_read_overwrite(mach_task_self(),
                                         (vm_address_t)address,
                                         (vm_size_t)length,
                                         (vm_address_t)destination,
                                         &copied);
    return kr == KERN_SUCCESS && copied == length;
}

/// Walks the frame-pointer chain described by a captured register context.
///
/// @param context  Register state of the sampled thread.
/// @param buffer   Receives the collected code addresses, innermost first.
/// @param maxDepth Capacity of `buffer`.
/// @return The number of addresses written to `buffer`.
///
/// The first entry is the program counter; on arm64 the link register follows.
/// Each frame record is then read as { saved frame pointer, return address }.
/// Frame records are copied with LagMonitorCopyMemory() rather than
/// dereferenced, because a sampled frame pointer can hold an arbitrary value
/// and a plain read could fault. The walk stops at the first unreadable or
/// misaligned record, or when the chain stops moving towards higher addresses.
- (int)backtraceFromContext:(_STRUCT_MCONTEXT *)context
                     buffer:(uintptr_t *)buffer
                   maxDepth:(int)maxDepth {
    if (context == NULL || buffer == NULL || maxDepth <= 0) {
        return 0;
    }

    int count = 0;
    uintptr_t framePointer = 0;

#if defined(__arm64__)
    // The accessor macros work for both the plain and the opaque
    // (pointer-authenticated) layout of the thread state structure.
    uintptr_t pc = (uintptr_t)arm_thread_state64_get_pc(context->__ss);
    uintptr_t lr = (uintptr_t)arm_thread_state64_get_lr(context->__ss);
    framePointer = (uintptr_t)arm_thread_state64_get_fp(context->__ss);

    if (pc != 0) {
        buffer[count++] = pc;
    }
    if (lr != 0 && count < maxDepth) {
        buffer[count++] = lr;
    }
#elif defined(__x86_64__)
    uintptr_t rip = context->__ss.__rip;
    framePointer = context->__ss.__rbp;

    if (rip != 0) {
        buffer[count++] = rip;
    }
#endif

    while (framePointer != 0 && count < maxDepth) {
        if (framePointer % sizeof(uintptr_t) != 0) {
            break;  // a frame record is pointer-aligned; anything else is garbage
        }

        uintptr_t record[2] = {0, 0};  // { saved frame pointer, return address }
        if (!LagMonitorCopyMemory(framePointer, record, sizeof(record))) {
            break;
        }

        if (record[1] != 0) {
            buffer[count++] = record[1];
        }

        // The caller's frame lives at a higher address; anything else means
        // the chain ended or is corrupt, and following it could loop forever.
        if (record[0] <= framePointer) {
            break;
        }
        framePointer = record[0];
    }

    return count;
}

/// Coarse plausibility check for a pointer: it must be non-zero and lie
/// strictly between an architecture-specific lower and upper bound.
/// This is a range heuristic only; it does not prove the address is mapped.
- (BOOL)isValidAddress:(uintptr_t)address {
#if defined(__arm64__)
    const unsigned long long lowerBound = 0x100000000ULL;
    const unsigned long long upperBound = 0x800000000000ULL;
#else
    const unsigned long long lowerBound = 0x1000;
    const unsigned long long upperBound = 0x7FFFFFFFFFFFULL;
#endif

    if (address == 0) {
        return NO;
    }

    BOOL aboveLowerBound = address > lowerBound;
    BOOL belowUpperBound = address < upperBound;
    return aboveLowerBound && belowUpperBound;
}

/// Turns a code address into a human-readable frame description using dladdr().
///
/// @return "<image> <symbol> + <offset from symbol>" when a symbol is found;
///         "<image> ??? + <offset from image base>" when only the image is
///         known; the raw hexadecimal address when dladdr() fails.
///
/// When dladdr() finds the image but no symbol, dli_saddr is NULL. Subtracting
/// it would print the absolute address labelled as an offset, so the offset is
/// taken from the image base in that case.
- (NSString *)symbolForAddress:(uintptr_t)address {
    Dl_info info;
    if (dladdr((const void *)address, &info) == 0) {
        return [NSString stringWithFormat:@"0x%lx", (unsigned long)address];
    }

    // stringWithUTF8String: returns nil for bytes that are not valid UTF-8,
    // so every conversion falls back to the placeholder.
    NSString *imageName = @"???";
    if (info.dli_fname != NULL) {
        NSString *imagePath = [NSString stringWithUTF8String:info.dli_fname];
        imageName = imagePath.lastPathComponent ?: @"???";
    }

    if (info.dli_sname != NULL && info.dli_saddr != NULL) {
        NSString *symbolName = [NSString stringWithUTF8String:info.dli_sname] ?: @"???";
        uintptr_t symbolOffset = address - (uintptr_t)info.dli_saddr;
        return [NSString stringWithFormat:@"%@ %@ + %lu",
                imageName, symbolName, (unsigned long)symbolOffset];
    }

    uintptr_t imageOffset = address - (uintptr_t)info.dli_fbase;
    return [NSString stringWithFormat:@"%@ %@ + %lu",
            imageName, @"???", (unsigned long)imageOffset];
}

#pragma mark - Utilities

/// Converts a span measured in mach_absolute_time() ticks into milliseconds.
/// The timebase ratio is queried once and reused for every later call.
- (NSTimeInterval)machTimeToMs:(uint64_t)machTime {
    static dispatch_once_t timebaseOnce;
    static mach_timebase_info_data_t timebaseInfo;
    dispatch_once(&timebaseOnce, ^{
        mach_timebase_info(&timebaseInfo);
    });

    // ticks * numer / denom gives nanoseconds; 1e6 nanoseconds make a millisecond.
    double nanoseconds = (double)machTime * timebaseInfo.numer / timebaseInfo.denom;
    return nanoseconds / 1e6;
}

/// Maps a stall duration in milliseconds to its severity bucket.
/// Each bucket is bounded by an exclusive upper limit; a duration that is not
/// below any limit is classified as fatal.
- (LagLevel)lagLevelForDuration:(NSTimeInterval)duration {
    static const NSTimeInterval upperLimitsMs[] = {50, 100, 250, 500};
    static const LagLevel levels[] = {
        LagLevelNormal, LagLevelMild, LagLevelModerate, LagLevelSevere
    };
    const size_t bucketCount = sizeof(upperLimitsMs) / sizeof(upperLimitsMs[0]);

    for (size_t index = 0; index < bucketCount; index++) {
        if (duration < upperLimitsMs[index]) {
            return levels[index];
        }
    }
    return LagLevelFatal;
}

/// Returns the display name used in log output for a severity level.
/// A value outside the declared cases yields a placeholder: without the final
/// return, such a value would fall off the end of a function that must return
/// an object.
- (NSString *)lagLevelName:(LagLevel)level {
    switch (level) {
        case LagLevelNormal: return @"正常";
        case LagLevelMild: return @"轻微";
        case LagLevelModerate: return @"中度";
        case LagLevelSevere: return @"严重";
        case LagLevelFatal: return @"致命";
    }
    // No `default:` above, so the compiler can still flag a newly added case.
    return @"未知";
}

/// Returns a short label for a single run loop activity, or "Unknown" for any
/// value that is not exactly one of the six individual activities.
- (NSString *)activityName:(CFRunLoopActivity)activity {
    if (activity == kCFRunLoopEntry) {
        return @"Entry";
    }
    if (activity == kCFRunLoopBeforeTimers) {
        return @"BeforeTimers";
    }
    if (activity == kCFRunLoopBeforeSources) {
        return @"BeforeSources";
    }
    if (activity == kCFRunLoopBeforeWaiting) {
        return @"BeforeWaiting";
    }
    if (activity == kCFRunLoopAfterWaiting) {
        return @"AfterWaiting";
    }
    if (activity == kCFRunLoopExit) {
        return @"Exit";
    }
    return @"Unknown";
}

@end

3. 增强版:多次采样聚合

objc 复制代码
// LagStackAggregator.h - stack aggregator
/// Groups sampled call stacks by their leading frames so the stack seen most
/// often during a stall can be identified.
@interface LagStackAggregator : NSObject

/// Adds one sampled stack. Empty stacks are ignored.
- (void)addSample:(NSArray<NSString *> *)stack;

/// Returns the full stack of the most frequently seen group (the first sample
/// recorded for that group), or an empty array when nothing has been added.
- (NSArray<NSString *> *)getMostFrequentStack;

/// Returns counters describing the samples added so far:
/// "totalSamples", "uniqueStacks" and "stackDistribution" (group key -> count).
- (NSDictionary *)getStatistics;

/// Discards all recorded samples and counters.
- (void)reset;

@end

// LagStackAggregator.m

// Number of leading frames that identify a group of stacks.
static const NSUInteger kLagStackKeyFrameCount = 5;

// Builds the grouping key for a stack: its leading frames joined with "|".
static NSString *LagStackGroupingKey(NSArray<NSString *> *stack) {
    NSUInteger frameCount = MIN(kLagStackKeyFrameCount, stack.count);
    NSArray<NSString *> *leadingFrames = [stack subarrayWithRange:NSMakeRange(0, frameCount)];
    return [leadingFrames componentsJoinedByString:@"|"];
}

@implementation LagStackAggregator {
    // Number of non-empty samples added since the last reset.
    NSUInteger _sampleCount;
    // Occurrence count per grouping key.
    NSMutableDictionary<NSString *, NSNumber *> *_stackCounts;
    // First full stack recorded for each grouping key. Keeping one
    // representative per group, instead of every sample, bounds memory by the
    // number of distinct groups rather than by the number of samples.
    NSMutableDictionary<NSString *, NSArray<NSString *> *> *_representativeStacks;
}

- (instancetype)init {
    if (self = [super init]) {
        _stackCounts = [NSMutableDictionary dictionary];
        _representativeStacks = [NSMutableDictionary dictionary];
    }
    return self;
}

- (void)addSample:(NSArray<NSString *> *)stack {
    if (stack.count == 0) return;

    _sampleCount += 1;

    NSString *key = LagStackGroupingKey(stack);
    _stackCounts[key] = @(_stackCounts[key].integerValue + 1);

    if (_representativeStacks[key] == nil) {
        // Copy so later mutation of a caller-owned mutable array cannot alter
        // what was recorded.
        _representativeStacks[key] = [stack copy];
    }
}

- (NSArray<NSString *> *)getMostFrequentStack {
    NSString *mostFrequentKey = nil;
    NSInteger highestCount = 0;

    for (NSString *key in _stackCounts) {
        NSInteger count = _stackCounts[key].integerValue;
        if (count > highestCount) {
            highestCount = count;
            mostFrequentKey = key;
        }
    }

    if (mostFrequentKey == nil) {
        return @[];
    }
    return _representativeStacks[mostFrequentKey] ?: @[];
}

- (NSDictionary *)getStatistics {
    return @{
        @"totalSamples": @(_sampleCount),
        @"uniqueStacks": @(_stackCounts.count),
        @"stackDistribution": [_stackCounts copy]
    };
}

- (void)reset {
    _sampleCount = 0;
    [_stackCounts removeAllObjects];
    [_representativeStacks removeAllObjects];
}

@end

4. 使用示例

objc 复制代码
// AppDelegate.m
/// Configures the shared LagMonitor at launch and registers a callback that
/// logs each stall, reports it, and (in DEBUG builds) shows an alert for
/// severe or worse stalls.
- (BOOL)application:(UIApplication *)application 
        didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    
    // Configure and start the stall monitor.
    LagMonitor *monitor = [LagMonitor shared];
    monitor.threshold = 100;      // 100 ms threshold
    monitor.sampleInterval = 50;  // 50 ms sampling interval
    
    [monitor startWithCallback:^(LagInfo *lagInfo) {
        NSLog(@"⚠️ 检测到卡顿!");
        // LagLevel is an NSUInteger-backed enum; cast it explicitly so the
        // argument matches %lu on every architecture.
        NSLog(@"级别: %lu, 时长: %.2fms", (unsigned long)lagInfo.level, lagInfo.duration);
        NSLog(@"堆栈:\n%@", lagInfo.symbolStack);
        
        // Forward the stall to the reporting backend.
        [self reportLagInfo:lagInfo];
        
        // In development builds, surface severe stalls with an alert.
        #if DEBUG
        if (lagInfo.level >= LagLevelSevere) {
            [self showLagAlert:lagInfo];
        }
        #endif
    }];
    
    return YES;
}

/// Builds the report payload for one stall. Sending it is left as a stub.
///
/// Dictionary literals throw when given a nil value, so every entry that can
/// be nil is given a fallback.
- (void)reportLagInfo:(LagInfo *)info {
    // NOTE(review): -deviceInfo is not visible here; the fallback assumes it
    // returns a dictionary or nil — confirm against its declaration.
    NSDictionary *report = @{
        @"type": @"lag",
        @"level": @(info.level),
        @"duration": @(info.duration),
        @"stack": info.callStack ?: @[],
        @"timestamp": @([info.timestamp timeIntervalSince1970]),
        @"context": info.context ?: @{},
        @"deviceInfo": [self deviceInfo] ?: @{}
    };
    
    // TODO: send `report` to the monitoring backend.
    (void)report;
}

/// Presents a development alert describing a severe stall: its duration and
/// the first (innermost) frame of the sampled stack, truncated to 100
/// characters.
///
/// The alert is presented from the top-most presented view controller, because
/// presenting from a controller that is already presenting something fails. If
/// an alert is already on screen the new one is skipped, so repeated reports of
/// one long stall do not stack alerts.
- (void)showLagAlert:(LagInfo *)info {
    dispatch_async(dispatch_get_main_queue(), ^{
        // The stack is empty when sampling failed; show nothing rather than "(null)".
        NSString *topFrame = info.callStack.firstObject ?: @"";
        if (topFrame.length > 100) {
            topFrame = [topFrame substringToIndex:100];
        }

        UIAlertController *alert = [UIAlertController 
            alertControllerWithTitle:@"检测到严重卡顿"
            message:[NSString stringWithFormat:@"时长: %.0fms\n\n%@", 
                     info.duration, 
                     topFrame]
            preferredStyle:UIAlertControllerStyleAlert];
        
        [alert addAction:[UIAlertAction actionWithTitle:@"查看详情" 
                                                  style:UIAlertActionStyleDefault 
                                                handler:^(UIAlertAction *action) {
            NSLog(@"完整堆栈:\n%@", info.symbolStack);
        }]];
        
        [alert addAction:[UIAlertAction actionWithTitle:@"忽略" 
                                                  style:UIAlertActionStyleCancel 
                                                handler:nil]];
        
        // NOTE(review): keyWindow is deprecated on recent iOS versions; apps
        // that use scenes should look the window up through the active scene.
        UIViewController *presenter = UIApplication.sharedApplication.keyWindow.rootViewController;
        while (presenter.presentedViewController != nil) {
            presenter = presenter.presentedViewController;
        }
        if ([presenter isKindOfClass:[UIAlertController class]]) {
            return;
        }
        [presenter presentViewController:alert animated:YES completion:nil];
    });
}

RunLoop 状态说明

复制代码
┌────────────────────────────────────────────────────────────────────┐
│                    RunLoop 一次循环                                  │
├────────────────────────────────────────────────────────────────────┤
│                                                                     │
│  ┌─────────────────┐                                               │
│  │ kCFRunLoopEntry │ ← 进入 RunLoop                                 │
│  └────────┬────────┘                                               │
│           ▼                                                         │
│  ┌─────────────────────┐                                           │
│  │kCFRunLoopBeforeTimers│ ← 即将处理 Timer                          │
│  └────────┬────────────┘                                           │
│           ▼                                                         │
│  ┌──────────────────────┐                                          │
│  │kCFRunLoopBeforeSources│ ← 即将处理 Source ⚠️ 监控点1              │
│  └────────┬─────────────┘                                          │
│           ▼                                                         │
│  ┌─────────────────────────┐                                       │
│  │ 处理 Source0 / Source1  │ ← 主要耗时点!                         │
│  │ (UI事件、手势、网络等)   │                                        │
│  └────────┬────────────────┘                                       │
│           ▼                                                         │
│  ┌──────────────────────┐                                          │
│  │kCFRunLoopBeforeWaiting│ ← 即将休眠                               │
│  └────────┬─────────────┘                                          │
│           ▼                                                         │
│  ┌─────────────────┐                                               │
│  │    休眠等待      │ ← 等待唤醒(无耗时)                           │
│  └────────┬────────┘                                               │
│           ▼                                                         │
│  ┌─────────────────────┐                                           │
│  │kCFRunLoopAfterWaiting│ ← 被唤醒后 ⚠️ 监控点2                      │
│  └────────┬────────────┘                                           │
│           ▼                                                         │
│  ┌─────────────────────────┐                                       │
│  │  处理唤醒事件(Timer等) │ ← 另一个耗时点                         │
│  └────────┬────────────────┘                                       │
│           ▼                                                         │
│  ┌────────────────┐                                                │
│  │ kCFRunLoopExit │ ← 退出                                         │
│  └────────────────┘                                                │
│                                                                     │
└────────────────────────────────────────────────────────────────────┘

关键点总结

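| 组件 | 说明 |
| --- | --- |
| RunLoop Observer | 监听主线程 RunLoop 的全部活动状态,但只在 BeforeSources 和 AfterWaiting 两个状态下判定卡顿 |
| 信号量 | 用于超时检测:每次状态变化时发送信号,监控线程等待超时即说明主线程在当前状态停留过久 |
| 堆栈采样 | 使用 thread_get_state 获取寄存器,手动回溯栈帧 |
| 符号化 | 使用 dladdr 将地址转换为符号 |
| 阈值设置 | 通常以 100ms 作为卡顿阈值;60Hz 刷新率下一帧约为 16ms |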

这个实现可以准确检测主线程卡顿,并捕获导致卡顿的调用堆栈,帮助定位性能问题。

相关推荐
2501_915909062 小时前
Fiddler抓包与接口调试实战,HTTPHTTPS配置、代理设置与移动端抓包详解
前端·测试工具·ios·小程序·fiddler·uni-app·webview
2501_9151063211 小时前
如何查看手机使用记录:Android和iOS设备全面指南
android·ios·智能手机·小程序·uni-app·iphone·webview
0xAaron17 小时前
确定crash文件和dSYM是否对应
ios·uuid·crash·dsym
0xAaron20 小时前
符号表和 dSYM UUID 确认
ios·cocoa·uuid·符号表·dsym
0xAaron20 小时前
如何使用dSYM文件来符号化崩溃信息
ios·swift·调试·崩溃·符号化·dsym
2501_915918411 天前
Flutter 加固方案全解析,从 Dart 层到 IPA 成品的多工具协同防护体系
flutter·macos·ios·小程序·uni-app·cocoa·iphone
wsxlgg1 天前
IOS 打包上传提示you do not have required contracts to perform an operation
ios
每周报刊1 天前
初代 iPhone SE 谢幕:被标为 “过时”,小屏旗舰时代彻底落幕
ios·iphone
RollingPin1 天前
iOS 动态库与静态库的区别
ios·framework·动态库·静态库·符号表·三方库·dyld