iOS 卡顿监控实现:RunLoop + 堆栈采样

核心原理图

复制代码
┌─────────────────────────────────────────────────────────────────────┐
│                      Main Thread RunLoop                             │
├─────────────────────────────────────────────────────────────────────┤
│                                                                      │
│   ┌──────────┐    ┌──────────┐    ┌──────────┐    ┌──────────┐     │
│   │  Entry   │───▶│ Timers   │───▶│ Sources  │───▶│ Observer │     │
│   └──────────┘    └──────────┘    └──────────┘    └──────────┘     │
│        │                                               │            │
│        │         ┌─────────────────────────────────────┘            │
│        │         ▼                                                  │
│        │    ┌──────────┐    ┌──────────┐    ┌──────────┐           │
│        │    │BeforeWait│───▶│ Waiting  │───▶│AfterWait │           │
│        │    └──────────┘    └──────────┘    └──────────┘           │
│        │                                         │                  │
│        └─────────────────────────────────────────┘                  │
│                                                                      │
└─────────────────────────────────────────────────────────────────────┘
                              │
                              │ 监控
                              ▼
┌─────────────────────────────────────────────────────────────────────┐
│                     Monitor Thread                                   │
├─────────────────────────────────────────────────────────────────────┤
│                                                                      │
│   ┌────────────────┐     ┌────────────────┐     ┌────────────────┐  │
│   │ 等待信号量     │────▶│ 检查状态变化   │────▶│ 超时?采样堆栈  │  │
│   │ (带超时)       │     │                │     │                │  │
│   └────────────────┘     └────────────────┘     └────────────────┘  │
│                                                                      │
└─────────────────────────────────────────────────────────────────────┘

完整实现代码

1. 头文件定义

objc 复制代码
// LagMonitor.h
#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Severity of a detected main-thread stall, bucketed by stall duration.
typedef NS_ENUM(NSUInteger, LagLevel) {
    LagLevelNormal = 0,      // No stall
    LagLevelMild = 1,        // Mild stall (50-100ms)
    LagLevelModerate = 2,    // Moderate stall (100-250ms)
    LagLevelSevere = 3,      // Severe stall (250-500ms)
    LagLevelFatal = 4        // Fatal stall (>500ms)
};

/// Model object describing one detected stall.
@interface LagInfo : NSObject
@property (nonatomic, assign) LagLevel level;
@property (nonatomic, assign) NSTimeInterval duration;      // Stall duration, in milliseconds
@property (nonatomic, copy) NSArray<NSString *> *callStack; // Call stack, one entry per frame
@property (nonatomic, copy) NSString *symbolStack;          // Symbolicated stack as a single string
@property (nonatomic, strong) NSDate *timestamp;            // When the stall was detected
@property (nonatomic, copy) NSDictionary *context;          // Additional context information
@end

/// Callback invoked with the details of a detected stall.
typedef void(^LagMonitorCallback)(LagInfo *lagInfo);

/// Stall monitor: reports main-thread stalls together with a sampled call stack.
@interface LagMonitor : NSObject

/// Shared monitor instance.
@property (nonatomic, class, readonly) LagMonitor *shared;

/// Stall threshold in milliseconds. Defaults to 100ms.
@property (nonatomic, assign) NSTimeInterval threshold;

/// Sampling interval in milliseconds. Defaults to 50ms.
@property (nonatomic, assign) NSTimeInterval sampleInterval;

/// Whether monitoring is currently active.
@property (nonatomic, assign, readonly) BOOL isMonitoring;

/// Starts monitoring; `callback` receives every detected stall.
- (void)startWithCallback:(LagMonitorCallback)callback;

/// Stops monitoring.
- (void)stop;

/// Manually samples the main thread's stack (intended for debugging).
- (NSArray<NSString *> *)captureMainThreadStack;

@end

NS_ASSUME_NONNULL_END

2. 核心实现

objc 复制代码
// LagMonitor.m
#import "LagMonitor.h"
#import <mach/mach.h>
#import <pthread.h>
#import <execinfo.h>
#import <dlfcn.h>

// Maximum number of stack frames collected per sample.
static const int kMaxStackDepth = 128;

// Most recently observed run loop activity, paired with the mach_absolute_time()
// tick at which it was recorded.
typedef struct {
    CFRunLoopActivity activity;
    uint64_t timestamp;
} RunLoopState;

// LagInfo is a plain data holder: it adds no methods beyond its synthesized property accessors.
@implementation LagInfo
@end

// Private state of LagMonitor.
@interface LagMonitor () {
    CFRunLoopObserverRef _observer;    // Observer attached to the main run loop; NULL while not installed
    dispatch_semaphore_t _semaphore;   // Signaled on every run loop activity change and by -stop
    RunLoopState _currentState;        // Last recorded activity and its timestamp
    pthread_mutex_t _stateMutex;       // Guards _currentState
    
    thread_t _mainThread;              // Mach port of the thread to sample, set up by -init
    BOOL _isMonitoring;                // Storage behind the public isMonitoring property
}

// Serial queue on which the monitor loop runs.
@property (nonatomic, strong) dispatch_queue_t monitorQueue;
// Block invoked for every detected stall.
@property (nonatomic, copy) LagMonitorCallback callback;
// Number of stalls reported since the last run loop activity signal was received.
@property (nonatomic, assign) NSInteger consecutiveTimeouts;

@end

@implementation LagMonitor

#pragma mark - Singleton

/// Returns the process-wide monitor, creating it on first access.
+ (LagMonitor *)shared {
    static dispatch_once_t once;
    static LagMonitor *sharedMonitor = nil;
    dispatch_once(&once, ^{
        sharedMonitor = [[LagMonitor alloc] init];
    });
    return sharedMonitor;
}

/// Creates a monitor with the default threshold (100ms) and sampling interval (50ms)
/// and resolves the Mach port of the main thread, which is the sampling target.
- (instancetype)init {
    self = [super init];
    if (self) {
        _threshold = 100;      // milliseconds
        _sampleInterval = 50;  // milliseconds
        _semaphore = dispatch_semaphore_create(0);
        _monitorQueue = dispatch_queue_create("com.lagmonitor.queue", DISPATCH_QUEUE_SERIAL);
        pthread_mutex_init(&_stateMutex, NULL);

        // pthread_mach_thread_np() is used rather than mach_thread_self() because the
        // latter adds a port reference that must be balanced with mach_port_deallocate().
        if (pthread_main_np() != 0) {
            _mainThread = pthread_mach_thread_np(pthread_self());
        } else {
            // The singleton may be created first from a background thread. Recording that
            // thread would make every later sample target the wrong thread, so leave the
            // port unset (sampling then yields an empty stack) and resolve it on the main
            // queue, whose blocks execute on the main thread.
            _mainThread = MACH_PORT_NULL;
            dispatch_async(dispatch_get_main_queue(), ^{
                self->_mainThread = pthread_mach_thread_np(pthread_self());
            });
        }
    }
    return self;
}

/// Stops monitoring (a no-op when not monitoring), then destroys the state mutex.
- (void)dealloc {
    [self stop];
    pthread_mutex_destroy(&_stateMutex);
}

#pragma mark - Public Methods

/// Begins monitoring and remembers `callback` for stall reports.
/// Does nothing when monitoring is already active.
- (void)startWithCallback:(LagMonitorCallback)callback {
    BOOL alreadyRunning = _isMonitoring;
    if (!alreadyRunning) {
        _isMonitoring = YES;
        _callback = callback;
        _consecutiveTimeouts = 0;

        // Attach the run loop observer from the main queue.
        dispatch_block_t installObserver = ^{
            [self setupRunLoopObserver];
        };
        dispatch_async(dispatch_get_main_queue(), installObserver);

        // Kick off the background monitor loop.
        [self startMonitorThread];
    }
}

/// Ends monitoring: detaches the run loop observer and wakes the monitor loop so it can exit.
/// Does nothing when monitoring is not active.
- (void)stop {
    if (_isMonitoring) {
        _isMonitoring = NO;

        // Detach and release the observer if one is installed.
        CFRunLoopObserverRef installed = _observer;
        if (installed != NULL) {
            CFRunLoopRemoveObserver(CFRunLoopGetMain(), installed, kCFRunLoopCommonModes);
            CFRelease(installed);
            _observer = NULL;
        }

        // Wake the monitor loop so it notices the cleared flag.
        dispatch_semaphore_signal(_semaphore);
    }
}

/// Samples the stack of the thread recorded in `_mainThread` and returns its frames.
- (NSArray<NSString *> *)captureMainThreadStack {
    thread_t target = _mainThread;
    NSArray<NSString *> *frames = [self captureStackForThread:target];
    return frames;
}

#pragma mark - RunLoop Observer

/// Installs a repeating observer for every activity of the main run loop.
/// Each observed activity is forwarded to -runLoopActivityChanged:.
- (void)setupRunLoopObserver {
    // -startWithCallback: schedules this call asynchronously, so by the time it runs
    // monitoring may already have been stopped, or an earlier scheduled call may already
    // have installed the observer. Installing anyway would leave an observer that -stop
    // never removes, or overwrite (and leak) the existing one.
    if (!_isMonitoring || _observer != NULL) {
        return;
    }

    __weak typeof(self) weakSelf = self;
    _observer = CFRunLoopObserverCreateWithHandler(
        kCFAllocatorDefault,
        kCFRunLoopAllActivities,  // observe every activity
        YES,                      // repeats
        0,                        // order
        ^(CFRunLoopObserverRef observer, CFRunLoopActivity activity) {
            [weakSelf runLoopActivityChanged:activity];
        }
    );

    // Creation can fail; adding a NULL observer is not valid.
    if (_observer == NULL) {
        return;
    }

    CFRunLoopAddObserver(CFRunLoopGetMain(), _observer, kCFRunLoopCommonModes);
}

/// Records the new run loop activity with the current tick, then signals the monitor loop.
- (void)runLoopActivityChanged:(CFRunLoopActivity)activity {
    pthread_mutex_lock(&_stateMutex);
    {
        RunLoopState latest;
        latest.activity = activity;
        latest.timestamp = mach_absolute_time();
        _currentState = latest;
    }
    pthread_mutex_unlock(&_stateMutex);

    // The state changed: let the waiting monitor loop know.
    dispatch_semaphore_signal(_semaphore);
}

#pragma mark - Monitor Thread

/// Schedules the monitor loop on the serial monitor queue.
- (void)startMonitorThread {
    dispatch_block_t runLoop = ^{
        [self monitorLoop];
    };
    dispatch_async(_monitorQueue, runLoop);
}

/// Body of the monitor queue: repeatedly waits up to one sampling interval for a run loop
/// activity signal. A timeout triggers a stall check; a signal resets the stall counter.
/// Returns once monitoring has been switched off.
- (void)monitorLoop {
    for (;;) {
        if (!_isMonitoring) {
            return;
        }

        @autoreleasepool {
            // Wait for a signal, giving up after the sampling interval.
            int64_t waitNanos = (int64_t)(_sampleInterval * NSEC_PER_MSEC);
            dispatch_time_t deadline = dispatch_time(DISPATCH_TIME_NOW, waitNanos);
            BOOL timedOut = (dispatch_semaphore_wait(_semaphore, deadline) != 0);

            // -stop signals the semaphore to wake this loop up.
            if (!_isMonitoring) {
                return;
            }

            if (timedOut) {
                // No activity change within the interval: check for a stall.
                [self checkLag];
            } else {
                // The run loop moved on normally.
                _consecutiveTimeouts = 0;
            }
        }
    }
}

/// Called from the monitor loop after a wait timed out. If the main run loop has been
/// sitting in BeforeSources or AfterWaiting for at least `threshold` milliseconds, samples
/// the main thread's stack, builds a LagInfo, logs it, and delivers it to the callback on
/// the main queue.
- (void)checkLag {
    pthread_mutex_lock(&_stateMutex);
    CFRunLoopActivity activity = _currentState.activity;
    uint64_t stateTimestamp = _currentState.timestamp;
    pthread_mutex_unlock(&_stateMutex);

    // Only BeforeSources and AfterWaiting are treated as "busy handling events".
    if (activity != kCFRunLoopBeforeSources &&
        activity != kCFRunLoopAfterWaiting) {
        return;
    }

    // How long the run loop has stayed in the recorded state.
    uint64_t now = mach_absolute_time();
    NSTimeInterval duration = [self machTimeToMs:now - stateTimestamp];

    if (!(duration >= _threshold)) {
        return;
    }

    _consecutiveTimeouts++;

    // Sample the stack and classify the stall.
    NSArray<NSString *> *stack = [self captureStackForThread:_mainThread];
    LagLevel level = [self lagLevelForDuration:duration];

    LagInfo *info = [[LagInfo alloc] init];
    info.level = level;
    info.duration = duration;
    info.callStack = stack;
    info.symbolStack = [stack componentsJoinedByString:@"\n"];
    info.timestamp = [NSDate date];
    info.context = @{
        @"activity": [self activityName:activity],
        @"consecutiveTimeouts": @(_consecutiveTimeouts)
    };

    // Capture the block now instead of re-reading the property inside the main-queue
    // block: if the callback were cleared before that block runs, invoking a nil block
    // would crash.
    LagMonitorCallback callback = self.callback;
    if (callback) {
        dispatch_async(dispatch_get_main_queue(), ^{
            callback(info);
        });
    }

    NSLog(@"[LagMonitor] 检测到卡顿 - 级别:%@ 时长:%.2fms\n%@",
          [self lagLevelName:level], duration, info.symbolStack);
}

#pragma mark - Stack Capture

/// Samples the call stack of `thread` and returns one symbolicated line per frame.
///
/// @param thread Mach port of the thread to sample.
/// @return Frames in the order they were collected (innermost first); empty when the
///         thread cannot be suspended or its register state cannot be read.
- (NSArray<NSString *> *)captureStackForThread:(thread_t)thread {
    NSMutableArray<NSString *> *result = [NSMutableArray array];
    uintptr_t addresses[kMaxStackDepth];
    int frameCount = 0;

    if (thread == pthread_mach_thread_np(pthread_self())) {
        // The target is the calling thread. Suspending it would stop this very code and
        // never reach thread_resume(), so unwind in place instead.
        void *frames[kMaxStackDepth];
        frameCount = backtrace(frames, kMaxStackDepth);
        for (int i = 0; i < frameCount; i++) {
            addresses[i] = (uintptr_t)frames[i];
        }
    } else {
        // Pause the target thread so its registers and stack stay stable while we read them.
        if (thread_suspend(thread) != KERN_SUCCESS) {
            return result;
        }

        // Fetch the thread's CPU registers using the flavor for this architecture.
        _STRUCT_MCONTEXT machineContext;
#if defined(__arm64__)
        thread_state_flavor_t flavor = ARM_THREAD_STATE64;
        mach_msg_type_number_t stateCount = ARM_THREAD_STATE64_COUNT;
#elif defined(__x86_64__)
        thread_state_flavor_t flavor = x86_THREAD_STATE64;
        mach_msg_type_number_t stateCount = x86_THREAD_STATE64_COUNT;
#else
        #error "Unsupported architecture"
#endif

        kern_return_t kr = thread_get_state(
            thread,
            flavor,
            (thread_state_t)&machineContext.__ss,
            &stateCount
        );

        if (kr == KERN_SUCCESS) {
            // Collect raw return addresses only while the thread is paused.
            frameCount = [self backtraceFromContext:&machineContext
                                             buffer:addresses
                                           maxDepth:kMaxStackDepth];
        }

        // Resume before symbolicating: symbol lookup and string creation allocate memory
        // and can take locks that the paused thread might be holding, which would
        // deadlock this thread.
        thread_resume(thread);
    }

    // Symbolicate the collected addresses.
    for (int i = 0; i < frameCount; i++) {
        [result addObject:[self symbolForAddress:addresses[i]]];
    }

    return result;
}

// Copies `length` bytes starting at `address` in this task into `destination` through the
// kernel, so an unmapped or unreadable address yields NO instead of a crash.
static BOOL LagMonitorReadMemory(uintptr_t address, void *destination, size_t length) {
    vm_size_t copied = 0;
    kern_return_t kr = vm_read_overwrite(mach_task_self(),
                                         (vm_address_t)address,
                                         (vm_size_t)length,
                                         (vm_address_t)destination,
                                         &copied);
    return kr == KERN_SUCCESS && copied == (vm_size_t)length;
}

// Walks the frame-pointer chain described by a thread's register context.
//
// @param context  Register context previously filled by thread_get_state().
// @param buffer   Receives the collected addresses, innermost first.
// @param maxDepth Capacity of `buffer`.
// @return Number of addresses written to `buffer`.
- (int)backtraceFromContext:(_STRUCT_MCONTEXT *)context
                     buffer:(uintptr_t *)buffer
                   maxDepth:(int)maxDepth {
    int count = 0;
    uintptr_t framePointer = 0;

#if defined(__arm64__)
    // Use the accessor macros rather than the raw fields: on arm64e the register fields
    // are opaque and cannot be read directly.
    uintptr_t pc = (uintptr_t)arm_thread_state64_get_pc(context->__ss);
    uintptr_t lr = (uintptr_t)arm_thread_state64_get_lr(context->__ss);
    framePointer = (uintptr_t)arm_thread_state64_get_fp(context->__ss);

    // First entry: program counter.
    if (pc && count < maxDepth) {
        buffer[count++] = pc;
    }

    // Second entry: link register.
    if (lr && count < maxDepth) {
        buffer[count++] = lr;
    }
#elif defined(__x86_64__)
    uintptr_t rip = context->__ss.__rip;
    framePointer = context->__ss.__rbp;

    // First entry: instruction pointer.
    if (rip && count < maxDepth) {
        buffer[count++] = rip;
    }
#endif

    // Each frame record is { saved frame pointer, return address }.
    while (framePointer != 0 && count < maxDepth) {
        // Cheap plausibility filter before asking the kernel.
        if (![self isValidAddress:framePointer]) break;

        // The frame pointer comes from another thread's registers and may be garbage;
        // read the record through the kernel instead of dereferencing it.
        uintptr_t record[2] = {0, 0};
        if (!LagMonitorReadMemory(framePointer, record, sizeof(record))) break;

        uintptr_t savedFramePointer = record[0];
        uintptr_t returnAddress = record[1];

        if (returnAddress != 0) {
            buffer[count++] = returnAddress;
        }

        // Frames must move toward higher addresses; anything else means the chain ended
        // or is corrupt.
        if (savedFramePointer <= framePointer) break;
        framePointer = savedFramePointer;
    }

    return count;
}

// Coarse plausibility check for an address: non-zero and strictly inside a fixed,
// architecture-dependent range. This is only a range test, not proof that the address
// is mapped.
- (BOOL)isValidAddress:(uintptr_t)address {
#if defined(__arm64__)
    const unsigned long long lowerBound = 0x100000000ULL;
    const unsigned long long upperBound = 0x800000000000ULL;
#else
    const unsigned long long lowerBound = 0x1000;
    const unsigned long long upperBound = 0x7FFFFFFFFFFFULL;
#endif

    if (address == 0) {
        return NO;
    }
    if (address <= lowerBound) {
        return NO;
    }
    return address < upperBound ? YES : NO;
}

// Turns a code address into a readable "<image> <symbol> + <offset>" line using dladdr().
//
// - When the symbol is known, <offset> is relative to the start of that symbol.
// - When only the image is known, <symbol> is "???" and <offset> is relative to the
//   image's load address.
// - When nothing is known, the raw address is returned in hexadecimal.
- (NSString *)symbolForAddress:(uintptr_t)address {
    Dl_info info;

    if (dladdr((const void *)address, &info) == 0) {
        return [NSString stringWithFormat:@"0x%lx", (unsigned long)address];
    }

    // stringWithUTF8String: returns nil for bytes that are not valid UTF-8, so the
    // placeholder is applied after conversion rather than before.
    NSString *symbolName = nil;
    if (info.dli_sname != NULL) {
        symbolName = [NSString stringWithUTF8String:info.dli_sname];
    }

    NSString *imageName = nil;
    if (info.dli_fname != NULL) {
        imageName = [[NSString stringWithUTF8String:info.dli_fname] lastPathComponent];
    }

    // dladdr() can locate the image without finding a symbol; dli_saddr is then NULL and
    // subtracting it would report the raw address as the "offset". Fall back to the
    // image base so the number stays meaningful.
    const void *base = (info.dli_saddr != NULL) ? info.dli_saddr : info.dli_fbase;
    uintptr_t offset = address - (uintptr_t)base;

    return [NSString stringWithFormat:@"%@ %@ + %lu",
            imageName ?: @"???", symbolName ?: @"???", (unsigned long)offset];
}

#pragma mark - Utilities

/// Converts a span of mach_absolute_time() ticks into milliseconds.
/// The timebase ratio is queried once and reused.
- (NSTimeInterval)machTimeToMs:(uint64_t)machTime {
    static dispatch_once_t once;
    static mach_timebase_info_data_t ratio;
    dispatch_once(&once, ^{
        mach_timebase_info(&ratio);
    });

    // ticks * numer / denom gives nanoseconds; 1e6 nanoseconds make one millisecond.
    double nanoseconds = (double)machTime * ratio.numer / ratio.denom;
    return nanoseconds / 1e6;
}

/// Maps a stall duration in milliseconds onto a LagLevel bucket
/// (<50 normal, <100 mild, <250 moderate, <500 severe, otherwise fatal).
- (LagLevel)lagLevelForDuration:(NSTimeInterval)duration {
    LagLevel bucket = LagLevelFatal;

    if (duration < 50) {
        bucket = LagLevelNormal;
    } else if (duration < 100) {
        bucket = LagLevelMild;
    } else if (duration < 250) {
        bucket = LagLevelModerate;
    } else if (duration < 500) {
        bucket = LagLevelSevere;
    }

    return bucket;
}

/// Returns the display name used in log output for a stall level.
- (NSString *)lagLevelName:(LagLevel)level {
    // No `default:` case on purpose, so the compiler can still flag a newly added
    // LagLevel value that is not handled here.
    switch (level) {
        case LagLevelNormal: return @"正常";
        case LagLevelMild: return @"轻微";
        case LagLevelModerate: return @"中度";
        case LagLevelSevere: return @"严重";
        case LagLevelFatal: return @"致命";
    }

    // Reached only for a value outside the enum (e.g. from a bad cast). Without this
    // return the method would fall off the end of a non-void function.
    return @"未知";
}

/// Returns a short name for a single run loop activity, or "Unknown" for any other value.
- (NSString *)activityName:(CFRunLoopActivity)activity {
    NSDictionary<NSNumber *, NSString *> *namesByActivity = @{
        @(kCFRunLoopEntry): @"Entry",
        @(kCFRunLoopBeforeTimers): @"BeforeTimers",
        @(kCFRunLoopBeforeSources): @"BeforeSources",
        @(kCFRunLoopBeforeWaiting): @"BeforeWaiting",
        @(kCFRunLoopAfterWaiting): @"AfterWaiting",
        @(kCFRunLoopExit): @"Exit",
    };

    NSString *name = namesByActivity[@(activity)];
    if (name == nil) {
        return @"Unknown";
    }
    return name;
}

@end

3. 增强版:多次采样聚合

objc 复制代码
// LagStackAggregator.h - stack aggregator
#import <Foundation/Foundation.h>

/// Groups sampled stacks by their leading frames and reports which group occurs most often.
@interface LagStackAggregator : NSObject

/// Adds one sampled stack. Empty stacks are ignored.
- (void)addSample:(NSArray<NSString *> *)stack;

/// Returns the first-seen stack of the most frequent group, or an empty array when no
/// sample has been added. When several groups are equally frequent, the group that was
/// seen first wins.
- (NSArray<NSString *> *)getMostFrequentStack;

/// Returns statistics about all samples: "totalSamples", "uniqueStacks" and
/// "stackDistribution" (group key -> occurrence count).
- (NSDictionary *)getStatistics;

/// Discards all recorded samples.
- (void)reset;

@end

// LagStackAggregator.m

// Number of leading frames that make up a stack's grouping key.
static const NSUInteger kLagStackKeyDepth = 5;

@implementation LagStackAggregator {
    // Number of non-empty samples added since the last reset.
    NSUInteger _sampleCount;
    // Group keys in first-seen order; gives ties a deterministic winner.
    NSMutableArray<NSString *> *_orderedKeys;
    // First full stack seen for each group key.
    NSMutableDictionary<NSString *, NSArray<NSString *> *> *_firstStackByKey;
    // Occurrence count for each group key.
    NSMutableDictionary<NSString *, NSNumber *> *_stackCounts;
}

- (instancetype)init {
    if (self = [super init]) {
        _orderedKeys = [NSMutableArray array];
        _firstStackByKey = [NSMutableDictionary dictionary];
        _stackCounts = [NSMutableDictionary dictionary];
    }
    return self;
}

// Builds the grouping key: the first few frames joined with "|".
- (NSString *)keyForStack:(NSArray<NSString *> *)stack {
    NSUInteger depth = MIN(kLagStackKeyDepth, stack.count);
    return [[stack subarrayWithRange:NSMakeRange(0, depth)]
            componentsJoinedByString:@"|"];
}

- (void)addSample:(NSArray<NSString *> *)stack {
    if (stack.count == 0) return;

    // Copy so later mutation of a caller-owned mutable array cannot change what was recorded.
    NSArray<NSString *> *snapshot = [stack copy];
    NSString *key = [self keyForStack:snapshot];

    _sampleCount++;

    // Only one representative per group is kept, so memory grows with the number of
    // distinct groups rather than with the number of samples.
    if (_firstStackByKey[key] == nil) {
        _firstStackByKey[key] = snapshot;
        [_orderedKeys addObject:key];
    }

    _stackCounts[key] = @(_stackCounts[key].integerValue + 1);
}

- (NSArray<NSString *> *)getMostFrequentStack {
    NSString *bestKey = nil;
    NSInteger bestCount = 0;

    // Iterate in first-seen order with a strict comparison so ties resolve to the
    // earliest group instead of depending on dictionary enumeration order.
    for (NSString *key in _orderedKeys) {
        NSInteger count = _stackCounts[key].integerValue;
        if (count > bestCount) {
            bestCount = count;
            bestKey = key;
        }
    }

    if (bestKey == nil) return @[];
    return _firstStackByKey[bestKey] ?: @[];
}

- (NSDictionary *)getStatistics {
    return @{
        @"totalSamples": @(_sampleCount),
        @"uniqueStacks": @(_stackCounts.count),
        @"stackDistribution": [_stackCounts copy]
    };
}

- (void)reset {
    _sampleCount = 0;
    [_orderedKeys removeAllObjects];
    [_firstStackByKey removeAllObjects];
    [_stackCounts removeAllObjects];
}

@end

4. 使用示例

objc 复制代码
// AppDelegate.m
/// Configures the shared lag monitor and starts it when the app finishes launching.
- (BOOL)application:(UIApplication *)application
        didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {

    // Configure and start stall monitoring.
    LagMonitor *monitor = [LagMonitor shared];
    monitor.threshold = 100;      // 100ms threshold
    monitor.sampleInterval = 50;  // 50ms sampling interval

    [monitor startWithCallback:^(LagInfo *lagInfo) {
        // Handle the stall report.
        NSLog(@"⚠️ 检测到卡顿!");
        // LagLevel is NSUInteger-backed, whose width depends on the platform; cast to
        // match the %lu specifier.
        NSLog(@"级别: %lu, 时长: %.2fms", (unsigned long)lagInfo.level, lagInfo.duration);
        NSLog(@"堆栈:\n%@", lagInfo.symbolStack);

        // Optionally report to a server.
        [self reportLagInfo:lagInfo];

        // In development builds, surface severe stalls with an alert.
        #if DEBUG
        if (lagInfo.level >= LagLevelSevere) {
            [self showLagAlert:lagInfo];
        }
        #endif
    }];

    return YES;
}

/// Assembles the payload describing a stall for the monitoring backend.
/// Sending the payload is left as a placeholder.
- (void)reportLagInfo:(LagInfo *)info {
    // Substitute empty collections for missing values so the literal below never sees nil.
    NSArray<NSString *> *stack = info.callStack ?: @[];
    NSDictionary *context = info.context ?: @{};
    NSTimeInterval occurredAt = [info.timestamp timeIntervalSince1970];

    NSDictionary *report = @{
        @"type": @"lag",
        @"level": @(info.level),
        @"duration": @(info.duration),
        @"stack": stack,
        @"timestamp": @(occurredAt),
        @"context": context,
        @"deviceInfo": [self deviceInfo]
    };

    // Send to the server...
}

/// Shows an alert on the main queue that summarizes a severe stall: its duration and the
/// first 100 characters of the innermost frame. Does nothing when there is no controller
/// to present from or when an alert is already on screen.
- (void)showLagAlert:(LagInfo *)info {
    dispatch_async(dispatch_get_main_queue(), ^{
        // Present from the topmost presented controller: UIKit rejects a presentation
        // from a controller that is already presenting something.
        UIViewController *presenter = UIApplication.sharedApplication.keyWindow.rootViewController;
        while (presenter.presentedViewController != nil) {
            presenter = presenter.presentedViewController;
        }

        // Stall reports can arrive in bursts; do not stack alerts on top of each other.
        if (presenter == nil || [presenter isKindOfClass:[UIAlertController class]]) {
            return;
        }

        // An empty call stack has no first frame; use an empty excerpt instead of
        // letting the format print "(null)".
        NSString *topFrame = info.callStack.firstObject ?: @"";
        NSString *excerpt = [topFrame substringToIndex:MIN((NSUInteger)100, topFrame.length)];

        UIAlertController *alert = [UIAlertController
            alertControllerWithTitle:@"检测到严重卡顿"
            message:[NSString stringWithFormat:@"时长: %.0fms\n\n%@",
                     info.duration,
                     excerpt]
            preferredStyle:UIAlertControllerStyleAlert];

        [alert addAction:[UIAlertAction actionWithTitle:@"查看详情"
                                                  style:UIAlertActionStyleDefault
                                                handler:^(UIAlertAction *action) {
            NSLog(@"完整堆栈:\n%@", info.symbolStack);
        }]];

        [alert addAction:[UIAlertAction actionWithTitle:@"忽略"
                                                  style:UIAlertActionStyleCancel
                                                handler:nil]];

        [presenter presentViewController:alert animated:YES completion:nil];
    });
}

RunLoop 状态说明

复制代码
┌────────────────────────────────────────────────────────────────────┐
│                    RunLoop 一次循环                                  │
├────────────────────────────────────────────────────────────────────┤
│                                                                     │
│  ┌─────────────────┐                                               │
│  │ kCFRunLoopEntry │ ← 进入 RunLoop                                 │
│  └────────┬────────┘                                               │
│           ▼                                                         │
│  ┌─────────────────────┐                                           │
│  │kCFRunLoopBeforeTimers│ ← 即将处理 Timer                          │
│  └────────┬────────────┘                                           │
│           ▼                                                         │
│  ┌──────────────────────┐                                          │
│  │kCFRunLoopBeforeSources│ ← 即将处理 Source ⚠️ 监控点1              │
│  └────────┬─────────────┘                                          │
│           ▼                                                         │
│  ┌─────────────────────────┐                                       │
│  │ 处理 Source0 / Source1  │ ← 主要耗时点!                         │
│  │ (UI事件、手势、网络等)   │                                        │
│  └────────┬────────────────┘                                       │
│           ▼                                                         │
│  ┌──────────────────────┐                                          │
│  │kCFRunLoopBeforeWaiting│ ← 即将休眠                               │
│  └────────┬─────────────┘                                          │
│           ▼                                                         │
│  ┌─────────────────┐                                               │
│  │    休眠等待      │ ← 等待唤醒(无耗时)                           │
│  └────────┬────────┘                                               │
│           ▼                                                         │
│  ┌─────────────────────┐                                           │
│  │kCFRunLoopAfterWaiting│ ← 被唤醒后 ⚠️ 监控点2                      │
│  └────────┬────────────┘                                           │
│           ▼                                                         │
│  ┌─────────────────────────┐                                       │
│  │  处理唤醒事件(Timer等) │ ← 另一个耗时点                         │
│  └────────┬────────────────┘                                       │
│           ▼                                                         │
│  ┌────────────────┐                                                │
│  │ kCFRunLoopExit │ ← 退出                                         │
│  └────────────────┘                                                │
│                                                                     │
└────────────────────────────────────────────────────────────────────┘

关键点总结

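| 组件 | 说明 |
| --- | --- |
| RunLoop Observer | 监听主线程 RunLoop 的全部活动状态;仅当状态停留在 BeforeSources 或 AfterWaiting 时才判定卡顿 |
| 信号量 | 用于超时检测:每次状态变化时发送信号,监控线程等待超时即说明状态长时间未变化 |
| 堆栈采样 | 使用 thread_get_state 获取寄存器,手动回溯栈帧 |
| 符号化 | 使用 dladdr 将地址转换为符号 |
| 阈值设置 | 通常以 100ms 作为卡顿阈值;60Hz 屏幕下一帧约为 16.7ms |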

这个实现可以准确检测主线程卡顿,并捕获导致卡顿的调用堆栈,帮助定位性能问题。

相关推荐
Digitally14 分钟前
如何不用 USB 线将 iPhone 照片传到电脑?
ios·电脑·iphone
Sim148012 小时前
iPhone将内置本地大模型,手机端AI实现0 token成本时代来临?
人工智能·ios·智能手机·iphone
Digitally14 小时前
如何将 iPad 上的照片传输到 U 盘(4 种解决方案)
ios·ipad
报错小能手17 小时前
ios开发方向——swift并发进阶核心 @MainActor 与 DispatchQueue.main 解析
开发语言·ios·swift
LcGero17 小时前
Cocos Creator 业务与原生通信详解
android·ios·cocos creator·游戏开发·jsb
ii_best17 小时前
lua语言开发脚本基础、mql命令库开发、安卓/ios基础开发教程,按键精灵新手工具
android·ios·自动化·编辑器
用户223586218202 天前
WebKit WebPage API 的引入尝试与自研实现
ios
啦啦啦!2 天前
ChatGPT和Gemini的接入和封装
人工智能·ios·chatgpt
报错小能手2 天前
ios开发方向——swift并发进阶核心 async/await 详解
开发语言·ios·swift
开心就好20252 天前
HTTPS超文本传输安全协议全面解析与工作原理
后端·ios