基于Android P版本分析
TimeCheck
css
// A class monitoring execution time for a code block (scoped variable) and causing an assert
// if it exceeds a certain time
监视代码块(通过作用域变量)的执行时间,并在超过指定时间时触发断言的类。在本文的场景中,即用于检测Binder调用超时;
TimeCheck 实现
arduino
#ifndef ANDROID_TIME_CHECK_H
#define ANDROID_TIME_CHECK_H
#include <utils/KeyedVector.h>
#include <utils/Thread.h>
namespace android {
// A class monitoring execution time for a code block (scoped variable) and causing an assert
// if it exceeds a certain time
class TimeCheck {
public:
// The default timeout is chosen to be less than system server watchdog timeout
// 默认的超时检测时间,TimeOut = 5s
static constexpr uint32_t kDefaultTimeOutMs = 5000;
TimeCheck(const char *tag, uint32_t timeoutMs = kDefaultTimeOutMs);
~TimeCheck();
private:
// 内部类,继承自Thread
class TimeCheckThread : public Thread {
public:
TimeCheckThread() {}
virtual ~TimeCheckThread() override;
// 开始检测,返回的是一个nsecs_t类型的时间节点
nsecs_t startMonitoring(const char *tag, uint32_t timeoutMs);
// 停止检测
void stopMonitoring(nsecs_t endTimeNs);
private:
// RefBase
// 即强指针修饰TimeCheckThread的时候,就会直接开启该线程
virtual void onFirstRef() override { run("TimeCheckThread", PRIORITY_URGENT_AUDIO); }
// Thread
virtual bool threadLoop() override;
Condition mCond;
Mutex mMutex;
// using the end time in ns as key is OK given the risk is low that two entries
// are added in such a way that <add time> + <timeout> are the same for both.
// 使用 Ns 中的结束时间作为键是可以的,因为两个条目以<add time> + <timeout>的方式添加的风险很低。
KeyedVector< nsecs_t, const char*> mMonitorRequests;
};
// 获取TimeCheckThread线程
static sp<TimeCheckThread> getTimeCheckThread();
// 结束时间节点
const nsecs_t mEndTimeNs;
};
}; // namespace android
#endif // ANDROID_TIME_CHECK_H
scss
#include <media/TimeCheck.h>
namespace android {
/* static */
// Returns the shared monitor thread. The instance is created on the first call
// and kept in a function-local static strong pointer; the thread itself is
// started by TimeCheckThread::onFirstRef().
sp<TimeCheck::TimeCheckThread> TimeCheck::getTimeCheckThread()
{
    static sp<TimeCheckThread> sMonitor(new TimeCheckThread());
    return sMonitor;
}
// 在TimeCheck的构造函数中,直接调用了TimeCheckThread的startMonitoring函数开始监听计时
TimeCheck::TimeCheck(const char *tag, uint32_t timeoutMs)
// timeoutMs默认为kDefaultTimeOutMs
: mEndTimeNs(getTimeCheckThread()->startMonitoring(tag, timeoutMs))
{
}
// Leaving the monitored scope: withdraw the request that the constructor
// registered, identified by its end time.
TimeCheck::~TimeCheck() {
    const sp<TimeCheckThread> monitor = getTimeCheckThread();
    monitor->stopMonitoring(mEndTimeNs);
}
// Tears the monitor down while holding mMutex: asks the thread to exit, drops
// every pending request, then signals mCond so that a threadLoop() blocked in
// mCond.waitRelative() wakes up and can observe the exit request.
TimeCheck::TimeCheckThread::~TimeCheckThread()
{
    Mutex::Autolock lock(mMutex);
    requestExit();
    mMonitorRequests.clear();
    mCond.signal();
}
// Registers a new monitoring request and returns the end time used as its key.
//
// The end time is the current systemTime() plus the timeout (converted from
// milliseconds). If that key is already taken it is incremented until it is
// unique. Only the `tag` pointer is stored, not a copy of the string, so the
// caller must keep it valid until stopMonitoring() is called for the returned key.
nsecs_t TimeCheck::TimeCheckThread::startMonitoring(const char *tag, uint32_t timeoutMs) {
    AutoMutex lock(mMutex);

    nsecs_t deadlineNs = systemTime() + milliseconds(timeoutMs);
    while (mMonitorRequests.indexOfKey(deadlineNs) >= 0) {
        ++deadlineNs;
    }
    mMonitorRequests.add(deadlineNs, tag);

    // Wake the monitor thread so it re-reads the earliest end time.
    mCond.signal();
    return deadlineNs;
}
// Removes the request keyed by `endTimeNs` (the value startMonitoring()
// returned) and signals mCond so a waiting threadLoop() wakes up.
void TimeCheck::TimeCheckThread::stopMonitoring(nsecs_t endTimeNs) {
    AutoMutex lock(mMutex);
    mMonitorRequests.removeItem(endTimeNs);
    mCond.signal();
}
// One iteration of the monitor thread.
//
// Waits until the earliest pending end time (or until signalled by
// startMonitoring()/stopMonitoring()/the destructor). If the wait does not end
// with NO_ERROR, or the earliest end time has already passed, the process is
// aborted through LOG_ALWAYS_FATAL_IF with the tag of the expired request.
//
// Returns false when an exit has been requested, true to be called again.
bool TimeCheck::TimeCheckThread::threadLoop()
{
    AutoMutex _l(mMutex);

    if (exitPending()) {
        return false;
    }

    status_t status = TIMED_OUT;
    nsecs_t endTimeNs = INT64_MAX;
    // Always give tag a printable value: with no pending request the pointer
    // would otherwise be uninitialized if the wait below fails.
    const char *tag = "<unspecified>";

    // KeyedVector mMonitorRequests is ordered so take first entry as next timeout
    if (mMonitorRequests.size() != 0) {
        endTimeNs = mMonitorRequests.keyAt(0);
        tag = mMonitorRequests.valueAt(0);
    }

    // Only wait if the end time is still in the future; otherwise status keeps
    // its TIMED_OUT initial value and the check below fires immediately.
    const nsecs_t waitTimeNs = endTimeNs - systemTime();
    if (waitTimeNs > 0) {
        // Unlike wait(), waitRelative() also returns once waitTimeNs elapsed;
        // the return value tells the two cases apart.
        status = mCond.waitRelative(mMutex, waitTimeNs);
    }

    // Report while mMutex is still held: tag is a caller-owned pointer stored
    // as-is in mMonitorRequests, and a stopMonitoring() that has not yet taken
    // the lock cannot complete (and let its caller release the string) while
    // the message is being formatted.
    LOG_ALWAYS_FATAL_IF(status != NO_ERROR, "TimeCheck timeout for %s", tag);
    return true;
}
}; // namespace android
TimeCheck 使用
在了解TimeCheck的使用之前,我们先了解一下Binder的概念,因为TimeCheck在这里被用来监控Binder调用(onTransact)的执行时长(TimeCheck本身的实现并不依赖Binder);
针对TimeCheck的使用,在BnAudioPolicyService中定义;
arduino
// Excerpt of the IAudioPolicyService transaction handler; the dotted lines mark
// code elided by the article. Shows how a TimeCheck guards the handling of one
// transaction code.
status_t BnAudioPolicyService::onTransact(
uint32_t code, const Parcel& data, Parcel* reply, uint32_t flags)
{
..................
char timeCheckString[64];
snprintf(timeCheckString, sizeof(timeCheckString), "IAudioPolicyService: %d", code);
// Create a TimeCheck instance, passing timeCheckString as the tag argument.
// `check` is not a function: it is a local variable of type TimeCheck, so this
// statement runs the TimeCheck(const char *tag, uint32_t timeoutMs) constructor
// with the default timeout.
// There is no explicit destructor call either: ~TimeCheck() runs automatically
// when `check` goes out of scope, i.e. when onTransact() returns.
TimeCheck check(timeCheckString);
switch (code) {
case SET_DEVICE_CONNECTION_STATE: {
CHECK_INTERFACE(IAudioPolicyService, data, reply);
audio_devices_t device =
static_cast <audio_devices_t>(data.readInt32());
audio_policy_dev_state_t state =
static_cast <audio_policy_dev_state_t>(data.readInt32());
const char *device_address = data.readCString();
const char *device_name = data.readCString();
// A missing string is answered with BAD_VALUE instead of calling the service.
if (device_address == nullptr || device_name == nullptr) {
ALOGE("Bad Binder transaction: SET_DEVICE_CONNECTION_STATE for device %u", device);
reply->writeInt32(static_cast<int32_t> (BAD_VALUE));
} else {
reply->writeInt32(static_cast<uint32_t> (setDeviceConnectionState(device,
state,
device_address,
device_name)));
}
return NO_ERROR;
} break;
..................
}
}
在BnAudioPolicyService的onTransact函数开始,创建TimeCheck实例,开启TimeCheckThread线程,然后执行后续的switch过程;
在onTransact结束的时候,TimeCheck的析构函数会自动执行,其中会调用stopMonitoring()移除对应的监听请求,并通过mCond.signal()唤醒TimeCheckThread;此时waitRelative返回NO_ERROR,不会触发断言,threadLoop返回true,线程进入下一轮循环继续监听其余请求;
如果在设定的超时时间内调用没有返回,即TimeCheck没有及时析构,那么waitRelative就会返回TIMED_OUT状态,进而由LOG_ALWAYS_FATAL_IF触发断言;
TimeCheck 使用场景
目前在源码中找到了两处使用:
- AudioFlinger
c
// Excerpt of the IAudioFlinger transaction handler; the dotted lines mark code
// elided by the article.
status_t BnAudioFlinger::onTransact(
uint32_t code, const Parcel& data, Parcel* reply, uint32_t flags)
{
........................
char timeCheckString[64];
snprintf(timeCheckString, sizeof(timeCheckString), "IAudioFlinger: %d", code);
// Scoped TimeCheck tagged with the transaction code, using the default
// timeout; it is destroyed automatically when `check` goes out of scope.
TimeCheck check(timeCheckString);
........................
}
- AudioPolicyService
c
// Excerpt of the IAudioPolicyService transaction handler; the dotted lines mark
// code elided by the article.
status_t BnAudioPolicyService::onTransact(
uint32_t code, const Parcel& data, Parcel* reply, uint32_t flags)
{
..................
char timeCheckString[64];
snprintf(timeCheckString, sizeof(timeCheckString), "IAudioPolicyService: %d", code);
// Scoped TimeCheck tagged with the transaction code, using the default
// timeout; it is destroyed automatically when `check` goes out of scope.
TimeCheck check(timeCheckString);
........................
}
这两处都严格限制了Binder调用(onTransact)的执行时长,一旦超过默认的5s,TimeCheckThread就会触发LOG_ALWAYS_FATAL_IF断言,使所在进程abort(SIGABRT),如下文的异常日志所示;
异常
backtrace --- 1
less
Build fingerprint: 'Android/sa8155_v35_b16/sa8155_v35_b16:9/PQ1A.190105.004/285:userdebug/test-keys'
Revision: '0'
ABI: 'arm'
pid: 681, tid: 1280, name: TimeCheckThread >>> /system/bin/audioserver <<<
signal 6 (SIGABRT), code -6 (SI_TKILL), fault addr --------
Abort message: 'TimeCheck timeout for IAudioPolicyService: 6'
r0 00000000 r1 00000500 r2 00000006 r3 efa62830
r4 000002a9 r5 00000500 r6 ecf854a4 r7 0000010c
r8 ff8a73f4 r9 ef31e158 r10 3b9aca00 r11 ef8dcd69
ip 7fffffff sp ecf85490 lr ef9ee105 pc ef9e5e92
backtrace:
#00 pc 0001ce92 /system/lib/libc.so (abort+62)
#01 pc 00006dd5 /system/lib/liblog.so (__android_log_assert+156)
#02 pc 0000e233 /system/lib/libmedia_helper.so (android::TimeCheck::TimeCheckThread::threadLoop()+270)
#03 pc 0000c1bf /system/lib/libutils.so (android::Thread::_threadLoop(void*)+286)
#04 pc 00063c85 /system/lib/libc.so (__pthread_start(void*)+22)
#05 pc 0001e085 /system/lib/libc.so (__start_thread+22)
- pid: 681:异常进程;
- tid: 1280:异常线程;
- code -6 (SI_TKILL):这是信号的si_code,表示该SIGABRT是通过tkill发出的(即abort()触发),与Binder指令无关;Binder指令的code来自Abort message中的"IAudioPolicyService: 6",对应BnAudioPolicyService::onTransact()函数中的code参数,在B16项目中,code = 6 = SET_FORCE_USE;
- Abort message:对应的就是LOG_ALWAYS_FATAL_IF中的提示message;
- backtrace:TimeCheckThread报错源码位置(回溯);
backtrace --- 2
rust
backtrace:
#00 pc 00019d74 /system/lib/libc.so (syscall+28)
#01 pc 0001d235 /system/lib/libc.so (__futex_wait_ex(void volatile*, bool, int, bool, timespec const*)+88)
#02 pc 000647fd /system/lib/libc.so (NonPI::MutexLockWithTimeout(pthread_mutex_internal_t*, bool, timespec const*)+156)
#03 pc 0000a671 /system/lib/libaudiopolicyservice.so (android::AudioPolicyService::AudioCommandThread::sendCommand(android::sp<android::AudioPolicyService::AudioCommandThread::AudioCommand>&, int)+28)
#04 pc 0000a883 /system/lib/libaudiopolicyservice.so (android::AudioPolicyService::AudioCommandThread::volumeCommand(audio_stream_type_t, float, int, int)+126)
#05 pc 00033fc3 /system/lib/libaudiopolicymanagerdefault.so (android::SwAudioOutputDescriptor::setVolume(float, audio_stream_type_t, unsigned int, unsigned int, bool)+142)
#06 pc 0000ae83 /system/lib/libaudiopolicymanager.so (android::AudioPolicyManagerCustom::checkAndSetVolume(audio_stream_type_t, int, android::sp<android::AudioOutputDescriptor> const&, unsigned int, int, bool)+222)
#07 pc 0001f999 /system/lib/libaudiopolicymanagerdefault.so (android::AudioPolicyManager::applyStreamVolumes(android::sp<android::AudioOutputDescriptor> const&, unsigned int, int, bool)+52)
#08 pc 0000a64f /system/lib/libaudiopolicymanager.so (android::AudioPolicyManagerCustom::setForceUse(audio_policy_force_use_t, audio_policy_forced_cfg_t)+566)
#09 pc 0000db81 /system/lib/libaudiopolicyservice.so (android::AudioPolicyService::setForceUse(audio_policy_force_use_t, audio_policy_forced_cfg_t)+66)
#10 pc 00040bd9 /system/lib/libaudioclient.so (android::BnAudioPolicyService::onTransact(unsigned int, android::Parcel const&, android::Parcel*, unsigned int)+4336)
#11 pc 000361cf /system/lib/libbinder.so (android::BBinder::transact(unsigned int, android::Parcel const&, android::Parcel*, unsigned int)+70)
#12 pc 0003da17 /system/lib/libbinder.so (android::IPCThreadState::executeCommand(int)+410)
#13 pc 0003d7a3 /system/lib/libbinder.so (android::IPCThreadState::getAndExecuteCommand()+106)
#14 pc 0003dccb /system/lib/libbinder.so (android::IPCThreadState::joinThreadPool(bool)+38)
#15 pc 0000358d /system/bin/audioserver (main+568)
#16 pc 0008bdf9 /system/lib/libc.so (__libc_init+48)
#17 pc 00003313 /system/bin/audioserver (_start_main+46)
#18 pc 00019a27 /system/bin/linker (__dl__ZNSt3__112__hash_tableINS_17__hash_value_typeIjP6soinfoEENS_22__unordered_map_hasherIjS4_NS_4hashIjEELb1EEENS_21__unordered_map_equalIjS4_NS_8equal_toIjEELb1EEENS_9allocatorIS4_EEE14__erase_uniqueIjEEjRKT_+90)
#19 pc 00020b0a [stack:ff887000]
在dropbox文件中,存在两处backtrace,这个为第二处,代表了真正导致异常的逻辑;
我们可以看到,setForceUse的调用链最终阻塞在AudioCommandThread::sendCommand()中获取互斥锁(MutexLockWithTimeout)的位置,调用未能在5s内返回,从而触发了TimeCheck超时;
arduino
03-24 18:38:05.546 681 681 D AudioPolicyManagerCustom: setForceUse() usage 0, config 1, mPhoneState 2
在 18:38:05.546 时刻调用了setForceUse之后,一直没有return,导致了TimeOut,但是否是这个函数调用本身导致的,目前不能断定;同时后续所有的Binder调用都无法返回,例如setPhoneState;
backtrace --- 3
rust
pid: 681, tid: 875, name: HwBinder:681_1 >>> /system/bin/audioserver <<<
r0 00000004 r1 c0306201 r2 ee95d878 r3 ee95d874
r4 ee303000 r5 ee95d878 r6 00000000 r7 00000036
r8 ee303058 r9 c0306201 r10 ee95d898 r11 ee30309c
ip ee95d880 sp ee95d860 lr ef9eac35 pc efa1cf7c
backtrace:
#00 pc 00053f7c /system/lib/libc.so (__ioctl+8)
#01 pc 00021c31 /system/lib/libc.so (ioctl+36)
#02 pc 00015def /system/lib/libhwbinder.so (android::hardware::IPCThreadState::talkWithDriver(bool)+190)
#03 pc 0000f0b3 /system/lib/libhwbinder.so (android::hardware::IPCThreadState::getAndExecuteCommand()+10)
#04 pc 0000f33d /system/lib/libhwbinder.so (android::hardware::IPCThreadState::joinThreadPool(bool)+188)
#05 pc 00015491 /system/lib/libhwbinder.so (android::hardware::PoolThread::threadLoop()+12)
#06 pc 0000c147 /system/lib/libutils.so (android::Thread::_threadLoop(void*)+166)
#07 pc 00063c85 /system/lib/libc.so (__pthread_start(void*)+22)
#08 pc 0001e085 /system/lib/libc.so (__start_thread+22)
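需要注意的是,该线程停在joinThreadPool()→talkWithDriver()→ioctl(),这通常是HwBinder线程池线程等待新事务时的空闲状态,仅凭这一处backtrace并不能说明Binder机制本身出现了问题;导致超时的具体原因暂时分析不出来,还需要结合backtrace 2中阻塞的互斥锁继续排查;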