这是一个介绍 Android 平台日志系统的系列文章:
- Android 平台日志系统整体框架
- logd 守护进程初始化过程
- 客户端写日志过程分析
- logd 写日志过程分析一
- logd 写日志过程分析二(本文)
- 冗余日志处理过程分析
- logcat 读日志过程分析
- logd 读日志过程分析
本文基于 AOSP android-10.0.0_r41 版本讲解
1. SocketListener 读取 App 发送的日志信息
上一篇,我们讲到 SocketListener 收到 App 发送过来的日志信息,接着会调用 onDataAvailable() 来处理收到的数据:
cpp
// system/core/logd/LogListener.cpp
bool LogListener::onDataAvailable(SocketClient* cli) {
// Called by SocketListener when a client datagram is ready; reads one log
// message, validates the sender's credentials, and hands the payload to
// logbuf->log(). Returns true only when a message was accepted.
// prctl(PR_SET_NAME) names this worker thread "logd.writer" (visible in
// ps / debuggerd output); done once, guarded by a function-local flag.
static bool name_set;
if (!name_set) {
prctl(PR_SET_NAME, "logd.writer");
name_set = true;
}
// + 1 to ensure null terminator if MAX_PAYLOAD buffer is received
char buffer[sizeof_log_id_t + sizeof(uint16_t) + sizeof(log_time) +
LOGGER_ENTRY_MAX_PAYLOAD + 1];
struct iovec iov = { buffer, sizeof(buffer) - 1 };
alignas(4) char control[CMSG_SPACE(sizeof(struct ucred))];
struct msghdr hdr = {
nullptr, 0, &iov, 1, control, sizeof(control), 0,
};
int socket = cli->getSocket();
// Read the datagram (android_log_header_t + payload) with recvmsg(); the
// ancillary `control` buffer receives the sender credentials (see below).
// To clear the entire buffer is secure/safe, but this contributes to 1.68%
// overhead under logging load. We are safe because we check counts, but
// still need to clear null terminator
// memset(buffer, 0, sizeof(buffer));
ssize_t n = recvmsg(socket, &hdr, 0);
if (n <= (ssize_t)(sizeof(android_log_header_t))) {
return false;
}
buffer[n] = 0;
// Because the socket was created with the SO_PASSCRED option enabled, the
// kernel attaches a struct ucred (pid/uid/gid of the sender) as an
// SCM_CREDENTIALS control message; walk the cmsg chain to find it.
struct ucred* cred = nullptr;
struct cmsghdr* cmsg = CMSG_FIRSTHDR(&hdr);
while (cmsg != nullptr) {
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_CREDENTIALS) {
cred = (struct ucred*)CMSG_DATA(cmsg);
break;
}
cmsg = CMSG_NXTHDR(&hdr, cmsg);
}
// No credentials supplied: fall back to a placeholder identity so the
// special-value fixups below can still run.
struct ucred fake_cred;
if (cred == nullptr) {
cred = &fake_cred;
cred->pid = 0;
cred->uid = DEFAULT_OVERFLOWUID;
}
if (cred->uid == AID_LOGD) {
// ignore log messages we send to ourself.
// Such log messages are often generated by libraries we depend on
// which use standard Android logging.
return false;
}
// Handle special/placeholder values inside struct ucred separately:
// reject invalid buffer ids, gate the security buffer, and reconstruct
// missing pid/uid from /proc via the tid in the header.
android_log_header_t* header =
reinterpret_cast<android_log_header_t*>(buffer);
log_id_t logId = static_cast<log_id_t>(header->id);
if (/* logId < LOG_ID_MIN || */ logId >= LOG_ID_MAX ||
logId == LOG_ID_KERNEL) {
return false;
}
if ((logId == LOG_ID_SECURITY) &&
(!__android_log_security() ||
!clientHasLogCredentials(cred->uid, cred->gid, cred->pid))) {
return false;
}
// Check credential validity, acquire corrected details if not supplied.
if (cred->pid == 0) {
cred->pid = logbuf ? logbuf->tidToPid(header->tid)
: android::tidToPid(header->tid);
if (cred->pid == getpid()) {
// We expect that /proc/<tid>/ is accessible to self even without
// readproc group, so that we will always drop messages that come
// from any of our logd threads and their library calls.
return false; // ignore self
}
}
if (cred->uid == DEFAULT_OVERFLOWUID) {
uid_t uid =
logbuf ? logbuf->pidToUid(cred->pid) : android::pidToUid(cred->pid);
if (uid == AID_LOGD) {
uid = logbuf ? logbuf->pidToUid(header->tid)
: android::pidToUid(cred->pid);
}
if (uid != AID_LOGD) cred->uid = uid;
}
// The payload starts right after the header.
char* msg = ((char*)buffer) + sizeof(android_log_header_t);
n -= sizeof(android_log_header_t);
// NB: hdr.msg_flags & MSG_TRUNC is not tested, silently passing a
// truncated message to the logs.
// Store the message by calling logbuf->log(); len is clamped to UINT16_MAX.
if (logbuf != nullptr) {
int res = logbuf->log(
logId, header->realtime, cred->uid, cred->pid, header->tid, msg,
((size_t)n <= UINT16_MAX) ? (uint16_t)n : UINT16_MAX);
// Readers may be blocked waiting for data, so wake them via
// reader->notifyNewLog() with a mask selecting this buffer id.
if (res > 0 && reader != nullptr) {
reader->notifyNewLog(static_cast<log_mask_t>(1 << logId));
}
}
return true;
}
函数主要流程如下:
- prctl 系统调用用于设置进程相关的一些东西。这里使用 PR_SET_NAME 设置了线程的名字为 logd.writer
- 由于我们创建 socket 时打开了 SO_PASSCRED 选项,这里我们可以读取一个用于表示客户端身份的 struct ucred
- 接下来,如果 struct ucred 内部有一些特殊值,做单独处理
- 调用 logbuf->log 将数据写入 LogBuffer
- 此时可能有客户端在等待读取数据,于是也调用 reader->notifyNewLog()
2. 写入 LogBuffer
log 的写入分为 4 个步骤:
- 根据日志的内容对数据做调整,数据不对,就直接 return
- 使用一个状态机去除重复的 log
- 写入 log
- 如果需要,删除一些 log 以避免 log 数据过多
接下来分步查看源码:
根据 tag 和优先级判断该 log 是否可以写入
cpp
// system/core/logd/LogBuffer.cpp
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
pid_t tid, const char* msg, uint16_t len) {
// Entry point for storing one log message. This excerpt shows step 1:
// validate the buffer id and drop messages that fail the loggability check.
if (log_id >= LOG_ID_MAX) {
return -EINVAL;
}
// Slip the time by 1 nsec if the incoming lands on xxxxxx000 ns.
// This prevents any chance that an outside source can request an
// exact entry with time specified in ms or us precision.
if ((realtime.tv_nsec % 1000) == 0) ++realtime.tv_nsec;
// A LogBufferElement holds one log entry (id, timestamp, credentials, payload).
LogBufferElement* elem =
new LogBufferElement(log_id, realtime, uid, pid, tid, msg, len);
// Derive priority and tag from the payload so the message can be filtered.
if (log_id != LOG_ID_SECURITY) {
int prio = ANDROID_LOG_INFO;
const char* tag = nullptr;
size_t tag_len = 0;
if (log_id == LOG_ID_EVENTS || log_id == LOG_ID_STATS) {
// Binary event buffers carry a numeric tag; map it to a name.
tag = tagToName(elem->getTag());
if (tag) {
tag_len = strlen(tag);
}
} else {
// Text buffers: first byte is the priority, followed by the NUL-terminated tag.
prio = *msg;
tag = msg + 1;
tag_len = strnlen(tag, len - 1);
}
if (!__android_log_is_loggable_len(prio, tag, tag_len,
ANDROID_LOG_VERBOSE)) {
// Log traffic received to total
wrlock();
stats.addTotal(elem);
unlock();
delete elem;
return -EACCES;
}
}
// ......
}
使用一个状态机去除重复的 log
cpp
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
pid_t tid, const char* msg, uint16_t len) {
//......
// Step 2: de-duplicate "chatty" messages with a small per-buffer state
// machine. lastLoggedElements/droppedElements hold the state; the state
// transition table is documented in the comment block below.
wrlock();
LogBufferElement* currentLast = lastLoggedElements[log_id];
if (currentLast) {
LogBufferElement* dropped = droppedElements[log_id];
uint16_t count = dropped ? dropped->getDropped() : 0;
//
// State Init
// incoming:
// dropped = nullptr
// currentLast = nullptr;
// elem = incoming message
// outgoing:
// dropped = nullptr -> State 0
// currentLast = copy of elem
// log elem
// State 0
// incoming:
// count = 0
// dropped = nullptr
// currentLast = copy of last message
// elem = incoming message
// outgoing: if match != DIFFERENT
// dropped = copy of first identical message -> State 1
// currentLast = reference to elem
// break: if match == DIFFERENT
// dropped = nullptr -> State 0
// delete copy of last message (incoming currentLast)
// currentLast = copy of elem
// log elem
// State 1
// incoming:
// count = 0
// dropped = copy of first identical message
// currentLast = reference to last held-back incoming
// message
// elem = incoming message
// outgoing: if match == SAME
// delete copy of first identical message (dropped)
// dropped = reference to last held-back incoming
// message set to chatty count of 1 -> State 2
// currentLast = reference to elem
// outgoing: if match == SAME_LIBLOG
// dropped = copy of first identical message -> State 1
// take sum of currentLast and elem
// if sum overflows:
// log currentLast
// currentLast = reference to elem
// else
// delete currentLast
// currentLast = reference to elem, sum liblog.
// break: if match == DIFFERENT
// delete dropped
// dropped = nullptr -> State 0
// log reference to last held-back (currentLast)
// currentLast = copy of elem
// log elem
// State 2
// incoming:
// count = chatty count
// dropped = chatty message holding count
// currentLast = reference to last held-back incoming
// message.
// dropped = chatty message holding count
// elem = incoming message
// outgoing: if match != DIFFERENT
// delete chatty message holding count
// dropped = reference to last held-back incoming
// message, set to chatty count + 1
// currentLast = reference to elem
// break: if match == DIFFERENT
// log dropped (chatty message)
// dropped = nullptr -> State 0
// log reference to last held-back (currentLast)
// currentLast = copy of elem
// log elem
//
enum match_type match = identical(elem, currentLast);
if (match != DIFFERENT) {
if (dropped) {
// Sum up liblog tag messages?
if ((count == 0) /* at Pass 1 */ && (match == SAME_LIBLOG)) {
android_log_event_int_t* event =
reinterpret_cast<android_log_event_int_t*>(
const_cast<char*>(currentLast->getMsg()));
//
// To unit test, differentiate with something like:
// event->header.tag = htole32(CHATTY_LOG_TAG);
// here, then instead of delete currentLast below,
// log(currentLast) to see the incremental sums form.
//
uint32_t swab = event->payload.data;
unsigned long long total = htole32(swab);
event = reinterpret_cast<android_log_event_int_t*>(
const_cast<char*>(elem->getMsg()));
swab = event->payload.data;
lastLoggedElements[LOG_ID_EVENTS] = elem;
total += htole32(swab);
// check for overflow
if (total >= UINT32_MAX) {
log(currentLast);
unlock();
return len;
}
stats.addTotal(currentLast);
delete currentLast;
// Fold the accumulated sum back into the incoming event's payload.
swab = total;
event->payload.data = htole32(swab);
unlock();
return len;
}
// Chatty counter saturates at USHRT_MAX: flush and restart the count.
if (count == USHRT_MAX) {
log(dropped);
count = 1;
} else {
delete dropped;
++count;
}
}
if (count) {
stats.addTotal(currentLast);
currentLast->setDropped(count);
}
// Hold the latest duplicate back; it is only emitted when a different
// message finally arrives (the break paths below).
droppedElements[log_id] = currentLast;
lastLoggedElements[log_id] = elem;
unlock();
return len;
}
if (dropped) { // State 1 or 2
if (count) { // State 2
log(dropped); // report chatty
} else { // State 1
delete dropped;
}
droppedElements[log_id] = nullptr;
log(currentLast); // report last message in the series
} else { // State 0
delete currentLast;
}
}
lastLoggedElements[log_id] = new LogBufferElement(*elem);
// .....
这里使用一个状态机来去除重复的 log,知道流程即可,有兴趣的同学可以参考源码中的注释自行分析。
写入 log
cpp
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
pid_t tid, const char* msg, uint16_t len) {
//......
// Step 3: not a duplicate — store the element via the private overload
// (still holding the write lock taken earlier), then release the lock.
log(elem);
unlock();
return len;
}
这里调用另一个重载 log:
cpp
// assumes LogBuffer::wrlock() held, owns elem, look after garbage collection
// assumes LogBuffer::wrlock() held, owns elem, look after garbage collection
void LogBuffer::log(LogBufferElement* elem) {
// Places elem into mLogElements, keeping the list time-sorted when cheap
// to do so, then updates statistics and triggers pruning if over budget.
// cap on how far back we will sort in-place, otherwise append
static uint32_t too_far_back = 5; // five seconds
// Insert elements in time sorted order if possible
// NB: if end is region locked, place element at end of list
LogBufferElementCollection::iterator it = mLogElements.end();
LogBufferElementCollection::iterator last = it;
if (__predict_true(it != mLogElements.begin())) --it;
// Fast path: empty list, already in order, or the tail is so far ahead
// (> too_far_back seconds) that in-place sorting is not worth it.
if (__predict_false(it == mLogElements.begin()) ||
__predict_true((*it)->getRealTime() <= elem->getRealTime()) ||
__predict_false((((*it)->getRealTime().tv_sec - too_far_back) >
elem->getRealTime().tv_sec) &&
(elem->getLogId() != LOG_ID_KERNEL) &&
((*it)->getLogId() != LOG_ID_KERNEL))) {
mLogElements.push_back(elem);
} else {
// Slow path: determine how far back we may safely insert without
// disturbing a reader region; `end` tracks the reader watermarks.
log_time end(log_time::EPOCH);
bool end_set = false;
bool end_always = false;
LogTimeEntry::rdlock();
LastLogTimes::iterator times = mTimes.begin();
while (times != mTimes.end()) {
LogTimeEntry* entry = times->get();
// A blocking reader pins the tail: we must append, never insert.
if (!entry->mNonBlock) {
end_always = true;
break;
}
// it passing mEnd is blocked by the following checks.
if (!end_set || (end <= entry->mEnd)) {
end = entry->mEnd;
end_set = true;
}
times++;
}
if (end_always || (end_set && (end > (*it)->getRealTime()))) {
mLogElements.push_back(elem);
} else {
// should be short as timestamps are localized near end()
// Walk backwards (insertion-sort style) to the first element not
// newer than elem, stopping at a reader watermark.
do {
last = it;
if (__predict_false(it == mLogElements.begin())) {
break;
}
--it;
} while (((*it)->getRealTime() > elem->getRealTime()) &&
(!end_set || (end <= (*it)->getRealTime())));
mLogElements.insert(last, elem);
}
LogTimeEntry::unlock();
}
stats.add(elem);
// Step 4: prune old entries if this buffer id is over its size budget.
maybePrune(elem->getLogId());
}
__predict_true 和 __predict_false 用来提示编译器对应的判断很可能是 true/false,类似于 Linux 内核的 likely/unlikely。如果判断正确,可以得到很大的性能提升。
客户端使用 Unix-domain socket 来写入 log,读出来的时候,很可能就已经按时间顺序排好,这个时候,只需要把 LogBufferElement 插入列表末尾就可以了。
万一很不幸的,上面的判断失败了,就只能从后往前遍历列表,找到一个合适的位置来插入 LogBufferElement。这个过程类似于插入排序。
如果需要,删除一些 log 以避免 log 数据过多
void LogBuffer::log(LogBufferElement* elem)
在最后会调用 maybePrune,根据 log 总量判断是否需要删除一些 log。
cpp
// system/core/logd/LogBuffer.cpp
// Prune at most 10% of the log entries or maxPrune, whichever is less.
//
// LogBuffer::wrlock() must be held when this function is called.
void LogBuffer::maybePrune(log_id_t id) {
    // If buffer `id` is over its configured size budget, prune enough rows to
    // bring it back to roughly 90% of the budget, clamped to [minPrune-ish
    // floor, maxPrune]. Caller must hold LogBuffer::wrlock().
    size_t used = stats.sizes(id);
    unsigned long limit = log_buffer_size(id);
    if (used <= limit) {
        return;  // within budget, nothing to do
    }
    // Bytes above the 90% watermark that we want to reclaim.
    size_t overage = used - ((limit * 9) / 10);
    size_t total = stats.realElements(id);
    // Floor: at least 1% of this buffer's entries, and never below minPrune.
    size_t floorRows = total / 100;
    if (floorRows < minPrune) {
        floorRows = minPrune;
    }
    // Estimate row count from the byte overage assuming average entry size.
    unsigned long rows = total * overage / used;
    if (rows < floorRows) {
        rows = floorRows;
    }
    // Ceiling applied last, so maxPrune wins over the floor if they conflict.
    if (rows > maxPrune) {
        rows = maxPrune;
    }
    prune(id, rows);
}
如果 id 类型的 log 超过了总量限制,就删除约 10% 的 log。在这个前提下,所删除的 log 条数不小于 minElements,也不大于 maxPrune。其中,minElements 取该 id 类型 log 总条数的百分之一与 minPrune 两者中的较大值。
当每条 log 都很大、log 总量又很小,限制最小值可以避免总是需要剔除旧 log。如果 log 每条很小,限制最大数目可以避免删除过多的 log。
实际的删除工作由 prune 方法完成,这个我们会在《冗余日志处理过程分析》一文中讲解。