macos的信息采集器appledataHarvester-2

1.src/LogSource/ProcessMonitor.cpp

#include "ProcessMonitor.hpp"

#include "Utils/Logger.hpp"

#include <sys/types.h>
#include <sys/sysctl.h>
#include <grp.h>
#include <libproc.h>
#include <pwd.h>

#include <algorithm>
#include <array>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

/// Private implementation type held by ProcessMonitor through pImpl_.
/// It carries no state, so the compiler-generated special members are sufficient.
class ProcessMonitor::Impl {};

/// Creates a monitor for the given process names.
///
/// An empty list, or a list whose only element is "*", puts the monitor into
/// "include every process" mode; otherwise only names matching the list are kept.
///
/// @param monitoredProcesses Process names (optionally containing '*') to watch.
ProcessMonitor::ProcessMonitor(const std::vector<std::string>& monitoredProcesses)
    : pImpl_(std::make_unique<Impl>())
    , monitoredProcesses_(monitoredProcesses) {
    const bool wildcardOnly =
        monitoredProcesses.size() == 1 && monitoredProcesses[0] == "*";

    // Assign on both paths so the flag never depends on a default in the header.
    includeAllProcesses_ = monitoredProcesses.empty() || wildcardOnly;

    if (includeAllProcesses_) {
        LOG_INFO("ProcessMonitor initialized - monitoring all processes");
    } else {
        LOG_INFO("ProcessMonitor initialized - monitoring {} processes",
                 monitoredProcesses.size());
    }
}

/// Logs the teardown; owned members release themselves.
ProcessMonitor::~ProcessMonitor() {
    LOG_INFO("ProcessMonitor destroyed");
}

std::vector<nlohmann::json> ProcessMonitor::collectProcesses() {

std::vector<nlohmann::json> processes;

try {

LOG_DEBUG("Starting process collection");

// 执行ps命令获取进程列表

std::string psOutput = executePSCommand("-eo pid,ppid,user,%cpu,%mem,command");

// 解析输出

processes = parsePSOutput(psOutput);

// 更新统计信息

updateStats(processes);

LOG_INFO("Collected {} processes", processes.size());

} catch (const std::exception& e) {

LOG_ERROR("Failed to collect processes: {}", e.what());

}

return processes;

}

/// Runs `ps <options>` through popen() and returns everything it wrote to stdout.
///
/// stderr is discarded. A non-zero status from pclose() is only logged; the
/// captured text is still returned.
///
/// @param options Command-line options appended verbatim after "ps ".
/// @return Captured stdout, or an empty string when the pipe cannot be opened.
std::string ProcessMonitor::executePSCommand(const std::string& options) const {
    const std::string commandLine = "ps " + options + " 2>/dev/null";
    LOG_DEBUG("Executing command: {}", commandLine);

    FILE* const child = popen(commandLine.c_str(), "r");
    if (child == nullptr) {
        LOG_ERROR("Failed to execute ps command");
        return "";
    }

    std::string captured;
    std::array<char, 128> chunk;
    try {
        for (;;) {
            if (fgets(chunk.data(), chunk.size(), child) == nullptr) {
                break;
            }
            captured += chunk.data();
        }
    } catch (...) {
        // Appending can throw; make sure the child process is reaped first.
        pclose(child);
        throw;
    }

    const int exitStatus = pclose(child);
    if (exitStatus != 0) {
        LOG_WARN("PS command exited with status {}", exitStatus);
    }

    return captured;
}

/// Converts the text produced by `ps -eo pid,ppid,user,%cpu,%mem,command`
/// into one JSON object per monitored process.
///
/// The first line (column header) and empty lines are skipped. A line that
/// cannot be parsed is logged and skipped; it never aborts the whole parse.
///
/// @param output Raw ps output.
/// @return JSON records with the keys pid, ppid, user, cpu_percent,
///         memory_percent, command and name, plus whatever
///         getDetailedProcessInfo() adds.
std::vector<nlohmann::json> ProcessMonitor::parsePSOutput(const std::string& output) const {
    std::vector<nlohmann::json> processes;
    std::istringstream stream(output);
    std::string line;
    bool isFirstLine = true;

    while (std::getline(stream, line)) {
        if (isFirstLine) {
            isFirstLine = false;
            continue;  // skip the header row
        }
        if (line.empty()) continue;

        try {
            std::istringstream lineStream(line);
            std::string pidStr, ppidStr, user, cpuStr, memStr, command;
            lineStream >> pidStr >> ppidStr >> user >> cpuStr >> memStr;

            // The command is the rest of the line and may itself contain spaces.
            std::getline(lineStream, command);

            // Strip the padding spaces around the command.
            const size_t start = command.find_first_not_of(' ');
            const size_t end = command.find_last_not_of(' ');
            if (start != std::string::npos && end != std::string::npos) {
                command = command.substr(start, end - start + 1);
            }

            // std::stoi / std::stof throw on malformed fields; handled below.
            const pid_t pid = std::stoi(pidStr);
            const pid_t ppid = std::stoi(ppidStr);
            const float cpu = std::stof(cpuStr);
            const float mem = std::stof(memStr);

            // Process name = first whitespace-delimited token of the command.
            // NOTE: an executable path that contains spaces is cut at the
            // first space, so its name is only the leading fragment.
            std::string processName = command;
            const size_t spacePos = command.find(' ');
            if (spacePos != std::string::npos) {
                processName = command.substr(0, spacePos);
            }

            // Keep only the basename (drop the directory part).
            const size_t slashPos = processName.find_last_of('/');
            if (slashPos != std::string::npos) {
                processName = processName.substr(slashPos + 1);
            }

            // Filter before the comparatively expensive detail lookup.
            if (!shouldMonitorProcess(processName)) {
                continue;
            }

            nlohmann::json processInfo = getDetailedProcessInfo(pid);

            // Basic fields from ps; these overwrite same-named detail keys.
            processInfo["pid"] = pid;
            processInfo["ppid"] = ppid;
            processInfo["user"] = user;
            processInfo["cpu_percent"] = cpu;
            processInfo["memory_percent"] = mem;
            processInfo["command"] = command;
            processInfo["name"] = processName;

            processes.push_back(std::move(processInfo));
        } catch (const std::exception& e) {
            LOG_WARN("Failed to parse process line: {} - {}", line, e.what());
        }
    }

    return processes;
}

nlohmann::json ProcessMonitor::getDetailedProcessInfo(pid_t pid) const {

nlohmann::json info;

try {

// 使用proc_pidinfo获取更多信息

struct proc_bsdinfo procInfo;

int ret = proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &procInfo, sizeof(procInfo));

if (ret > 0) {

info"start_time" = procInfo.pbi_start_tvsec;

info"status" = std::string(1, procInfo.pbi_status);

info"nice" = procInfo.pbi_nice;

info"flags" = procInfo.pbi_flags;

}

// 获取打开文件数

int fds4 = {CTL_KERN, KERN_PROC, KERN_PROC_NFDS, pid};

int numFds = 0;

size_t len = sizeof(numFds);

if (sysctl(fds, 4, &numFds, &len, NULL, 0) == 0) {

info"open_files" = numFds;

}

// 获取进程参数

char pathbufPROC_PIDPATHINFO_MAXSIZE;

if (proc_pidpath(pid, pathbuf, sizeof(pathbuf)) > 0) {

info"executable_path" = std::string(pathbuf);

}

// 获取进程参数

char argsARG_MAX;

int mib3 = {CTL_KERN, KERN_PROCARGS2, pid};

size_t argsSize = sizeof(args);

if (sysctl(mib, 3, args, &argsSize, NULL, 0) == 0) {

info"arguments" = std::string(args, argsSize);

}

// 获取进程环境

char env4096;

size_t envSize = sizeof(env);

int mibEnv3 = {CTL_KERN, KERN_PROCENV, pid};

if (sysctl(mibEnv, 3, env, &envSize, NULL, 0) == 0) {

info"environment_size" = envSize;

}

} catch (const std::exception& e) {

LOG_WARN("Failed to get detailed info for PID {}: {}", pid, e.what());

}

return info;

}

namespace {

/// Matches `text` against `pattern`, where '*' stands for any (possibly empty)
/// run of characters and every other character matches only itself.
bool globMatches(const std::string& pattern, const std::string& text) {
    size_t p = 0;
    size_t t = 0;
    size_t lastStar = std::string::npos;  // position of the most recent '*'
    size_t retryFrom = 0;                 // text position that '*' resumes from

    while (t < text.size()) {
        if (p < pattern.size() && pattern[p] == '*') {
            lastStar = p++;
            retryFrom = t;
        } else if (p < pattern.size() && pattern[p] == text[t]) {
            ++p;
            ++t;
        } else if (lastStar != std::string::npos) {
            // Mismatch: let the last '*' swallow one more character.
            p = lastStar + 1;
            t = ++retryFrom;
        } else {
            return false;
        }
    }

    // Only trailing '*' may remain in the pattern.
    while (p < pattern.size() && pattern[p] == '*') {
        ++p;
    }
    return p == pattern.size();
}

}  // namespace

/// Decides whether a process with the given name should be reported.
///
/// Everything is accepted in "include all" mode. Otherwise the name must equal
/// one of the monitored entries or match one that contains '*' wildcards.
/// Characters other than '*' are literal, so entries such as "com.apple.*" or
/// "c++*" match exactly what they spell.
///
/// @param processName Basename of the process executable.
/// @return true when the process should be included.
bool ProcessMonitor::shouldMonitorProcess(const std::string& processName) const {
    if (includeAllProcesses_) {
        return true;
    }

    for (const auto& monitored : monitoredProcesses_) {
        if (globMatches(monitored, processName)) {
            return true;
        }
    }

    return false;
}

/// Recomputes the statistics from a freshly collected process list.
///
/// Counts processes per user and splits them into zombie (status "Z") and
/// active (everything else, including records without a status).
///
/// @param processes Records as produced by parsePSOutput().
void ProcessMonitor::updateStats(const std::vector<nlohmann::json>& processes) {
    stats_.totalProcesses = processes.size();
    // Simplification: the list is already filtered, so both totals are equal.
    stats_.monitoredProcesses = processes.size();
    stats_.processCountByUser.clear();
    stats_.activeProcesses = 0;
    stats_.zombieProcesses = 0;

    for (const auto& process : processes) {
        const std::string user = process.value("user", "unknown");
        ++stats_.processCountByUser[user];

        const std::string status = process.value("status", "?");
        if (status == "Z") {
            ++stats_.zombieProcesses;
        } else {
            ++stats_.activeProcesses;
        }
    }
}

/// Returns the detail record of a process, consulting the cache first.
///
/// A cache miss queries getDetailedProcessInfo() and stores the result.
/// NOTE(review): nothing in this function expires cached entries, so a record
/// can outlive its process or describe a reused PID - confirm that callers
/// tolerate this.
///
/// @param pid Process to look up.
/// @return Cached or freshly collected detail record.
nlohmann::json ProcessMonitor::getProcessInfo(pid_t pid) {
    const auto cached = processCache_.find(pid);
    if (cached != processCache_.end()) {
        return cached->second;
    }

    nlohmann::json info = getDetailedProcessInfo(pid);
    processCache_[pid] = info;
    return info;
}

std::vector<nlohmann::json> ProcessMonitor::getProcessesByName(

const std::string& name) {

std::vector<nlohmann::json> processes;

auto allProcesses = collectProcesses();

for (const auto& process : allProcesses) {

std::string processName = process.value("name", "");

if (processName.find(name) != std::string::npos) {

processes.push_back(process);

}

}

return processes;

}

/// Adds a process name to the monitored list unless it is already present.
///
/// @param processName Name to start monitoring.
void ProcessMonitor::startMonitoring(const std::string& processName) {
    const auto listEnd = monitoredProcesses_.end();
    const bool alreadyTracked =
        std::find(monitoredProcesses_.begin(), listEnd, processName) != listEnd;
    if (alreadyTracked) {
        return;
    }

    monitoredProcesses_.push_back(processName);
    LOG_INFO("Started monitoring process: {}", processName);
}

/// Removes the first occurrence of a process name from the monitored list.
/// Names that are not in the list are ignored.
///
/// @param processName Name to stop monitoring.
void ProcessMonitor::stopMonitoring(const std::string& processName) {
    const auto position = std::find(monitoredProcesses_.begin(),
                                    monitoredProcesses_.end(), processName);
    if (position == monitoredProcesses_.end()) {
        return;
    }

    monitoredProcesses_.erase(position);
    LOG_INFO("Stopped monitoring process: {}", processName);
}

/// Tells whether the exact name is an entry of the monitored list.
/// Wildcards and the "include all" mode are not taken into account here.
///
/// @param processName Name to look up.
/// @return true when the list contains an identical entry.
bool ProcessMonitor::isMonitoring(const std::string& processName) const {
    const auto listEnd = monitoredProcesses_.end();
    const auto hit = std::find(monitoredProcesses_.begin(), listEnd, processName);
    return hit != listEnd;
}

/// Returns a copy of the statistics computed by the last updateStats() call.
ProcessMonitor::ProcessStats ProcessMonitor::getStats() const {
    return stats_;
}

2.src/LogSource/UnifiedLogCollector.cpp

#include "UnifiedLogCollector.hpp"

#include "Utils/Logger.hpp"

#include <array>

#include <memory>

#include <cstdio>

#include <ctime>

#include <regex>

#include <sstream>

#include <iomanip>

/// Private implementation type held by UnifiedLogCollector through pImpl_.
/// It carries no state, so the compiler-generated special members are sufficient.
class UnifiedLogCollector::Impl {};

/// Creates the collector with an empty implementation object and logs the event.
UnifiedLogCollector::UnifiedLogCollector()
    : pImpl_(std::make_unique<Impl>()) {
    LOG_INFO("UnifiedLogCollector initialized");
}

/// Logs the teardown; owned members release themselves.
UnifiedLogCollector::~UnifiedLogCollector() {
    LOG_INFO("UnifiedLogCollector destroyed");
}

std::vector<nlohmann::json> UnifiedLogCollector::collectLogs() {

std::vector<nlohmann::json> logs;

try {

LOG_DEBUG("Starting log collection");

// 构建log命令

std::string options = "--last " + std::to_string(timeRangeMinutes_) + "m";

if (!filterPredicate_.empty()) {

options += " --predicate '" + filterPredicate_ + "'";

}

// 执行命令

std::string command = "log show " + options +

" --style json --info --debug --error --fault";

std::string output = executeLogCommand();

// 解析输出

std::istringstream stream(output);

std::string line;

size_t entryCount = 0;

while (std::getline(stream, line) && entryCount < maxEntries_) {

if (!line.empty() && line.find('{') != std::string::npos) {

try {

auto logEntry = nlohmann::json::parse(line);

logs.push_back(logEntry);

entryCount++;

// 更新统计

updateStats(logEntry);

} catch (const nlohmann::json::parse_error& e) {

LOG_WARN("Failed to parse log line: {}", line);

}

}

}

stats_.totalEntries += logs.size();

stats_.lastCollectionTime = std::chrono::system_clock::now();

LOG_INFO("Collected {} log entries", logs.size());

} catch (const std::exception& e) {

LOG_ERROR("Failed to collect logs: {}", e.what());

}

return logs;

}

namespace {

/// Escapes a value for use inside a double-quoted predicate string literal:
/// backslashes and double quotes are prefixed with a backslash.
std::string escapePredicateLiteral(const std::string& value) {
    std::string escaped;
    escaped.reserve(value.size());
    for (const char c : value) {
        if (c == '\\' || c == '"') {
            escaped += '\\';
        }
        escaped += c;
    }
    return escaped;
}

}  // namespace

/// Collects the logs emitted by one process.
///
/// Temporarily replaces the filter with `process == "<name>"`, collects, and
/// then restores the previous filter.
///
/// @param processName Process name to filter on; it is escaped before being
///                    placed into the predicate.
/// @return Log entries of that process.
std::vector<nlohmann::json> UnifiedLogCollector::collectProcessLogs(
    const std::string& processName) {
    const std::string oldFilter = filterPredicate_;

    setFilter("process == \"" + escapePredicateLiteral(processName) + "\"");
    auto logs = collectLogs();
    filterPredicate_ = oldFilter;

    return logs;
}

/// Collects the logs belonging to one subsystem.
///
/// Temporarily replaces the filter with `subsystem == "<name>"`, collects, and
/// then restores the previous filter.
///
/// @param subsystem Subsystem identifier to filter on; it is escaped before
///                  being placed into the predicate.
/// @return Log entries of that subsystem.
std::vector<nlohmann::json> UnifiedLogCollector::collectSubsystemLogs(
    const std::string& subsystem) {
    const std::string oldFilter = filterPredicate_;

    setFilter("subsystem == \"" + escapePredicateLiteral(subsystem) + "\"");
    auto logs = collectLogs();
    filterPredicate_ = oldFilter;

    return logs;
}

/// Replaces the predicate used to filter collected log entries.
///
/// @param filter Predicate text; an empty string disables filtering.
void UnifiedLogCollector::setFilter(const std::string& filter) {
    filterPredicate_ = filter;
    LOG_DEBUG("Log filter set to: {}", filter);
}

/// Sets how far back, in minutes, log collection reaches.
/// Values outside 1..1440 (24 hours) are ignored and leave the setting unchanged.
///
/// @param minutesAgo Length of the collection window in minutes.
void UnifiedLogCollector::setTimeRange(int minutesAgo) {
    const bool withinOneDay = minutesAgo > 0 && minutesAgo <= 1440;
    if (!withinOneDay) {
        return;
    }

    timeRangeMinutes_ = minutesAgo;
    LOG_DEBUG("Time range set to last {} minutes", minutesAgo);
}

/// Sets the upper bound on entries returned by one collection.
/// Values outside 1..10000 are ignored and leave the setting unchanged.
///
/// @param maxEntries Maximum number of entries per collection.
void UnifiedLogCollector::setMaxEntries(int maxEntries) {
    const bool acceptable = maxEntries > 0 && maxEntries <= 10000;
    if (!acceptable) {
        return;
    }

    maxEntries_ = maxEntries;
    LOG_DEBUG("Max entries set to {}", maxEntries);
}

namespace {

/// Wraps text in single quotes for /bin/sh, turning every embedded single
/// quote into '\'' so the text cannot terminate the quoting.
std::string quoteForShell(const std::string& text) {
    std::string quoted = "'";
    for (const char c : text) {
        if (c == '\'') {
            quoted += "'\\''";
        } else {
            quoted += c;
        }
    }
    quoted += '\'';
    return quoted;
}

}  // namespace

/// Runs `log show` through popen() and returns everything it wrote to stdout.
///
/// @param predicate Filter predicate; when empty, filterPredicate_ is used
///                  (and no predicate at all if that is empty too). The
///                  predicate is shell-quoted before it reaches the command
///                  line.
/// @param options   Options placed right after "log show", verbatim. When
///                  empty, "--last <timeRangeMinutes_>m" is used instead.
/// @return Captured stdout, or an empty string when the pipe cannot be opened.
std::string UnifiedLogCollector::executeLogCommand(
    const std::string& predicate,
    const std::string& options) {
    std::string command = "log show";

    if (!options.empty()) {
        command += " " + options;
    } else {
        command += " --last " + std::to_string(timeRangeMinutes_) + "m";
    }

    const std::string& effectivePredicate =
        !predicate.empty() ? predicate : filterPredicate_;
    if (!effectivePredicate.empty()) {
        command += " --predicate " + quoteForShell(effectivePredicate);
    }

    command += " 2>/dev/null";
    LOG_DEBUG("Executing command: {}", command);

    std::array<char, 128> buffer;
    std::string result;

    FILE* pipe = popen(command.c_str(), "r");
    if (!pipe) {
        LOG_ERROR("Failed to execute log command");
        return "";
    }

    try {
        while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe) != nullptr) {
            result += buffer.data();
        }
    } catch (...) {
        // Appending can throw; make sure the child process is reaped first.
        pclose(pipe);
        throw;
    }

    const int status = pclose(pipe);
    if (status != 0) {
        LOG_WARN("Log command exited with status {}", status);
    }

    return result;
}

/// Turns one line of log output into a JSON entry.
///
/// A line that is valid JSON is returned as parsed. Any other line is wrapped
/// into an object holding the raw text, the current time in milliseconds
/// since the epoch, and the fixed source tag "unifiedlog".
///
/// @param line One line of `log` output.
/// @return Parsed entry or the fallback wrapper object.
nlohmann::json UnifiedLogCollector::parseLogLine(const std::string& line) {
    try {
        return nlohmann::json::parse(line);
    } catch (const nlohmann::json::parse_error&) {
        // Not JSON: fall through and build the plain-text wrapper below.
    }

    const auto sinceEpoch = std::chrono::system_clock::now().time_since_epoch();
    const auto nowMs =
        std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch).count();

    nlohmann::json fallback = {
        {"raw_message", line},
        {"timestamp", nowMs},
        {"source", "unifiedlog"}
    };
    return fallback;
}

/// Updates the per-severity counters for one collected entry.
///
/// Entries without a string "eventType" are not counted. Severity comes from
/// "messageType" when present ("Error"/"Fault" count as errors), because
/// ordinary messages all carry eventType "logEvent". The eventType names
/// handled by the previous implementation are still honoured.
///
/// @param entry One parsed log entry.
void UnifiedLogCollector::updateStats(const nlohmann::json& entry) {
    // Returns the string stored under key, or "" when absent or not a string.
    const auto textField = [&entry](const char* key) -> std::string {
        const auto it = entry.find(key);
        if (it == entry.end() || !it->is_string()) {
            return std::string();
        }
        return it->get<std::string>();
    };

    const std::string eventType = textField("eventType");
    if (eventType.empty()) {
        return;
    }
    const std::string messageType = textField("messageType");

    if (messageType == "Error" || messageType == "Fault" ||
        eventType == "errorEvent" || eventType == "faultEvent") {
        stats_.errorEntries++;
    } else if (eventType == "debugEvent") {
        // NOTE(review): debug events land in the warning counter, as before;
        // confirm this is the intended bucket.
        stats_.warningEntries++;
    } else if (eventType == "logEvent") {
        stats_.infoEntries++;
    }
}

/// Returns a copy of the counters accumulated by the collections so far.
UnifiedLogCollector::LogStats UnifiedLogCollector::getStats() const {
    return stats_;
}

相关推荐
Mintimate2 小时前
WorkBuddy 上手: 让脚本项目 Homebrew CN 变成会排障的 Agent
macos·边缘计算·agent
fthux1 天前
如果你用 Mac,那你可能需要 Noti Shift
macos·开源·github
counterxing4 天前
最近发现一个 Mac 工具,有点像把 Raycast、语音输入法、截图和录屏塞到了一起
macos·ai编程·claude
元Y亨H11 天前
MacBook Air 开发神器:IDEA 与 PyCharm 极简安装及环境配置
macos
yuanyxh12 天前
macOS 应用 - 纯对话生成
前端·macos·ai编程
AI创界者14 天前
PilotTTS 一键整合包(Win/Mac):8G 显存畅跑,实测解锁情绪与副语言的精准控制
人工智能·macos·aigc·音视频
零零信安14 天前
零零信安荣登数世咨询《新质·数字安全专精百强(2026)》暗网情报领域,彰显专业实力与创新引领
安全·网络安全·数据泄露·暗网·零零信安
憧憬成为web高手14 天前
l33t-hoster
学习·web安全·网络安全
HackTwoHub14 天前
Sqli-Scanner SQL注入SKILL自动化挖掘SQL注入,零依赖自动化SQL注入挖掘,赏金猎人
数据库·人工智能·sql·web安全·网络安全·自动化·系统安全
AirDroid_cn14 天前
系统终端与iTerm2字体看起来不一样?macOS Sequoia统一渲染指南
macos