在生产环境中设置MongoDB
第一部分:从命令行启动 MongoDB
1.1 基础启动命令
bash
复制代码
# ============================================
# Basic MongoDB startup commands
# ============================================
# Simplest form: start with the built-in defaults
mongod
# Choose the data directory
mongod --dbpath /var/lib/mongodb
# Choose the log file
mongod --logpath /var/log/mongodb/mongod.log
# Choose the port and the addresses to bind to
mongod --port 27017 --bind_ip 127.0.0.1,192.168.1.100
# Run as a daemon (in the background)
mongod --fork --logpath /var/log/mongodb/mongod.log
# Complete production-style example.
# The options are kept in an array because "\ # comment" does NOT continue
# a line: the backslash escapes the following space and the comment then
# ends the command, so every later option would run as its own command.
# Comments are allowed between array elements.
mongod_args=(
  --dbpath /data/mongodb                # data directory
  --logpath /var/log/mongodb/mongod.log # log file
  --logappend                           # append to the log instead of overwriting
  --port 27017                          # listening port
  --bind_ip 127.0.0.1,10.0.0.1          # addresses to bind to
  --fork                                # run in the background
  --auth                                # enable authentication
  --keyFile /etc/mongodb/keyfile        # replica-set authentication key
  --replSet rs0                         # replica-set name
  # NOTE(review): --journal is reportedly removed in MongoDB 6.1+, where
  # journaling is always on - confirm for the version you deploy.
  --journal                             # enable journaling
  --maxConns 10000                      # maximum number of connections
  --oplogSize 10240                     # oplog size in MB
)
mongod "${mongod_args[@]}"
1.2 配置文件启动(推荐生产环境)
yaml
复制代码
# /etc/mongod.conf
# Complete example of a production MongoDB configuration file

# System log
systemLog:
  # Log destination: file | syslog (leave unset to log to standard output)
  destination: file
  # Log file path
  path: /var/log/mongodb/mongod.log
  # Append to the existing log on restart (recommended)
  logAppend: true
  # Verbosity (0 = default, 1-5 = increasingly detailed)
  verbosity: 1
  # slowOpThresholdMs and profiling are NOT systemLog settings; they are
  # configured once, under operationProfiling, at the end of this file.

# Storage
storage:
  # Data directory
  dbPath: /data/mongodb
  journal:
    # NOTE(review): storage.journal.enabled is reportedly removed in
    # MongoDB 6.1+, where journaling is always on - drop this line there.
    enabled: true
    # Journal commit interval in milliseconds
    commitIntervalMs: 100
  # WiredTiger storage engine
  wiredTiger:
    engineConfig:
      # Cache size in GB (the default is roughly half of RAM)
      cacheSizeGB: 8
      # Keep indexes in the same directory as collection data
      directoryForIndexes: false
    collectionConfig:
      # Block compressor: snappy / zlib / zstd / none
      blockCompressor: snappy
    indexConfig:
      prefixCompression: true

# Network
net:
  # Bind to specific addresses in production; avoid 0.0.0.0
  bindIp: 127.0.0.1,10.0.1.100
  # Listening port
  port: 27017
  # Maximum number of simultaneous connections
  maxIncomingConnections: 65536
  # Enable IPv6
  ipv6: true
  # The legacy HTTP status interface (net.http) no longer exists in current
  # MongoDB releases, and an unknown option prevents mongod from starting:
  # http:
  #   enabled: false
  # Wire-protocol compression
  compression:
    compressors: snappy,zstd
  # TLS settings belong under net, not under security
  tls:
    mode: requireTLS
    certificateKeyFile: /etc/ssl/mongodb.pem
    CAFile: /etc/ssl/ca-cert.pem

# Security
security:
  # Enable role-based authorization
  authorization: enabled
  # Key file used for replica-set member authentication
  keyFile: /etc/mongodb/keyfile
  # Cluster authentication mode
  clusterAuthMode: keyFile

# Process management
processManagement:
  # Run as a daemon
  fork: true
  # PID file path
  pidFilePath: /var/run/mongodb/mongod.pid
  # Time zone database directory
  timeZoneInfo: /usr/share/zoneinfo

# Replication
replication:
  # Replica-set name
  replSetName: rs0
  # Support for read concern "majority" (this does not control reads from
  # secondaries). NOTE(review): not changeable on recent releases - confirm.
  enableMajorityReadConcern: true
  # Oplog size in MB
  oplogSizeMB: 10240

# Audit log (Enterprise only)
auditLog:
  destination: file
  format: JSON
  path: /var/log/mongodb/audit.json
  filter: '{ "atype": { $in: ["authCheck", "createUser", "dropUser"] } }'

# Operation profiling
operationProfiling:
  # Profiler mode: off | slowOp | all
  mode: slowOp
  # Slow-operation threshold in milliseconds
  slowOpThresholdMs: 100

# Start mongod with this file:
# mongod --config /etc/mongod.conf
1.3 不同环境启动脚本
bash
复制代码
#!/bin/bash
# ============================================
# Production init script: /etc/init.d/mongod
# ============================================
### BEGIN INIT INFO
# Provides: mongod
# Required-Start: $network $remote_fs
# Required-Stop: $network $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: MongoDB database server
### END INIT INFO
NAME=mongod
DESC="MongoDB Database Server"
MONGO_USER=mongodb
MONGO_GROUP=mongodb
DAEMON=/usr/bin/mongod
CONFIG=/etc/mongod.conf
PIDFILE=/var/run/mongodb/mongod.pid

#######################################
# Print the numeric "port:" value found in a mongod YAML config file.
# Matches the key at any indentation depth (the previous "^ port:" pattern
# required exactly one leading space and so missed normally indented files).
# Arguments: $1 - path to the config file
# Outputs:   the port on stdout, or nothing when absent/unreadable
#######################################
read_config_port() {
  local config_file=$1
  [ -r "$config_file" ] || return 0
  awk '$1 == "port:" && $2 ~ /^[0-9]+$/ { print $2; exit }' "$config_file"
}

# Read the port from the config file, falling back to the MongoDB default
PORT=$(read_config_port "$CONFIG")
if [ -z "$PORT" ]; then
  PORT=27017
fi
#######################################
# Report whether the process recorded in $PIDFILE is alive.
# Globals:  PIDFILE (read), PID (written with the PID file's content)
# Returns:  0 when the recorded process exists; 1 otherwise.
# A PID file that is stale or does not contain a plain positive integer is
# removed. Validation matters: a value such as "-1" would make `kill -0`
# probe every process and report success.
#######################################
check_running() {
  [ -f "$PIDFILE" ] || return 1
  PID=$(cat "$PIDFILE" 2>/dev/null)
  case "$PID" in
    '' | *[!0-9]*)
      rm -f "$PIDFILE"
      return 1
      ;;
  esac
  if kill -0 "$PID" 2>/dev/null; then
    return 0
  fi
  # Process is gone: drop the stale PID file
  rm -f "$PIDFILE"
  return 1
}
#######################################
# Start mongod through start-stop-daemon.
# Globals:  NAME, DESC, MONGO_USER, MONGO_GROUP, PIDFILE, DAEMON, CONFIG,
#           PORT (all read)
# Returns:  0 when mongod is running afterwards; 1 when it was already
#           running or failed to start.
#######################################
start() {
  echo "Starting $DESC: $NAME"
  # Refuse to start a second instance
  if check_running; then
    echo "$NAME is already running"
    return 1
  fi
  # Make sure the data and PID directories exist with the right owner.
  # The PID directory is created too: on a tmpfs-backed /var/run it is
  # missing after a reboot and chown would fail on it.
  local pid_dir
  pid_dir=$(dirname "$PIDFILE")
  mkdir -p /data/mongodb "$pid_dir"
  chown -R "$MONGO_USER:$MONGO_GROUP" /data/mongodb
  chown -R "$MONGO_USER:$MONGO_GROUP" "$pid_dir"
  # Launch mongod; fail immediately if the launcher itself reports an error
  if ! start-stop-daemon --start --quiet --chuid "$MONGO_USER" \
    --pidfile "$PIDFILE" --exec "$DAEMON" -- \
    --config "$CONFIG"; then
    echo "Failed to start $NAME"
    return 1
  fi
  # Give mongod time to come up before verifying
  sleep 5
  if check_running; then
    echo "$NAME started successfully on port $PORT"
    return 0
  fi
  echo "Failed to start $NAME"
  return 1
}
#######################################
# Stop mongod: SIGTERM first, SIGKILL after 30 one-second checks.
# Globals:  NAME, DESC, PIDFILE (read); PID (written)
# Returns:  always 0
#######################################
stop() {
  echo "Stopping $DESC: $NAME"
  check_running || {
    echo "$NAME is not running"
    return 0
  }
  PID=$(cat "$PIDFILE")
  # Ask for a clean shutdown
  kill -TERM "$PID"
  # Poll once per second until the process disappears
  local remaining=30
  while [ "$remaining" -gt 0 ]; do
    if kill -0 "$PID" 2>/dev/null; then
      sleep 1
      remaining=$((remaining - 1))
      continue
    fi
    rm -f "$PIDFILE"
    echo "$NAME stopped"
    return 0
  done
  # Still alive after the grace period: kill it
  echo "Force stopping $NAME"
  kill -9 "$PID" 2>/dev/null
  rm -f "$PIDFILE"
  return 0
}
#######################################
# Restart mongod: stop, pause two seconds, then start.
# Returns: the status of start
#######################################
restart() {
  stop; sleep 2
  start
}
#######################################
# Print whether mongod is running and, when mongosh is available, probe
# the server for responsiveness.
# Globals:  NAME, PIDFILE, PORT (read); PID (written)
# Returns:  0 when running, 1 otherwise
#######################################
status() {
  if ! check_running; then
    echo "$NAME is not running"
    return 1
  fi
  PID=$(cat "$PIDFILE")
  echo "$NAME is running (pid: $PID)"
  # Optional liveness probe; skipped silently when mongosh is missing
  if command -v mongosh >/dev/null 2>&1 &&
    mongosh --quiet --eval "db.serverStatus().uptime" \
      --port "$PORT" 2>/dev/null; then
    echo "MongoDB is responsive"
  fi
  return 0
}
# Entry point: run the action named by the first argument and exit with
# its status; anything else prints the usage line and exits 1.
case "$1" in
  start | stop | restart | status)
    # Each accepted action is implemented by a function of the same name
    "$1"
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|status}"
    exit 1
    ;;
esac
exit $?
1.4 Systemd 服务配置
ini
复制代码
# /etc/systemd/system/mongod.service
# systemd unit file for MongoDB
[Unit]
Description=MongoDB Database Server
Documentation=https://docs.mongodb.org/manual
After=network.target

[Service]
# /etc/mongod.conf sets processManagement.fork: true, so mongod daemonizes
# itself. systemd must therefore follow the forked process via the PID file;
# with the default Type=simple the service would be seen as exited.
Type=forking
PIDFile=/var/run/mongodb/mongod.pid
User=mongodb
Group=mongodb
# Create /run/mongodb (owned by User/Group) for the PID file
RuntimeDirectory=mongodb
Environment="OPTIONS=-f /etc/mongod.conf"
EnvironmentFile=-/etc/default/mongod
# "+" runs these preparation steps with full privileges; as the mongodb
# user, mkdir/chown on /data would fail.
ExecStartPre=+/bin/mkdir -p /data/mongodb
ExecStartPre=+/bin/chown mongodb:mongodb /data/mongodb
ExecStart=/usr/bin/mongod $OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
ExecStop=/bin/kill -TERM $MAINPID
# Restart policy
Restart=always
RestartSec=10
# Resource limits
LimitNOFILE=64000
LimitNPROC=64000
LimitMEMLOCK=infinity
# Timeouts
TimeoutStartSec=120
TimeoutStopSec=120
# Working directory; "-" tolerates it not existing yet on first start
WorkingDirectory=-/data/mongodb

[Install]
WantedBy=multi-user.target
bash
复制代码
# Common systemd commands for the mongod service.
# Each line is an independent example, not a script to run top to bottom.
sudo systemctl daemon-reload # re-read unit files after editing them
sudo systemctl enable mongod # start the service at boot
sudo systemctl start mongod # start the service
sudo systemctl stop mongod # stop the service
sudo systemctl restart mongod # restart the service
sudo systemctl status mongod # show the service status
sudo journalctl -u mongod -f # follow the service's journal output
sudo systemctl show mongod # print all unit properties
第二部分:停止 MongoDB
2.1 优雅停止方法
javascript
复制代码
// ============================================
// Stopping MongoDB from inside the shell
// (three alternative commands, not a sequence)
// ============================================
// 1. Graceful shutdown through the admin database
use admin
db.shutdownServer()
// 2. Shutdown with a timeout of 60 seconds
db.shutdownServer({ timeoutSecs: 60 })
// 3. Forced shutdown (not recommended)
db.adminCommand({ shutdown: 1, force: true })
bash
复制代码
# ============================================
# Stopping MongoDB from the command line
# (alternatives - pick one, do not run them all)
# ============================================
# 1. Ask mongod to shut down the instance using this data directory
mongod --shutdown --dbpath /data/mongodb
# 2. Same, identifying the instance through its config file
mongod --shutdown --config /etc/mongod.conf
# 3. Send SIGTERM to the PID recorded in the PID file (recommended)
kill -TERM $(cat /var/run/mongodb/mongod.pid)
# 4. Send SIGTERM by process name with pkill
pkill -TERM mongod
# 5. Through the service manager
sudo systemctl stop mongod
# or
sudo service mongod stop
# 6. killall (use with care: signals every process named mongod)
killall -TERM mongod
2.2 停止脚本
bash
复制代码
#!/bin/bash
# ============================================
# Smart stop script - /usr/local/bin/mongod-stop
# ============================================
# PID file consulted first when looking for the mongod process
MONGOD_PID_FILE="/var/run/mongodb/mongod.pid"
# Port passed to mongosh when counting active connections
MONGO_PORT=27017
# Seconds to wait for a graceful stop (the -t/--timeout option overrides it)
TIMEOUT=60
# Seconds slept between liveness checks while waiting
CHECK_INTERVAL=2
# ANSI color escapes used by the print_* helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Colored message helpers. Each prints "$1" to stdout behind a tag; escape
# sequences in the whole line are expanded (printf %b behaves as echo -e).
# Globals: GREEN, YELLOW, RED, NC (read)
print_status() {
  printf '%b\n' "${GREEN}[INFO]${NC} $1"
}
print_warning() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
print_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
#######################################
# Print the mongod PID: the PID file's content when the file exists,
# otherwise the first process named exactly "mongod".
# Globals: MONGOD_PID_FILE (read)
#######################################
get_pid() {
  if [ ! -f "$MONGOD_PID_FILE" ]; then
    # No PID file: fall back to a process-table lookup
    pgrep -x "mongod" | head -1
    return
  fi
  cat "$MONGOD_PID_FILE"
}
#######################################
# Test whether a process exists.
# Arguments: $1 - PID (may be empty)
# Returns:   0 when the PID is non-empty and can be signalled, else 1
#######################################
is_running() {
  local target=$1
  [ -z "$target" ] && return 1
  kill -0 "$target" 2>/dev/null || return 1
  return 0
}
#######################################
# Send SIGTERM and wait for the process to exit.
# Globals:   CHECK_INTERVAL (read)
# Arguments: $1 - PID, $2 - maximum seconds to wait
# Returns:   0 once the process is gone, 1 when the timeout is reached
#######################################
graceful_shutdown() {
  local target=$1
  local limit=$2
  local waited=0
  print_status "Sending SIGTERM to mongod (PID: $target)"
  kill -TERM "$target"
  until ! is_running "$target"; do
    if [ "$waited" -ge "$limit" ]; then
      print_warning "Timeout reached after ${limit}s"
      return 1
    fi
    print_status "Waiting for mongod to stop... (${waited}s)"
    sleep $CHECK_INTERVAL
    waited=$((waited + CHECK_INTERVAL))
  done
  return 0
}
#######################################
# Kill the process with SIGKILL and verify one second later.
# Arguments: $1 - PID
# Returns:   0 when the process is gone afterwards, 1 when it survived
#######################################
force_shutdown() {
  local victim=$1
  print_warning "Force killing mongod (PID: $victim)"
  kill -9 "$victim" 2>/dev/null
  sleep 1
  is_running "$victim" && return 1
  print_status "MongoDB force stopped"
  return 0
}
#######################################
# Detect client connections other than our own probe.
# Globals:  MONGO_PORT (read)
# Returns:  0 when more than one connection is open (a warning is
#           printed); 1 otherwise, including when mongosh is missing,
#           fails, or prints something that is not a plain number
#           (for example an authentication error).
#######################################
check_active_connections() {
  command -v mongosh >/dev/null 2>&1 || return 1
  # Declared separately so the exit status of mongosh is not masked
  local conn_count
  conn_count=$(mongosh --quiet --eval \
    "db.adminCommand({ serverStatus: 1 }).connections.current" \
    --port "$MONGO_PORT" 2>/dev/null) || return 1
  # Only compare real integers; anything else would make `[ -gt ]` error
  case "$conn_count" in
    '' | *[!0-9]*) return 1 ;;
  esac
  if [ "$conn_count" -gt 1 ]; then
    print_warning "Found $conn_count active connections"
    return 0
  fi
  return 1
}
#######################################
# Stop mongod: confirm with the operator when clients are connected, try
# a graceful stop, then fall back to a forced stop.
# Globals:  TIMEOUT, MONGOD_PID_FILE (read)
# Exits:    0 when stopped or cancelled; 1 when not running or unstoppable
#######################################
main() {
  local mongo_pid
  mongo_pid=$(get_pid)
  if ! is_running "$mongo_pid"; then
    print_error "MongoDB is not running"
    exit 1
  fi
  print_status "Found MongoDB process: $mongo_pid"
  # Ask before disconnecting active clients
  if check_active_connections; then
    print_warning "Active connections detected"
    read -p "Continue shutdown? (y/N) " -n 1 -r
    echo
    case "$REPLY" in
      [Yy]) ;;
      *)
        print_status "Shutdown cancelled"
        exit 0
        ;;
    esac
  fi
  # Graceful stop first
  if graceful_shutdown "$mongo_pid" "$TIMEOUT"; then
    print_status "MongoDB stopped gracefully"
    rm -f "$MONGOD_PID_FILE"
    exit 0
  fi
  # Graceful stop timed out: force it
  print_warning "Graceful shutdown failed, attempting force stop"
  if ! force_shutdown "$mongo_pid"; then
    print_error "Failed to stop MongoDB"
    exit 1
  fi
  print_status "MongoDB force stopped"
  rm -f "$MONGOD_PID_FILE"
  exit 0
}
#######################################
# Parse the command line and run the requested stop mode.
#   -f | --force        kill immediately with SIGKILL
#   -t | --timeout SEC  graceful stop with a custom timeout
#   (anything else)     graceful stop with the default timeout
# Globals:  TIMEOUT (written by -t), MONGOD_PID_FILE (read)
# Exits:    2 on an invalid timeout; otherwise as the chosen mode does
#######################################
handle_cli() {
  case "$1" in
    -f | --force)
      local pid
      pid=$(get_pid)
      # Without this check an empty PID would be handed to kill
      if ! is_running "$pid"; then
        print_error "MongoDB is not running"
        exit 1
      fi
      if force_shutdown "$pid"; then
        rm -f "$MONGOD_PID_FILE"
        exit 0
      fi
      print_error "Failed to stop MongoDB"
      exit 1
      ;;
    -t | --timeout)
      # A missing or non-numeric value would break the `-ge` test in the
      # wait loop, so the graceful stop would never time out
      case "$2" in
        '' | *[!0-9]*)
          print_error "Invalid timeout '$2': expected a whole number of seconds"
          exit 2
          ;;
      esac
      TIMEOUT=$2
      main
      ;;
    *)
      main
      ;;
  esac
}
handle_cli "$@"
2.3 关闭前的数据刷新
javascript
复制代码
// ============================================
// 关闭前确保数据持久化
// ============================================
// 1. 强制所有写入刷新到磁盘
use admin
db.runCommand({ fsync: 1 })
// 2. 锁定数据库进行备份/维护
db.fsyncLock()
// 执行维护操作...
// 完成后解锁
db.fsyncUnlock()
// 3. 等待所有写入完成
db.runCommand({
waitForWriteConcern: {
w: "majority",
wtimeout: 30000
}
})
// 4. Example graceful-shutdown helper
/**
 * Wait for in-flight user operations to drain, flush data to disk and shut
 * the server down.
 *
 * The earlier version set "connPoolMaxShardedConns" to 0, which does not
 * stop clients from connecting, and waited for currentOp to report zero
 * active operations - which never happens, because the currentOp call
 * itself (and replication readers on the oplog) are always listed.
 *
 * @param {number} [maxWaitSecs=60] upper bound, in seconds, for the drain
 *   phase; also passed to shutdownServer as timeoutSecs.
 */
function gracefulShutdown(maxWaitSecs = 60) {
  const adminDb = db.getSiblingDB("admin");
  print("Starting graceful shutdown...");

  // True for operations worth waiting for: everything except this script's
  // own currentOp call and readers of the replication oplog.
  const isUserOp = (op) => {
    const cmd = op.command || {};
    if ("currentOp" in cmd) return false;
    if (Array.isArray(cmd.pipeline) &&
        cmd.pipeline.some((stage) => stage && "$currentOp" in stage)) {
      return false;
    }
    return !(op.ns || "").startsWith("local.oplog.");
  };

  // Wait for existing operations, but never longer than maxWaitSecs
  const deadline = Date.now() + maxWaitSecs * 1000;
  for (;;) {
    const activeOps = adminDb
      .currentOp({ active: true })
      .inprog.filter(isUserOp).length;
    if (activeOps === 0) break;
    if (Date.now() >= deadline) {
      print(`Timed out after ${maxWaitSecs}s with ${activeOps} operations still active; continuing shutdown`);
      break;
    }
    print(`Waiting for ${activeOps} active operations to complete...`);
    sleep(1000);
  }

  // Force a flush to disk
  adminDb.runCommand({ fsync: 1 });

  // Announce before the call: the connection drops once the server stops
  print("Shutting down server...");
  adminDb.shutdownServer({ timeoutSecs: maxWaitSecs });
}
// Run the shutdown
gracefulShutdown();
第三部分:安全性配置
3.1 用户和角色安全
javascript
复制代码
// ============================================
// Production user and role setup
// Every password is read interactively via passwordPrompt(), so none is
// stored in this script.
// ============================================
// Switch to the admin database
use admin
// 1. Super administrator (for emergencies only)
// NOTE(review): the built-in root role is documented to already include
// clusterAdmin privileges - confirm whether the second role is needed.
db.createUser({
user: "root_admin",
pwd: passwordPrompt(), // prompt for the password interactively
roles: [
{ role: "root", db: "admin" },
{ role: "clusterAdmin", db: "admin" }
],
authenticationRestrictions: [{
// Only allow logins from these client addresses, to this server address
clientSource: ["10.0.1.0/24", "127.0.0.1"],
serverAddress: ["10.0.1.100"]
}]
})
// 2. Database administrator
db.createUser({
user: "db_admin",
pwd: passwordPrompt(),
roles: [
{ role: "userAdminAnyDatabase", db: "admin" },
{ role: "dbAdminAnyDatabase", db: "admin" },
{ role: "readWriteAnyDatabase", db: "admin" }
]
})
// 3. Application user (least privilege): created in the ecommerce
// database, read-write there and read-only on logs
use ecommerce
db.createUser({
user: "app_user",
pwd: passwordPrompt(),
roles: [
{ role: "readWrite", db: "ecommerce" },
{ role: "read", db: "logs" }
]
})
// 4. Read-only monitoring user
use admin
db.createUser({
user: "monitoring",
pwd: passwordPrompt(),
roles: [
{ role: "clusterMonitor", db: "admin" },
{ role: "read", db: "local" }
]
})
// 5. Backup user
db.createUser({
user: "backup_user",
pwd: passwordPrompt(),
roles: [
{ role: "backup", db: "admin" },
{ role: "restore", db: "admin" }
]
})
// 6. Audit user (read access to the audit and admin databases)
db.createUser({
user: "audit_user",
pwd: passwordPrompt(),
roles: [
{ role: "read", db: "audit" },
{ role: "read", db: "admin" }
]
})
3.2 网络安全配置
bash
复制代码
#!/bin/bash
# ============================================
# iptables firewall rules for a MongoDB host
# ============================================
# Order matters when this runs over SSH: the chain is opened before the
# flush and the DROP policy is applied only after every ACCEPT rule is in
# place, so there is no moment at which the admin session is cut off.

# Open the default policy while rules are rebuilt, then clear old rules
iptables -P INPUT ACCEPT
iptables -F
iptables -X

# Allow loopback traffic
iptables -A INPUT -i lo -j ACCEPT
# Allow packets of already-established connections
iptables -A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
# Allow SSH (management) from the internal network
iptables -A INPUT -p tcp --dport 22 -s 10.0.0.0/8 -j ACCEPT
# Allow MongoDB from internal networks only
iptables -A INPUT -p tcp --dport 27017 -s 10.0.0.0/8 -j ACCEPT
iptables -A INPUT -p tcp --dport 27017 -s 192.168.0.0/16 -j ACCEPT
# Sharded-cluster ports (replica-set members talk on the mongod port above)
iptables -A INPUT -p tcp --dport 27018 -s 10.0.0.0/8 -j ACCEPT # default shard-server port
iptables -A INPUT -p tcp --dport 27019 -s 10.0.0.0/8 -j ACCEPT # default config-server port
# Allow ICMP echo requests (ping)
iptables -A INPUT -p icmp --icmp-type echo-request -j ACCEPT
# Log whatever is about to be dropped (rate limited)
iptables -A INPUT -m limit --limit 5/min -j LOG --log-prefix "iptables denied: " --log-level 7

# Default policies, applied last
iptables -P INPUT DROP
iptables -P FORWARD DROP
iptables -P OUTPUT ACCEPT

# Persist the rules; the directory may not exist on a fresh system
mkdir -p /etc/iptables
iptables-save > /etc/iptables/rules.v4
yaml
复制代码
# /etc/mongod.conf
# Network security settings
net:
  # Bind to internal addresses only; never expose a public address and do
  # not use 0.0.0.0
  bindIp: 127.0.0.1,10.0.1.100,192.168.1.100
  # Cap the number of connections to limit denial-of-service impact
  maxIncomingConnections: 10000
  # Compress traffic to save bandwidth
  compression:
    compressors: snappy,zstd
  # TLS settings belong under net, not under security
  tls:
    mode: requireTLS
    certificateKeyFile: /etc/ssl/mongodb.pem
    CAFile: /etc/ssl/ca-cert.pem
    allowConnectionsWithoutCertificates: false
    allowInvalidCertificates: false
    allowInvalidHostnames: false
  # Not valid mongod settings (an unknown option stops mongod from starting):
  #   serviceExecutor: adaptive - NOTE(review): experimental option, believed
  #     removed from current releases; confirm before re-enabling.
  #   socketTimeoutMS / connectTimeoutMS - these are client connection-string
  #     options, set by the application instead, e.g.
  #     mongodb://host:27017/?socketTimeoutMS=360000&connectTimeoutMS=10000

security:
  # Enable role-based authorization
  authorization: enabled
  # Cluster authentication mode: x509 or keyFile
  clusterAuthMode: x509
  # LDAP authentication (Enterprise only)
  # ldap:
  #   servers: "ldap.example.com:389"
  #   bind:
  #     method: simple
  #     queryUser: "cn=readonly,dc=example,dc=com"
3.3 加密配置
javascript
复制代码
// ============================================
// Encryption at rest (Enterprise only)
// ============================================
// 1. Enable encryption in the configuration file
// security:
//   enableEncryption: true
//   encryptionKeyFile: /etc/mongodb/encryption-key
// 2. Generate the encryption key
// openssl rand -base64 32 > /etc/mongodb/encryption-key
// chmod 600 /etc/mongodb/encryption-key
// chown mongodb:mongodb /etc/mongodb/encryption-key
// 3. Key management through a KMIP server
// security:
//   kmip:
//     keyIdentifier: "MongoDB-Key-001"
//     serverName: "kmip.example.com"
//     port: 5696
//     clientCertificateFile: /etc/ssl/kmip-client.pem
//     clientCertificatePassword: "password"
// 4. Client-side field-level encryption.
// Options object for a driver's automatic encryption: data keys live in
// encryption.__keyVault, AWS credentials come from the environment, and the
// schema map marks ecommerce.users.ssn as an encrypted string.
// NOTE(review): automatic-encryption rules normally also name a data key
// (keyId or encryptMetadata), and the AWS region is usually part of the
// master-key definition rather than of kmsProviders - confirm against the
// driver documentation.
const autoEncryptionOptions = {
keyVaultNamespace: "encryption.__keyVault",
kmsProviders: {
aws: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
region: "us-east-1"
}
},
schemaMap: {
"ecommerce.users": {
bsonType: "object",
properties: {
ssn: {
encrypt: {
bsonType: "string",
algorithm: "AEAD_AES_256_CBC_HMAC_SHA_512-Random"
}
}
}
}
}
};
3.4 安全审计脚本
bash
复制代码
#!/bin/bash
# ============================================
# Security audit script - mongodb_security_audit.sh
# ============================================
MONGOD_HOST="localhost"
MONGOD_PORT="27017"
AUDIT_LOG="/var/log/mongodb/security_audit.log"
# Captured once, so every line of a run carries the same timestamp
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')

# Append one "[timestamp] message" line to the audit log.
# Globals: TIMESTAMP, AUDIT_LOG (read)
log_message() {
  printf '[%s] %s\n' "$TIMESTAMP" "$1" >> "$AUDIT_LOG"
}

# Report banner
banner_rule="========================================="
printf '%s\n' \
  "$banner_rule" \
  "MongoDB Security Audit Report" \
  "Time: $TIMESTAMP" \
  "$banner_rule"
# Config file inspected by checks 1-3 (overridable for testing)
MONGOD_CONF="${MONGOD_CONF:-/etc/mongod.conf}"

# Succeed when the config sets KEY ($1) to VALUE ($2) on an active line.
# Anchoring at the start of the line keeps commented-out settings
# ("# authorization: enabled") from counting as enabled.
conf_has() {
  grep -Eq "^[[:space:]]*$1:[[:space:]]*$2([[:space:]#]|\$)" \
    "$MONGOD_CONF" 2>/dev/null
}

# Print the first value of KEY ($1) found on an active line, at any
# indentation depth.
conf_value() {
  awk -v key="$1:" '$1 == key { print $2; exit }' "$MONGOD_CONF" 2>/dev/null
}

# Succeed when a comma-separated bindIp list ($1) contains a wildcard
# address, i.e. mongod listens on every interface.
bind_ip_is_wildcard() {
  local -a entries
  local entry
  IFS=',' read -r -a entries <<< "$1"
  for entry in "${entries[@]}"; do
    case "$entry" in
      0.0.0.0 | '*' | ::) return 0 ;;
    esac
  done
  return 1
}

# 1. Authentication
echo -e "\n[1] Authentication Configuration:"
if conf_has authorization enabled; then
  echo " ✓ Authorization is ENABLED"
  log_message "AUTH_CHECK: Authorization enabled"
else
  echo " ✗ WARNING: Authorization is DISABLED"
  log_message "AUTH_CHECK: Authorization disabled - HIGH RISK"
fi

# 2. Network binding
echo -e "\n[2] Network Binding:"
BIND_IP=$(conf_value bindIp)
if bind_ip_is_wildcard "$BIND_IP" || conf_has bindIpAll true; then
  echo " ✗ WARNING: MongoDB bound to all interfaces (0.0.0.0)"
  log_message "NETWORK_CHECK: MongoDB bound to 0.0.0.0 - RISK"
else
  echo " ✓ MongoDB bound to: $BIND_IP"
fi

# 3. TLS
echo -e "\n[3] TLS Configuration:"
if conf_has mode requireTLS; then
  echo " ✓ TLS is ENABLED and required"
  TLS_CERT=$(conf_value certificateKeyFile)
  if [ -f "$TLS_CERT" ]; then
    echo " ✓ Certificate file found: $TLS_CERT"
    # Print the certificate's validity period
    openssl x509 -in "$TLS_CERT" -noout -dates 2>/dev/null
  fi
else
  echo " ✗ WARNING: TLS is NOT enabled"
  log_message "TLS_CHECK: TLS not enabled - RISK"
fi
# 4. Users and roles: list every user of the admin database with its roles
# and flag the default-looking names "admin" and "root"
echo -e "\n[4] User Security:"
list_users_js="
const users = db.getSiblingDB('admin').getUsers();
print(' Total users: ' + users.users.length);
users.users.forEach(u => {
print(' - ' + u.user + ': roles=' + JSON.stringify(u.roles));
// 检查弱密码用户
if (u.user === 'admin' || u.user === 'root') {
print(' ⚠️ Default username detected');
}
});
"
mongosh --quiet --eval "$list_users_js" \
  --host "$MONGOD_HOST" --port "$MONGOD_PORT" 2>/dev/null

# 5. File permissions: sensitive files must be mode 600 or 400
echo -e "\n[5] File Permissions:"
for path in /etc/mongod.conf /etc/mongodb/keyfile /etc/ssl/mongodb.pem; do
  # Files that do not exist are skipped silently
  [ -f "$path" ] || continue
  mode=$(stat -c "%a" "$path")
  owner=$(stat -c "%U" "$path")
  case "$mode" in
    600 | 400)
      echo " ✓ $path: $mode ($owner)"
      ;;
    *)
      echo " ✗ WARNING: $path has wrong permissions: $mode"
      log_message "PERM_CHECK: $path has permission $mode"
      ;;
  esac
done
# 6. Current connections
echo -e "\n[6] Current Connections:"
CURRENT_CONN=$(mongosh --quiet --eval "db.serverStatus().connections.current" \
  --host "$MONGOD_HOST" --port "$MONGOD_PORT" 2>/dev/null)
echo " Active connections: ${CURRENT_CONN:-unknown}"

# 7. Slow queries.
# getLog is an admin command whose valid log names include "global";
# the previous db.getSiblingDB('admin').getLog('slowest') call is not a
# valid request, so the count was always 0. "unknown" is printed when the
# server cannot be queried, so a failed probe is not mistaken for a clean
# result.
# NOTE(review): matches the "Slow query" message of structured (4.4+) logs -
# confirm the wording for the server version in use.
echo -e "\n[7] Slow Query Analysis:"
SLOW_QUERIES=$(mongosh --quiet --eval "
db.adminCommand({ getLog: 'global' }).log.filter(l => /Slow query/i.test(l)).length
" --host "$MONGOD_HOST" --port "$MONGOD_PORT" 2>/dev/null)
echo " Slow queries in log: ${SLOW_QUERIES:-unknown}"

# 8. Summary
echo -e "\n========================================="
echo "Audit completed. Full report: $AUDIT_LOG"
echo "========================================="
log_message "AUDIT_COMPLETE: Security audit completed"
第四部分:日志管理
4.1 日志配置详解
yaml
复制代码
# /etc/mongod.conf
# Complete logging configuration
systemLog:
  # Log destination
  destination: file
  # Log file path
  path: /var/log/mongodb/mongod.log
  # Append on restart (required in production; also needed by logRotate: reopen)
  logAppend: true
  # Global verbosity (0 = default)
  verbosity: 0
  # Per-component verbosity
  component:
    # Access control (authentication and authorization events are logged
    # here; there is no separate "authorization" component, and an unknown
    # key prevents mongod from starting)
    accessControl:
      verbosity: 1
    # Commands
    command:
      verbosity: 1
    # Replication
    replication:
      verbosity: 1
    # Storage engine
    storage:
      verbosity: 1
    # Network
    network:
      verbosity: 0
    # Queries
    query:
      verbosity: 1
    # Indexes
    index:
      verbosity: 0
    # Transactions
    transaction:
      verbosity: 1
  # On SIGUSR1 reopen the log file (an external tool such as logrotate
  # renames it) instead of letting mongod rename it itself
  logRotate: reopen
  # Verbose exception tracing (a debugging aid; consider disabling in
  # steady-state production)
  traceAllExceptions: true

# Operation profiling - the slow-operation settings live here, not under
# systemLog
operationProfiling:
  # Profiler mode: off, slowOp, all
  mode: slowOp
  # Slow-operation threshold in milliseconds
  slowOpThresholdMs: 100
  # Fraction of slow operations to record (0-1)
  slowOpSampleRate: 1.0
4.2 日志轮转配置
bash
复制代码
# ============================================
# logrotate configuration - /etc/logrotate.d/mongodb
# Requires systemLog.logRotate: reopen (with logAppend: true) in
# mongod.conf, so that mongod reopens the file logrotate just replaced
# instead of renaming it itself.
# ============================================
/var/log/mongodb/*.log {
    # Rotate every day
    daily
    # Keep 30 rotated logs
    rotate 30
    # Skip rotation when the log is empty
    notifempty
    # Do not complain when the log is missing
    missingok
    # Compress rotated logs
    compress
    # ...but leave the most recent one uncompressed
    delaycompress
    # Create the new log file with this mode and owner
    create 640 mongodb mongodb
    # Date-based suffix: mongod.log-YYYYMMDD-<epoch>
    dateext
    dateformat -%Y%m%d-%s
    # Run the script once for all matched logs
    sharedscripts
    postrotate
        # Tell mongod to reopen its log file
        /usr/bin/kill -SIGUSR1 $(cat /var/run/mongodb/mongod.pid) 2>/dev/null || true
        # Give it a moment to reopen
        sleep 1
        # Safety net: remove rotated logs older than 90 days. The pattern
        # matches the dateext naming above ("*.log-..."); the previous
        # "*.log.*" never matched any rotated file.
        find /var/log/mongodb -name "*.log-*" -mtime +90 -delete
    endscript
}
4.3 日志管理脚本
bash
复制代码
#!/bin/bash
# ============================================
# Log management script - /usr/local/bin/mongodb-log-mgmt.sh
# ============================================
LOG_DIR="/var/log/mongodb"
MONGO_LOG="$LOG_DIR/mongod.log"
ARCHIVE_DIR="$LOG_DIR/archive"
RETENTION_DAYS=30
ALERT_EMAIL="admin@example.com"
# ANSI color escapes
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Shared formatter: "<color>[TAG]<reset> <timestamp> - <message>" on stdout.
# Escape sequences in the whole line are expanded, as with echo -e.
# Arguments: $1 - color escape, $2 - tag, $3 - message
_log_line() {
  printf '%b\n' "$1[$2]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $3"
}
log_info() {
  _log_line "${GREEN}" "INFO" "$1"
}
log_warn() {
  _log_line "${YELLOW}" "WARN" "$1"
}
log_error() {
  _log_line "${RED}" "ERROR" "$1"
}

# Make sure the archive directory exists
mkdir -p "$ARCHIVE_DIR"
# Print how many lines of file $2 match extended regex $1; prints 0 when
# nothing matches or the file cannot be read.
# `grep -c` already prints "0" (and exits 1) when there is no match, so the
# old `$(grep -c ... || echo 0)` produced "0<newline>0".
_count_matches() {
  local n
  n=$(grep -cE -- "$1" "$2" 2>/dev/null)
  printf '%s\n' "${n:-0}"
}

#######################################
# 1. Analyze a MongoDB log and write a report into the archive directory.
# Globals:   ARCHIVE_DIR, ALERT_EMAIL (read)
# Arguments: $1 - log file to analyze
# Outputs:   $ARCHIVE_DIR/log_analysis_<YYYYMMDD>.txt; progress messages
#            through log_info/log_warn; an e-mail through `mail` when more
#            than 100 FATAL/ERROR lines are found
#######################################
analyze_log() {
  log_info "Analyzing MongoDB logs..."
  local log_file=$1
  local report_file
  report_file="$ARCHIVE_DIR/log_analysis_$(date +%Y%m%d).txt"
  {
    echo "=== MongoDB Log Analysis Report ==="
    echo "Time: $(date)"
    echo "Log File: $log_file"
    echo ""
    # Error statistics
    echo "--- Error Statistics ---"
    echo "FATAL errors: $(_count_matches 'FATAL' "$log_file")"
    echo "ERROR entries: $(_count_matches 'ERROR' "$log_file")"
    echo "WARNING entries: $(_count_matches 'WARNING' "$log_file")"
    # Authentication failures
    echo ""
    echo "--- Authentication Failures ---"
    grep "Failed to authenticate" "$log_file" 2>/dev/null |
      awk '{print $1, $2, $NF}' | sort | uniq -c | tail -20
    # Slow-query statistics: the number just before each "ms"
    echo ""
    echo "--- Slow Query Statistics ---"
    grep -E "slow query|slow ms" "$log_file" 2>/dev/null |
      awk -F'ms' '{print $1}' | awk '{print $NF}' |
      awk '{sum+=$1; count++} END {if(count>0) print "Average: " sum/count "ms\nCount: " count}'
    # Connection statistics
    echo ""
    echo "--- Connection Statistics ---"
    grep "connection accepted" "$log_file" 2>/dev/null | wc -l |
      awk '{print "Total connections: " $1}'
    # Replica-set state changes
    echo ""
    echo "--- Replica Set State Changes ---"
    grep -E "transition to|step down|election" "$log_file" 2>/dev/null | tail -10
    # Disk-space warnings
    echo ""
    echo "--- Disk Space Warnings ---"
    grep -i "no space left" "$log_file" 2>/dev/null | tail -5
  } > "$report_file"
  log_info "Analysis report saved to: $report_file"
  # Raise an alert when the log contains many serious errors
  local error_count
  error_count=$(_count_matches 'FATAL|ERROR' "$log_file")
  if [ "$error_count" -gt 100 ]; then
    echo "High error count detected: $error_count" |
      mail -s "MongoDB High Error Alert" "$ALERT_EMAIL"
    log_warn "High error count detected: $error_count"
  fi
}
#######################################
# 2. Delete logs older than the retention period, then report the size of
# the log directory.
# Globals: ARCHIVE_DIR, LOG_DIR, RETENTION_DAYS (read)
#######################################
cleanup_logs() {
  log_info "Cleaning up old logs (older than $RETENTION_DAYS days)..."
  # Archived logs first, then compressed rotated logs in the log directory
  local age="+$RETENTION_DAYS"
  find "$ARCHIVE_DIR" -name "*.log*" -mtime "$age" -delete
  find "$LOG_DIR" -name "*.log.*.gz" -mtime "$age" -delete
  # Size of the log directory after the cleanup
  local dir_size
  dir_size=$(du -sh "$LOG_DIR" 2>/dev/null | awk '{print $1}')
  log_info "Current log directory size: $dir_size"
}
#######################################
# 3. Follow the MongoDB log in real time.
# Globals:   MONGO_LOG (read)
# Arguments: $1 - optional extended regex; matches are highlighted while
#                 every line is still shown (the "|$" alternative matches
#                 each line)
#######################################
tail_logs() {
  local highlight=$1
  if [ -z "$highlight" ]; then
    tail -f "$MONGO_LOG"
    return
  fi
  tail -f "$MONGO_LOG" | grep --color=always -E "$highlight|$"
}
#######################################
# 4. Summarize recent ERROR/FATAL/WARNING log lines.
# Globals:   MONGO_LOG (read)
# Arguments: $1 - look-back window in hours (default 24)
# Outputs:   a header, then up to 20 "count field3 field4 field5 field6"
#            lines, most frequent first. A line is included when its first
#            two fields joined by "T" sort at or after the cut-off.
#######################################
error_summary() {
  local window_hours=${1:-24}
  log_info "Generating error summary for last $window_hours hours..."
  local cutoff
  cutoff=$(date -d "$window_hours hours ago" '+%Y-%m-%dT%H:%M:%S')
  echo "=== Error Summary (Last $window_hours hours) ==="
  awk -v since="$cutoff" '
    ($1 "T" $2) >= since && /ERROR|FATAL|WARNING/ { print $3, $4, $5, $6 }
  ' "$MONGO_LOG" |
    sort | uniq -c | sort -rn | head -20
}
#######################################
# 5. Print a simple performance overview taken from the MongoDB log: the
# ten largest "<n>ms" durations on slow-query lines, then the ten most
# frequent values of the sixth field on operation lines.
# Globals: MONGO_LOG (read)
#######################################
performance_analysis() {
  log_info "Performing performance analysis..."
  printf '%s\n' "=== MongoDB Performance Analysis ===" ""
  # Ten slowest operations
  echo "Top 10 Slowest Operations:"
  grep -E "slow query|slow ms" "$MONGO_LOG" 2>/dev/null |
    grep -oP '[0-9]+ms' |
    sort -rn |
    head -10 |
    awk '{print " " $0}'
  printf '%s\n' "" "Operation Type Distribution:"
  grep -E "command|query|insert|update|delete" "$MONGO_LOG" 2>/dev/null |
    awk '{print $6}' |
    sort |
    uniq -c |
    sort -rn |
    head -10
}
#######################################
# 6. Command dispatcher.
# Globals:   MONGO_LOG (read)
# Arguments: $1 - analyze | cleanup | tail | errors | performance |
#                 rotate | all
#            $2 - pattern for "tail", hours for "errors"
# Exits:     1 with a usage line on an unknown command
#######################################
main() {
  local action=$1
  case "$action" in
    analyze) analyze_log "$MONGO_LOG" ;;
    cleanup) cleanup_logs ;;
    tail) tail_logs "$2" ;;
    errors) error_summary "$2" ;;
    performance) performance_analysis ;;
    rotate)
      # SIGUSR1 asks mongod to rotate/reopen its log
      log_info "Manually rotating logs..."
      kill -SIGUSR1 $(cat /var/run/mongodb/mongod.pid)
      log_info "Log rotation triggered"
      ;;
    all)
      analyze_log "$MONGO_LOG"
      performance_analysis
      cleanup_logs
      ;;
    *)
      echo "Usage: $0 {analyze|cleanup|tail [pattern]|errors [hours]|performance|rotate|all}"
      exit 1
      ;;
  esac
}
# Run the dispatcher with all command-line arguments
main "$@"
4.4 日志监控告警配置
javascript
复制代码
// ============================================
// In-database log monitoring script
// ============================================
// Create a capped collection that stores the log-analysis results
use monitoring
db.createCollection("log_analysis", {
capped: true,
size: 104857600, // 100MB
max: 10000
})
/**
 * Scan the server's in-memory log and record an alert document in
 * log_analysis (in the current database) when errors or authentication
 * failures exceed their thresholds.
 *
 * The log is fetched once so that both checks evaluate the same snapshot;
 * previously getLog was called twice.
 * NOTE(review): the plain-text markers "ERROR"/"FATAL"/"Failed to
 * authenticate" may not match the wording of structured (JSON) server
 * logs - confirm against real log lines.
 */
function monitorLogs() {
  const alertThresholds = {
    errorCount: 50,      // error-count threshold
    authFailures: 10,    // authentication-failure threshold
    slowQueries: 5,      // slow-query threshold (not evaluated yet)
    connectionRate: 100  // connection-rate threshold (not evaluated yet)
  };

  // One snapshot of the recent log lines for every check below
  const logLines = db.adminCommand({ getLog: "global" }).log || [];

  // Recent errors
  const recentErrors = logLines.filter(
    (l) => l.includes("ERROR") || l.includes("FATAL")
  );
  if (recentErrors.length > alertThresholds.errorCount) {
    // Raise an alert
    db.log_analysis.insertOne({
      timestamp: new Date(),
      type: "ERROR_BURST",
      count: recentErrors.length,
      samples: recentErrors.slice(0, 10),
      severity: "HIGH"
    });
    print(`ALERT: High error count detected: ${recentErrors.length}`);
  }

  // Authentication failures
  const authFailures = logLines.filter((l) => l.includes("Failed to authenticate"));
  if (authFailures.length > alertThresholds.authFailures) {
    db.log_analysis.insertOne({
      timestamp: new Date(),
      type: "AUTH_FAILURE_BURST",
      count: authFailures.length,
      severity: "CRITICAL"
    });
    print(`CRITICAL: Multiple authentication failures: ${authFailures.length}`);
  }
}
// Run this every 5 minutes.
// In production, schedule it externally (for example a cron job that runs
// mongosh with this script); this file itself never calls monitorLogs().
4.5 日志最佳实践清单
bash
复制代码
# ============================================
# Best-practice checklist for log configuration
# Writes the checklist as Markdown; the quoted heredoc delimiter keeps
# the text literal.
# ============================================
checklist_file=/tmp/mongodb_log_checklist.md
cat > "$checklist_file" << 'EOF'
# MongoDB 生产环境日志配置清单
## 必需配置
- [ ] 启用日志追加 (logAppend: true)
- [ ] 设置合适的日志级别 (verbosity: 0-2)
- [ ] 配置日志轮转策略
- [ ] 设置慢查询阈值 (slowOpThresholdMs: 100)
- [ ] 启用操作分析 (mode: slowOp)
## 安全配置
- [ ] 日志目录权限设置为 750
- [ ] 日志文件权限设置为 640
- [ ] 避免在日志中记录敏感信息
- [ ] 配置日志审计
## 监控告警
- [ ] 配置日志大小监控
- [ ] 设置错误率告警
- [ ] 监控认证失败事件
- [ ] 跟踪慢查询趋势
## 存储管理
- [ ] 日志存储在独立分区
- [ ] 配置日志归档策略
- [ ] 定期清理旧日志
- [ ] 监控磁盘使用率
## 性能优化
- [ ] 生产环境避免使用 -v 详细日志
- [ ] 根据业务需求调整慢查询阈值
- [ ] 使用异步日志写入
EOF
echo "Log checklist created at $checklist_file"
总结
生产环境部署检查清单
bash
复制代码
#!/bin/bash
# Final pre-flight check for a production deployment.
# Each step prints one ✓ or ✗ line; nothing is changed on the system.
echo "=== MongoDB 生产环境部署检查 ==="
# 1. Configuration file present
if [ -f /etc/mongod.conf ]; then
  echo "✓ 配置文件存在"
else
  echo "✗ 配置文件缺失"
fi
# 2. Authorization enabled
if grep -q "authorization: enabled" /etc/mongod.conf; then
  echo "✓ 认证已启用"
else
  echo "✗ 认证未启用"
fi
# 3. Log append enabled
if grep -q "logAppend: true" /etc/mongod.conf; then
  echo "✓ 日志追加已启用"
else
  echo "✗ 日志未配置"
fi
# 4. Network binding configured
if grep -q "bindIp:" /etc/mongod.conf; then
  echo "✓ 网络绑定已配置"
else
  echo "✗ 网络未配置"
fi
# 5. Service running
if systemctl is-active mongod >/dev/null; then
  echo "✓ 服务运行中"
else
  echo "✗ 服务未运行"
fi
# 6. Connection test
if mongosh --eval "db.runCommand({ping:1})" >/dev/null 2>&1; then
  echo "✓ 数据库可连接"
else
  echo "✗ 数据库连接失败"
fi
echo "=== 检查完成 ==="
快速参考命令
bash
复制代码
# Quick reference - independent one-liners, not a script to run in order
# Start MongoDB
mongod --config /etc/mongod.conf
# Stop MongoDB
mongod --shutdown --config /etc/mongod.conf
# Follow the log
tail -f /var/log/mongodb/mongod.log
# Rotate the log
kill -SIGUSR1 "$(cat /var/run/mongodb/mongod.pid)"
# Check the service status
systemctl status mongod
# Show connection counts
mongosh --eval "db.serverStatus().connections"
# Enable slow-operation profiling (level 1, threshold 100 ms) on the
# current database - this only configures the profiler
mongosh --eval "db.setProfilingLevel(1, 100)"
# View the most recently profiled slow operations
mongosh --quiet --eval "db.system.profile.find().sort({ ts: -1 }).limit(5)"