MongoDB备份完全指南
目录
- 备份方法概览
- 对服务器进行备份
- 副本集的特殊注意事项
- 分片集群的特殊注意事项
一、备份方法概览
1.1 核心知识点
MongoDB提供多种备份方法,各有优劣:
| 备份方法 | 原理 | 优点 | 缺点 | 适用场景 |
|---|---|---|---|---|
| 逻辑备份 (mongodump) | 导出数据为BSON格式 | 跨版本兼容、可部分恢复 | 速度慢、占用空间大 | 中小型数据库、跨平台迁移 |
| 物理备份 | 复制数据库物理文件 | 速度快、恢复快 | 需停机或锁库、版本敏感 | 大型数据库、生产环境 |
| 快照备份 | 文件系统级别快照 | 秒级完成、对性能影响小 | 需要存储系统支持 | 云环境、LVM/EBS存储 |
| 增量备份 | 备份oplog | 节省空间、支持PITR | 需配合全量备份 | 重要业务、需要时间点恢复 |
1.2 备份策略设计
javascript
// backup_strategy.js
// Backup strategy configuration template and sizing helpers.
// 1. Backup strategy configuration template.
const backupStrategy = {
// Full backup settings.
fullBackup: {
frequency: "daily", // daily (02:00 per the cron schedule in this guide)
retention: 30, // keep 30 days
method: "mongodump", // logical backup
compression: true, // gzip the dump
encryption: true // encrypt the archive
},
// Incremental (oplog-based) backup settings.
incrementalBackup: {
frequency: "hourly", // every hour
retention: 7, // keep 7 days
method: "oplog",
source: "secondary" // read from a secondary member
},
// Long-term archive settings.
archiveBackup: {
frequency: "monthly", // monthly
retention: 365, // keep 1 year
location: "offsite" // off-site storage
}
};
// 2. Estimate the disk space a backup will need.
// Runs inside mongosh against the current database; db.stats() sizes are
// in bytes. Relies on the sibling `backupStrategy` constant.
function estimateBackupSize() {
  const stats = db.stats();
  const dataSize = stats.dataSize;
  const indexSize = stats.indexSize;
  // A logical dump is typically 30-50% smaller than the physical files.
  const estimatedLogicalSize = (dataSize + indexSize) * 0.6;
  // Compression usually shrinks that to 20-30% of its size.
  const estimatedCompressedSize = estimatedLogicalSize * 0.25;
  print("=== 备份空间估算 ===");
  print(`原始数据大小: ${(dataSize / 1024 / 1024 / 1024).toFixed(2)} GB`);
  print(`索引大小: ${(indexSize / 1024 / 1024 / 1024).toFixed(2)} GB`);
  print(`预估逻辑备份大小: ${(estimatedLogicalSize / 1024 / 1024 / 1024).toFixed(2)} GB`);
  print(`预估压缩后大小: ${(estimatedCompressedSize / 1024 / 1024 / 1024).toFixed(2)} GB`);
  // Storage implied by the retention policy.
  // NOTE(review): 365 / retention counts backup *generations* per year, not
  // the daily backups retained concurrently — confirm the intended model.
  const fullBackupsPerYear = 365 / backupStrategy.fullBackup.retention;
  const totalYearlyStorage = estimatedCompressedSize * fullBackupsPerYear;
  // BUG FIX: the original divided bytes by 1024 (yielding KB) but labeled
  // the result TB; bytes -> TB requires dividing by 1024^4.
  print(`\n年度存储需求: ${(totalYearlyStorage / 1024 / 1024 / 1024 / 1024).toFixed(2)} TB`);
  return {
    logicalSize: estimatedLogicalSize,
    compressedSize: estimatedCompressedSize,
    yearlyStorage: totalYearlyStorage
  };
}
estimateBackupSize();
二、对服务器进行备份
2.1 使用mongodump进行逻辑备份
2.1.1 基础备份命令
bash
#!/bin/bash
# mongodb_full_backup.sh
# MongoDB full logical backup script (mongodump based).
#
# Fixes vs. the original:
#   * removed the duplicated shebang line
#   * removed --dumpDbUsersAndRoles: that flag is only valid together with
#     --db; a full-instance dump already includes users/roles via the
#     admin database dump
#   * quoted variable expansions so paths with spaces survive

# ==================== Configuration ====================
BACKUP_DIR="/backup/mongodb"
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_PATH="${BACKUP_DIR}/full_${DATE}"
MONGO_HOST="localhost"
MONGO_PORT="27017"
MONGO_USER="backup_user"
MONGO_PASS="your_password"   # NOTE(review): prefer a config file or env var; CLI passwords are visible in `ps`
AUTH_DB="admin"
RETENTION_DAYS=30
LOG_FILE="/var/log/mongodb/backup.log"

# ==================== Create directories ====================
mkdir -p "${BACKUP_PATH}"
mkdir -p "$(dirname "${LOG_FILE}")"

# ==================== Logging helper ====================
log_message() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "${LOG_FILE}"
}

# ==================== Run the backup ====================
log_message "开始全量备份..."

# 1. Dump every database as gzip-compressed BSON.
mongodump \
    --host "${MONGO_HOST}" \
    --port "${MONGO_PORT}" \
    --username "${MONGO_USER}" \
    --password "${MONGO_PASS}" \
    --authenticationDatabase "${AUTH_DB}" \
    --out "${BACKUP_PATH}" \
    --gzip \
    --verbose

# Check the dump's exit status (mongodump is the last command above).
if [ $? -eq 0 ]; then
    log_message "✓ 全量备份成功: ${BACKUP_PATH}"
    BACKUP_SIZE=$(du -sh "${BACKUP_PATH}" | cut -f1)
    log_message "备份大小: ${BACKUP_SIZE}"
else
    log_message "✗ 全量备份失败!"
    exit 1
fi

# 2. Write backup metadata next to the dump.
cat > "${BACKUP_PATH}/backup_info.json" << EOF
{
    "backup_id": "full_${DATE}",
    "backup_time": "$(date -Iseconds)",
    "backup_type": "full",
    "mongodb_version": "$(mongod --version | head -1)",
    "host": "${MONGO_HOST}:${MONGO_PORT}",
    "size_bytes": $(du -sb "${BACKUP_PATH}" | cut -f1),
    "databases": $(mongosh --quiet --eval "db.adminCommand('listDatabases').databases.map(d => d.name)" --host "${MONGO_HOST}" --port "${MONGO_PORT}" --username "${MONGO_USER}" --password "${MONGO_PASS}" --authenticationDatabase "${AUTH_DB}")
}
EOF

# 3. Prune expired backups (-maxdepth 1 keeps find from descending into
# directories it is deleting).
log_message "清理 ${RETENTION_DAYS} 天前的备份..."
find "${BACKUP_DIR}" -maxdepth 1 -name "full_*" -type d -mtime +"${RETENTION_DAYS}" -exec rm -rf {} \;
find "${BACKUP_DIR}" -maxdepth 1 -name "*.tar.gz" -type f -mtime +"${RETENTION_DAYS}" -delete

# 4. Optional: encrypt the archive with GPG.
# tar -czf - "${BACKUP_PATH}" | gpg --symmetric --cipher-algo AES256 --passphrase-file /secure/passphrase.txt > "${BACKUP_PATH}.tar.gz.gpg"

log_message "备份完成!"
2.1.2 指定数据库和集合备份
javascript
// selective_backup.js
// Selective backup helpers (database / collection / query scoped).
// Print the mongodump command that backs up one whole database.
// Returns true; actually running the command requires an external shell.
function backupDatabase(dbName, backupPath) {
  print(`正在备份数据库: ${dbName}`);
  const dumpCommand = ["mongodump", "--db", dbName, "--out", backupPath].join(" ");
  // The printed command must be executed outside mongosh.
  print(`执行命令: ${dumpCommand}`);
  return true;
}
// Print the mongodump command that backs up a single collection.
// Returns true; the command itself must be run by an external shell.
function backupCollection(dbName, collectionName, backupPath) {
  print(`正在备份集合: ${dbName}.${collectionName}`);
  const dumpParts = ["mongodump", "--db", dbName, "--collection", collectionName, "--out", backupPath];
  print(`执行命令: ${dumpParts.join(" ")}`);
  return true;
}
// Export the documents matching `query` from dbName.collectionName.
// Returns the number of matched documents. `outputFile` is only reported;
// writing the file would require a host-side filesystem API.
function backupWithQuery(dbName, collectionName, query, outputFile) {
  // BUG FIX: the original declared `const db = db.getSiblingDB(dbName)`,
  // which shadows the global `db` and reads the const inside its own
  // temporal dead zone -> ReferenceError. Use a differently named local.
  const targetDb = db.getSiblingDB(dbName);
  const cursor = targetDb.getCollection(collectionName).find(query);
  const data = cursor.toArray();
  // (The original also built an unused JSON string here; removed.)
  print(`导出 ${data.length} 条记录到 ${outputFile}`);
  return data.length;
}
// 使用示例
// backupCollection("test", "users", "/backup/test_users");
// backupWithQuery("test", "orders", { status: "completed", createdAt: { $gt: new Date("2024-01-01") } }, "/backup/completed_orders.json");
2.2 使用物理备份(文件系统级别)
bash
#!/bin/bash
# mongodb_physical_backup.sh
# MongoDB physical (file-level) backup script.
#
# Fixes vs. the original:
#   * removed the duplicated shebang
#   * the success check used to test the exit status of unlock_database /
#     start_mongodb (the last command run), not of the data copy; the
#     copy's status is now captured explicitly before unlocking

# ==================== Configuration ====================
MONGO_DATA_DIR="/var/lib/mongodb"
BACKUP_DIR="/backup/mongodb/physical"
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_PATH="${BACKUP_DIR}/physical_${DATE}"
MONGO_PID=$(pgrep mongod)
LOG_FILE="/var/log/mongodb/physical_backup.log"

# ==================== Logging helper ====================
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "${LOG_FILE}"
}

# ==================== Physical backup ====================
log "开始物理备份..."

# Method 1: fsync lock (suitable for a replica-set secondary).
lock_database() {
    log "锁定数据库..."
    mongosh --eval "db.adminCommand({fsync:1, lock:1})" --quiet
}
unlock_database() {
    log "解锁数据库..."
    mongosh --eval "db.adminCommand({fsyncUnlock:1})" --quiet
}

# Method 2: stop mongod entirely (cold backup of a single node).
stop_mongodb() {
    log "停止MongoDB服务..."
    systemctl stop mongod
    sleep 5
}
start_mongodb() {
    log "启动MongoDB服务..."
    systemctl start mongod
    sleep 10
}

# Create the backup directory.
mkdir -p "${BACKUP_PATH}"

# Choose the backup mode.
BACKUP_METHOD="hot"   # hot (fsync lock) or cold (service stop)

if [ "${BACKUP_METHOD}" = "hot" ]; then
    # Hot backup: lock, copy, unlock.
    lock_database
    log "复制数据文件..."
    cp -rp "${MONGO_DATA_DIR}"/* "${BACKUP_PATH}/"
    COPY_STATUS=$?   # capture BEFORE unlocking, or $? reflects the unlock
    unlock_database
else
    # Cold backup: stop the service, copy, restart.
    stop_mongodb
    cp -rp "${MONGO_DATA_DIR}"/* "${BACKUP_PATH}/"
    COPY_STATUS=$?
    start_mongodb
fi

# Verify the copy itself succeeded.
if [ "${COPY_STATUS}" -eq 0 ]; then
    log "✓ 物理备份成功: ${BACKUP_PATH}"
    BACKUP_SIZE=$(du -sh "${BACKUP_PATH}" | cut -f1)
    log "备份大小: ${BACKUP_SIZE}"
else
    log "✗ 物理备份失败!"
    exit 1
fi

# Backup metadata.
cat > "${BACKUP_PATH}/backup_metadata.json" << EOF
{
    "backup_id": "physical_${DATE}",
    "backup_time": "$(date -Iseconds)",
    "backup_type": "physical",
    "method": "${BACKUP_METHOD}",
    "source_data_dir": "${MONGO_DATA_DIR}",
    "mongodb_version": "$(mongod --version | head -1)"
}
EOF

log "物理备份完成"
2.3 使用快照备份(LVM/EBS)
bash
#!/bin/bash
# mongodb_snapshot_backup.sh
# LVM snapshot backup script.
#
# Fixes vs. the original:
#   * removed the duplicated shebang
#   * the snapshot is mounted read-only (we only copy from it)
#   * mount/tar failures are detected and the snapshot is always removed,
#     so a failed run no longer leaks a mounted snapshot volume

# ==================== Configuration ====================
VG_NAME="vg_data"          # volume group
LV_NAME="lv_mongodb"       # logical volume holding the MongoDB data dir
SNAPSHOT_SIZE="10G"        # copy-on-write space reserved for the snapshot
SNAPSHOT_NAME="mongodb_snap_$(date +%Y%m%d_%H%M%S)"
MOUNT_POINT="/var/lib/mongodb"
BACKUP_DIR="/backup/mongodb/snapshots"
LOG_FILE="/var/log/mongodb/snapshot_backup.log"

# ==================== Logging helper ====================
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "${LOG_FILE}"
}

# ==================== LVM snapshot backup ====================
log "开始LVM快照备份..."

# 1. Flush writes and lock the database so the snapshot is consistent.
log "刷新写入并锁定数据库..."
mongosh --eval "db.adminCommand({fsync:1, lock:1})" --quiet

# 2. Create the LVM snapshot while the database is locked.
log "创建LVM快照: ${SNAPSHOT_NAME}"
lvcreate -L "${SNAPSHOT_SIZE}" -s -n "${SNAPSHOT_NAME}" "/dev/${VG_NAME}/${LV_NAME}"
if [ $? -eq 0 ]; then
    log "✓ 快照创建成功"
else
    log "✗ 快照创建失败"
    # Always release the fsync lock, even on failure.
    mongosh --eval "db.adminCommand({fsyncUnlock:1})" --quiet
    exit 1
fi

# 3. Unlock as soon as the snapshot exists — its contents are now frozen.
log "解锁数据库..."
mongosh --eval "db.adminCommand({fsyncUnlock:1})" --quiet

# 4. Mount the snapshot read-only and archive it.
SNAPSHOT_DEVICE="/dev/${VG_NAME}/${SNAPSHOT_NAME}"
SNAPSHOT_MOUNT="/mnt/${SNAPSHOT_NAME}"
mkdir -p "${SNAPSHOT_MOUNT}"
if ! mount -o ro "${SNAPSHOT_DEVICE}" "${SNAPSHOT_MOUNT}"; then
    log "✗ 快照挂载失败"
    lvremove -f "${SNAPSHOT_DEVICE}"
    exit 1
fi

mkdir -p "${BACKUP_DIR}"
tar -czf "${BACKUP_DIR}/snapshot_$(date +%Y%m%d_%H%M%S).tar.gz" -C "${SNAPSHOT_MOUNT}" .
TAR_STATUS=$?

# 5. Clean up the snapshot regardless of the tar result.
umount "${SNAPSHOT_MOUNT}"
lvremove -f "${SNAPSHOT_DEVICE}"

if [ "${TAR_STATUS}" -ne 0 ]; then
    log "✗ 快照归档失败"
    exit 1
fi
log "快照备份完成: ${BACKUP_DIR}"
2.4 增量备份(基于Oplog)
javascript
// incremental_backup.js
// Oplog-based incremental backup. Intended to run inside mongosh, which
// provides the `db`/`print`/`Timestamp` globals and Node's `fs` module
// via require().
class IncrementalBackup {
constructor(config) {
this.backupDir = config.backupDir;
this.lastOplogTs = null;
// Direct handle to the replica-set oplog (requires access to "local").
this.oplogCollection = db.getSiblingDB("local").oplog.rs;
}
// Return the `ts` field of the newest oplog entry (natural order, desc).
getCurrentOplogPosition() {
const oplogEntry = this.oplogCollection.find().sort({ $natural: -1 }).limit(1).next();
return oplogEntry.ts;
}
// Dump every oplog entry with ts > fromTimestamp (and ts < toTimestamp if
// given) to outputFile as pretty-printed JSON; returns the entry count.
// NOTE(review): JSON does not round-trip BSON Timestamps; verify that a
// restore path can consume this format before relying on it for PITR.
backupOplog(fromTimestamp, toTimestamp, outputFile) {
const query = { ts: { $gt: fromTimestamp } };
if (toTimestamp) {
query.ts.$lt = toTimestamp;
}
const oplogEntries = this.oplogCollection.find(query).toArray();
// Persist the entries via mongosh's Node fs integration.
const fs = require('fs');
fs.writeFileSync(outputFile, JSON.stringify(oplogEntries, null, 2));
print(`备份了 ${oplogEntries.length} 条oplog记录到 ${outputFile}`);
return oplogEntries.length;
}
// Run one incremental cycle: resume from the persisted position, dump new
// oplog entries, then persist the new position. Returns the backup file path.
performIncrementalBackup() {
// Load the last saved oplog position, if any.
const lastPositionFile = `${this.backupDir}/last_position.json`;
let lastPosition = null;
try {
const fs = require('fs');
const lastPosData = fs.readFileSync(lastPositionFile, 'utf8');
// NOTE(review): JSON.parse yields a plain object, not a BSON Timestamp,
// so the later $gt comparison may not match as intended — confirm and
// rebuild with Timestamp(t, i) if necessary.
lastPosition = JSON.parse(lastPosData).timestamp;
} catch(e) {
// No previous position on record: start 24 hours back.
lastPosition = Timestamp(Math.floor(Date.now() / 1000) - 86400, 0);
print("未找到上次备份位置,从24小时前开始备份");
}
const currentPosition = this.getCurrentOplogPosition();
const backupFile = `${this.backupDir}/incremental_${Date.now()}.json`;
// Dump the delta since the last position.
const count = this.backupOplog(lastPosition, currentPosition, backupFile);
// Persist the new resume position.
const fs = require('fs');
fs.writeFileSync(lastPositionFile, JSON.stringify({
timestamp: currentPosition,
lastBackup: new Date().toISOString(),
recordsCount: count
}));
print(`增量备份完成,备份了 ${count} 条操作记录`);
return backupFile;
}
// Print the manual full+incremental restore procedure.
restoreFromIncremental(fullBackupPath, incrementalBackupPath) {
print("开始从增量备份恢复...");
print(`1. 首先恢复全量备份: mongorestore ${fullBackupPath}`);
print(`2. 然后重放oplog: mongorestore --oplogReplay ${incrementalBackupPath}`);
// Actual restore command:
// mongorestore --oplogReplay ${incrementalBackupPath}
}
}
// Usage example
const backup = new IncrementalBackup({ backupDir: "/backup/mongodb/incremental" });
backup.performIncrementalBackup();
// Scheduling: run from cron every 6 hours, e.g.
// crontab entry:
// 0 */6 * * * mongosh --file /scripts/incremental_backup.js
2.5 自动化备份调度
bash
#!/bin/bash
# setup_backup_cron.sh
# Install the MongoDB backup dispatcher and its cron schedule.
#
# Fixes vs. the original:
#   * removed the duplicated shebang
#   * `crontab /tmp/mongodb_cron` used to REPLACE the user's entire
#     crontab; existing entries are now preserved by merging `crontab -l`

# ==================== Script directory ====================
BACKUP_SCRIPT_DIR="/usr/local/bin/mongodb-backup"
mkdir -p ${BACKUP_SCRIPT_DIR}

# ==================== Main backup dispatcher ====================
# The quoted 'EOF' keeps the inner $variables from expanding at install time.
cat > ${BACKUP_SCRIPT_DIR}/backup.sh << 'EOF'
#!/bin/bash
# Dispatch to the requested backup type.
BACKUP_TYPE=${1:-full}   # full, incremental, physical
DATE=$(date +%Y%m%d_%H%M%S)
LOG_FILE="/var/log/mongodb/backup_${DATE}.log"
case ${BACKUP_TYPE} in
    full)
        echo "[$(date)] 执行全量备份" >> ${LOG_FILE}
        mongodump --out /backup/mongodb/full_${DATE} --gzip
        ;;
    incremental)
        echo "[$(date)] 执行增量备份" >> ${LOG_FILE}
        mongosh --file /usr/local/bin/mongodb-backup/incremental_backup.js >> ${LOG_FILE}
        ;;
    physical)
        echo "[$(date)] 执行物理备份" >> ${LOG_FILE}
        /usr/local/bin/mongodb-backup/physical_backup.sh >> ${LOG_FILE}
        ;;
    *)
        echo "未知备份类型: ${BACKUP_TYPE}"
        exit 1
        ;;
esac
# Remove dispatcher logs older than 7 days.
find /var/log/mongodb/ -name "backup_*.log" -mtime +7 -delete
echo "[$(date)] 备份完成" >> ${LOG_FILE}
EOF
chmod +x ${BACKUP_SCRIPT_DIR}/backup.sh

# ==================== Cron schedule ====================
cat > /tmp/mongodb_cron << EOF
# MongoDB备份定时任务
# 每天凌晨2点执行全量备份
0 2 * * * /usr/local/bin/mongodb-backup/backup.sh full >> /var/log/mongodb/cron.log 2>&1
# 每天上午10点、下午4点执行增量备份
0 10,16 * * * /usr/local/bin/mongodb-backup/backup.sh incremental >> /var/log/mongodb/cron.log 2>&1
# 每周日凌晨3点执行物理备份
0 3 * * 0 /usr/local/bin/mongodb-backup/backup.sh physical >> /var/log/mongodb/cron.log 2>&1
# 每天凌晨1点清理30天前的备份
0 1 * * * find /backup/mongodb -name "full_*" -type d -mtime +30 -exec rm -rf {} \;
EOF

# Merge with any existing crontab instead of overwriting it.
# NOTE: re-running this script appends duplicate entries; dedupe if needed.
{ crontab -l 2>/dev/null; cat /tmp/mongodb_cron; } | crontab -

echo "定时任务配置完成!"
crontab -l
三、副本集的特殊注意事项
3.1 核心知识点
副本集备份的关键考虑因素:
- 从节点备份:始终在从节点上执行备份,避免影响主节点性能
- Oplog一致性:使用 --oplog 参数确保时间点一致性
- 备份窗口:确保oplog窗口足够覆盖备份时间
- 隐藏节点:可配置隐藏节点专门用于备份
3.2 副本集备份最佳实践
javascript
// replica_set_backup.js
// Replica-set-aware backup helper: picks the best member to back up from,
// checks the oplog window, and prints the mongodump/mongorestore commands.
// Runs inside mongosh (uses the `rs`, `db`, `print` globals).
class ReplicaSetBackup {
constructor(replicaSetName, backupDir) {
this.replicaSetName = replicaSetName;
this.backupDir = backupDir;
}
// Summarize rs.status(): primary (state 1), secondaries (state 2), and
// "hidden" members.
// NOTE(review): rs.status() members typically do NOT carry `priority` or
// `hidden` — those fields live in rs.conf() — so the `hidden` list may
// always be empty; verify on the target MongoDB version.
getReplicaSetStatus() {
const status = rs.status();
return {
setName: status.set,
primary: status.members.find(m => m.state === 1),
secondaries: status.members.filter(m => m.state === 2),
hidden: status.members.filter(m => m.priority === 0 && m.hidden === true)
};
}
// Pick a backup source: a hidden member when present, otherwise the
// secondary with the lowest ping latency. Returns null if none qualify.
getRecommendedBackupNode() {
const status = this.getReplicaSetStatus();
// Prefer a hidden member — it serves no application traffic.
if (status.hidden.length > 0) {
return {
node: status.hidden[0],
type: "hidden",
reason: "隐藏节点专门用于备份,不影响业务"
};
}
// Otherwise take the lowest-latency secondary (by pingMs).
if (status.secondaries.length > 0) {
const secondariesWithLatency = status.secondaries.map(m => ({
...m,
latency: m.pingMs
}));
secondariesWithLatency.sort((a, b) => a.latency - b.latency);
return {
node: secondariesWithLatency[0],
type: "secondary",
reason: "选择延迟最低的从节点"
};
}
return null;
}
// Print the mongodump invocation for the recommended member; --oplog
// captures a consistent point in time. Returns false if no member fits.
performBackupOnSecondary() {
const backupNode = this.getRecommendedBackupNode();
if (!backupNode) {
print("错误:未找到合适的备份节点");
return false;
}
const nodeHost = backupNode.node.name;
print(`在节点 ${nodeHost} (${backupNode.type}) 上执行备份`);
// Build the dump command (with oplog for point-in-time consistency).
const backupCmd = `
mongodump \
--host ${nodeHost} \
--out ${this.backupDir}/backup_$(date +%Y%m%d_%H%M%S) \
--oplog \
--gzip \
--readPreference=secondary
`;
print(`执行命令: ${backupCmd}`);
// Record backup metadata alongside the dump.
const backupMetadata = {
backup_time: new Date().toISOString(),
source_node: nodeHost,
source_type: backupNode.type,
replica_set: this.replicaSetName,
oplog_included: true
};
print("备份元数据:", JSON.stringify(backupMetadata, null, 2));
return true;
}
// Report the oplog window (hours between the oldest and newest entries)
// and warn when it is too small to safely cover a backup run.
checkOplogWindow() {
const localDb = db.getSiblingDB("local");
const oplog = localDb.oplog.rs;
// Oldest and newest oplog entries via natural order.
const firstOplog = oplog.find().sort({ $natural: 1 }).limit(1).next();
const lastOplog = oplog.find().sort({ $natural: -1 }).limit(1).next();
const firstTime = new Date(firstOplog.ts.t * 1000);
const lastTime = new Date(lastOplog.ts.t * 1000);
const oplogHours = (lastTime - firstTime) / 1000 / 3600;
print("=== Oplog窗口检查 ===");
print(`最早oplog时间: ${firstTime.toISOString()}`);
print(`最新oplog时间: ${lastTime.toISOString()}`);
print(`oplog窗口: ${oplogHours.toFixed(2)} 小时`);
// Require the window to be at least twice the assumed backup duration.
const estimatedBackupTime = 2; // assumed backup duration, hours
if (oplogHours < estimatedBackupTime * 2) {
print(`⚠️ 警告: oplog窗口 (${oplogHours.toFixed(2)}小时) 可能不足以完成备份`);
print("建议: 增加oplog大小或缩短备份间隔");
} else {
print("✓ oplog窗口充足");
}
return oplogHours;
}
// Print the restore caveats and the mongorestore command for a
// replica-set target.
restoreToReplicaSet(backupPath, targetReplicaSet) {
print("=== 副本集恢复注意事项 ===");
print("1. 恢复时使用 --oplogReplay 参数");
print("2. 恢复后需要重新初始化副本集配置");
print("3. 如果恢复整个副本集,需要重建所有节点");
const restoreCommand = `
mongorestore \
--host ${targetReplicaSet} \
--oplogReplay \
--drop \
${backupPath}
`;
print(`恢复命令: ${restoreCommand}`);
return true;
}
}
// Usage example
const replBackup = new ReplicaSetBackup("rs0", "/backup/mongodb");
replBackup.checkOplogWindow();
replBackup.performBackupOnSecondary();
3.3 配置隐藏节点用于备份
javascript
// configure_hidden_node.js
// Configure a hidden replica-set member dedicated to backups.
// 1. Add a hidden member to the replica set (run against the primary).
function addHiddenNode(host, port) {
  const config = rs.conf();
  // BUG FIX: the original used config.members.length as the new _id, which
  // collides with an existing _id whenever a member was previously removed.
  // Use max(existing _id) + 1 instead.
  const nextId = config.members.reduce((maxId, m) => Math.max(maxId, m._id), -1) + 1;
  config.members.push({
    _id: nextId,
    host: `${host}:${port}`,
    priority: 0,       // can never be elected primary
    hidden: true,      // invisible to application reads
    votes: 0,          // does not vote in elections
    slaveDelay: 0,     // no replication delay (renamed secondaryDelaySecs in MongoDB 5.0+)
    buildIndexes: true
  });
  // Apply the new replica-set configuration.
  rs.reconfig(config);
  print(`已添加隐藏节点: ${host}:${port}`);
}
// 2. Report the state of every hidden member.
// NOTE(review): relies on rs.status() members exposing priority/hidden —
// those fields usually live in rs.conf(); verify on the target version.
function checkHiddenNodeStatus() {
  const { members } = rs.status();
  print("=== 隐藏节点状态 ===");
  const hiddenMembers = members.filter((m) => m.priority === 0 && m.hidden === true);
  for (const m of hiddenMembers) {
    print(`节点: ${m.name}`);
    print(` 状态: ${m.stateStr}`);
    print(` 延迟: ${m.pingMs}ms`);
    print(` 是否隐藏: ${m.hidden}`);
    print("---");
  }
}
// 3. Run a backup against the hidden member, if one exists.
// Returns false when no hidden member is found.
function backupFromHiddenNode(backupPath) {
  const members = rs.status().members;
  const hiddenNode = members.find((m) => m.priority === 0 && m.hidden === true);
  if (!hiddenNode) {
    print("未找到隐藏节点");
    return false;
  }
  print(`在隐藏节点 ${hiddenNode.name} 上执行备份`);
  // Backup command (reads directly from the hidden member):
  // mongodump --host ${hiddenNode.name} --out ${backupPath} --oplog --gzip
  return true;
}
// 使用示例
// addHiddenNode("backup-server", 27017);
// checkHiddenNodeStatus();
3.4 副本集恢复注意事项
javascript
// replica_set_recovery.js
// Replica-set recovery driver: documents the restore steps for a single
// node or a whole set, and sanity-checks the recovered set afterwards.
// The shell commands are intentionally commented out — this script is a
// runbook; run those commands on the target hosts.
class ReplicaSetRecovery {
  constructor(backupPath) {
    this.backupPath = backupPath;
  }

  // Restore one node from this.backupPath.
  async recoverSingleNode(nodeHost, isPrimary = false) {
    print(`恢复节点: ${nodeHost}`);
    // 1. Stop the mongod service.
    print("停止MongoDB服务...");
    // systemctl stop mongod
    // 2. Wipe the data directory.
    print("清空数据目录...");
    // rm -rf /var/lib/mongodb/*
    // 3. Restore the dump with oplog replay.
    print("恢复数据...");
    const restoreCmd = `mongorestore --host ${nodeHost} --oplogReplay --drop ${this.backupPath}`;
    print(`执行: ${restoreCmd}`);
    // 4. Start mongod again.
    print("启动MongoDB服务...");
    // systemctl start mongod
    // 5. Re-initiate the replica set when restoring the primary.
    if (isPrimary) {
      print("重新初始化副本集配置...");
      // rs.initiate()
    }
    print(`节点 ${nodeHost} 恢复完成`);
  }

  // Restore every node of the replica set, in order.
  // NOTE(review): recoverSingleNode is async and not awaited here, so the
  // per-node steps are only initiated sequentially, not completed in order.
  recoverFullReplicaSet(nodes) {
    print("=== 恢复整个副本集 ===");
    print("步骤:");
    print("1. 停止所有副本集节点");
    print("2. 在所有节点上恢复数据");
    print("3. 启动所有节点");
    print("4. 等待自动同步完成");
    for (const node of nodes) {
      this.recoverSingleNode(node.host, node.isPrimary);
    }
  }

  // Sanity-check the recovered set: member count, primary presence, and
  // secondary replication lag.
  verifyRecovery() {
    print("=== 验证副本集恢复 ===");
    const status = rs.status();
    // Member-count check.
    const expectedMembers = 3;
    if (status.members.length !== expectedMembers) {
      print(`⚠️ 成员数量不正确: 期望 ${expectedMembers}, 实际 ${status.members.length}`);
    }
    // Primary check (state 1 = PRIMARY).
    const primary = status.members.find(m => m.state === 1);
    if (primary) {
      print(`✓ 主节点: ${primary.name}`);
    } else {
      print("✗ 未找到主节点");
    }
    // Secondary lag (state 2 = SECONDARY).
    const secondaries = status.members.filter(m => m.state === 2);
    print(`从节点数量: ${secondaries.length}`);
    // BUG FIX: the original dereferenced `primary` unconditionally (crash
    // when no primary exists) and subtracted BSON Timestamps, which is not
    // a numeric operation. Guard on `primary` and use the members'
    // optimeDate (a JS Date) so the subtraction yields milliseconds.
    if (primary) {
      for (const secondary of secondaries) {
        const lag = (primary.optimeDate - secondary.optimeDate) / 1000;
        print(` ${secondary.name}: 延迟 ${lag.toFixed(2)} 秒`);
      }
    }
  }
}
// 使用示例
// const recovery = new ReplicaSetRecovery("/backup/mongodb/backup_20240101");
// recovery.recoverSingleNode("secondary1.example.com:27017");
四、分片集群的特殊注意事项
4.1 核心知识点
分片集群备份的关键考虑因素:
- 组件完整性:必须备份所有分片、配置服务器和mongos
- 一致性保证:需要在备份期间停止平衡器和写入
- 配置服务器:备份CSRS(配置服务器副本集)中的元数据
- 时间点恢复:需要协调所有组件的oplog
4.2 分片集群备份完整流程
javascript
// sharded_cluster_backup.js
// Sharded-cluster backup orchestration: stop the balancer, fsync-lock
// every shard, dump the config servers and all shards, then unlock and
// restart the balancer (cleanup runs in a finally block).
// Runs inside mongosh (uses the Mongo/print/sleep globals).
class ShardedClusterBackup {
  constructor(clusterConfig) {
    this.configServers = clusterConfig.configServers; // CSRS member host:port list
    this.shards = clusterConfig.shards;               // [{ name, host }]
    this.mongosHost = clusterConfig.mongosHost;       // mongos router address
    this.backupDir = clusterConfig.backupDir;
  }

  // 1. Stop the balancer and wait until no balancing round is active.
  stopBalancer() {
    print("停止平衡器...");
    // Connect to mongos and issue the admin command.
    const mongos = new Mongo(this.mongosHost);
    const result = mongos.adminCommand({ balancerStop: 1 });
    if (result.ok) {
      print("✓ 平衡器已停止");
    } else {
      print("✗ 停止平衡器失败");
      return false;
    }
    // Wait for any in-flight balancing round to drain.
    // BUG FIX: the original tested `status.mode === "full"`, but after
    // balancerStop the mode is already "off" even while a migration round
    // is still finishing; balancerStatus.inBalancerRound reports that.
    let isRunning = true;
    while (isRunning) {
      const status = mongos.adminCommand({ balancerStatus: 1 });
      isRunning = status.inBalancerRound === true;
      if (isRunning) {
        print("等待平衡器完成当前操作...");
        sleep(5000);
      }
    }
    return true;
  }

  // 2. fsync-lock every shard to block writes during the dump. Returns
  // false on the first failure; the caller must release any locks already
  // taken (see performFullBackup).
  lockAllShards() {
    print("锁定所有分片以防止写入...");
    for (const shard of this.shards) {
      print(`锁定分片: ${shard.name}`);
      const shardConn = new Mongo(shard.host);
      try {
        const adminDb = shardConn.getDB("admin");
        const lockResult = adminDb.runCommand({ fsync: 1, lock: 1 });
        if (lockResult.ok) {
          print(` ✓ ${shard.name} 已锁定`);
        } else {
          print(` ✗ ${shard.name} 锁定失败`);
          return false;
        }
      } catch(e) {
        print(` ✗ 锁定 ${shard.name} 时出错: ${e.message}`);
        return false;
      }
    }
    return true;
  }

  // 3. Dump the cluster metadata from the config server replica set.
  backupConfigServers() {
    print("备份配置服务器...");
    // The config servers form a replica set; dumping one member suffices.
    // NOTE(review): configServers[0] is merely the first listed host, not
    // necessarily the CSRS primary — verify, or let mongodump resolve it.
    const configPrimary = this.configServers[0];
    const backupPath = `${this.backupDir}/config_$(date +%Y%m%d_%H%M%S)`;
    const backupCmd = `
mongodump \
--host ${configPrimary} \
--db config \
--out ${backupPath} \
--gzip
`;
    print(`执行: ${backupCmd}`);
    print("配置服务器备份完成");
    return backupPath;
  }

  // 4. Dump every shard (--oplog gives per-shard point-in-time consistency).
  backupAllShards() {
    print("备份所有分片...");
    const shardBackups = [];
    for (const shard of this.shards) {
      print(`备份分片: ${shard.name}`);
      const backupPath = `${this.backupDir}/shard_${shard.name}_$(date +%Y%m%d_%H%M%S)`;
      // For very large shards prefer a filesystem snapshot over mongodump.
      const backupCmd = `
mongodump \
--host ${shard.host} \
--out ${backupPath} \
--oplog \
--gzip
`;
      print(`执行: ${backupCmd}`);
      shardBackups.push({
        shard: shard.name,
        path: backupPath,
        host: shard.host
      });
    }
    return shardBackups;
  }

  // 5. Release the fsync locks on every shard (best effort per shard).
  unlockAllShards() {
    print("解锁所有分片...");
    for (const shard of this.shards) {
      print(`解锁分片: ${shard.name}`);
      const shardConn = new Mongo(shard.host);
      try {
        const adminDb = shardConn.getDB("admin");
        const unlockResult = adminDb.runCommand({ fsyncUnlock: 1 });
        if (unlockResult.ok) {
          print(` ✓ ${shard.name} 已解锁`);
        } else {
          print(` ✗ ${shard.name} 解锁失败`);
        }
      } catch(e) {
        print(` 解锁 ${shard.name} 时出错: ${e.message}`);
      }
    }
    return true;
  }

  // 6. Re-enable the balancer; returns the command's ok flag.
  startBalancer() {
    print("重启平衡器...");
    const mongos = new Mongo(this.mongosHost);
    const result = mongos.adminCommand({ balancerStart: 1 });
    if (result.ok) {
      print("✓ 平衡器已重启");
    } else {
      print("✗ 重启平衡器失败");
    }
    return result.ok;
  }

  // Full backup workflow. Once the shards are locked, cleanup (unlock +
  // balancer restart) is guaranteed by the finally block.
  performFullBackup() {
    print("=".repeat(60));
    print("开始分片集群完整备份");
    print("=".repeat(60));
    // 1. Stop the balancer — chunk migrations during the dump would make
    // the backup inconsistent.
    if (!this.stopBalancer()) {
      print("备份中止:无法停止平衡器");
      return false;
    }
    // 2. Lock the shards.
    if (!this.lockAllShards()) {
      print("备份中止:无法锁定分片");
      // BUG FIX: shards locked before the failing one must be released —
      // the original only restarted the balancer here, leaving those
      // shards write-frozen.
      this.unlockAllShards();
      this.startBalancer();
      return false;
    }
    try {
      // 3. Dump the config servers.
      const configBackup = this.backupConfigServers();
      // 4. Dump every shard.
      const shardBackups = this.backupAllShards();
      // 5. Record backup metadata.
      const metadata = {
        backup_time: new Date().toISOString(),
        type: "full_cluster",
        config_backup: configBackup,
        shard_backups: shardBackups,
        mongos: this.mongosHost
      };
      const fs = require('fs');
      fs.writeFileSync(
        `${this.backupDir}/backup_metadata_${Date.now()}.json`,
        JSON.stringify(metadata, null, 2)
      );
      print("\n✓ 分片集群备份完成!");
      print(`备份目录: ${this.backupDir}`);
    } finally {
      // 6. Always unlock the shards...
      this.unlockAllShards();
      // 7. ...and re-enable the balancer.
      this.startBalancer();
    }
    return true;
  }
}
// Usage example
const clusterConfig = {
configServers: ["cfg1.example.com:27019", "cfg2.example.com:27019", "cfg3.example.com:27019"],
shards: [
{ name: "shard01", host: "shard01.example.com:27018" },
{ name: "shard02", host: "shard02.example.com:27018" },
{ name: "shard03", host: "shard03.example.com:27018" }
],
mongosHost: "mongos.example.com:27017",
backupDir: "/backup/mongodb/sharded_cluster"
};
const clusterBackup = new ShardedClusterBackup(clusterConfig);
clusterBackup.performFullBackup();
4.3 使用文件系统快照备份分片集群
bash
#!/bin/bash
# sharded_cluster_snapshot_backup.sh
# Filesystem-snapshot backup of a sharded cluster: stop the balancer,
# fsync-lock config servers and shards, snapshot, unlock, restart balancer.
#
# Fixes vs. the original:
#   * removed the duplicated shebang
#   * snapshot_info.json now contains valid JSON — the bash arrays are
#     rendered as quoted JSON arrays instead of raw word expansion

# ==================== Configuration ====================
MONGO_HOST="mongos.example.com"
CONFIG_SERVERS=("cfg1:27019" "cfg2:27019" "cfg3:27019")
SHARDS=("shard1:27018" "shard2:27018" "shard3:27018")
BACKUP_BASE_DIR="/backup/mongodb/snapshots"
DATE=$(date +%Y%m%d_%H%M%S)
LOG_FILE="/var/log/mongodb/sharded_backup.log"

# ==================== Logging helper ====================
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a ${LOG_FILE}
}

# Render the arguments as a JSON string array: ["a", "b", ...]
to_json_array() {
    local out="[" sep="" item
    for item in "$@"; do
        out+="${sep}\"${item}\""
        sep=", "
    done
    printf '%s]' "${out}"
}

# ==================== Backup functions ====================
# 1. Stop the balancer and wait until it reports not running.
# NOTE(review): in recent mongosh versions sh.isBalancerRunning() returns a
# document (always truthy) — verify the loop condition on your version.
stop_balancer() {
    log "停止平衡器..."
    mongosh --host ${MONGO_HOST} --quiet --eval "
sh.stopBalancer();
while (sh.isBalancerRunning()) {
print('等待平衡器停止...');
sleep(1000);
}
print('平衡器已停止');
"
}

# 2. fsync-lock every cluster component so the snapshots are consistent.
lock_cluster() {
    log "锁定分片集群..."
    # Lock the config servers first.
    for cfg in "${CONFIG_SERVERS[@]}"; do
        log "锁定配置服务器: ${cfg}"
        mongosh --host ${cfg} --quiet --eval "db.adminCommand({fsync:1, lock:1})"
    done
    # Then lock every shard.
    for shard in "${SHARDS[@]}"; do
        log "锁定分片: ${shard}"
        mongosh --host ${shard} --quiet --eval "db.adminCommand({fsync:1, lock:1})"
    done
    log "集群已锁定"
}

# 3. Take the snapshot and record which components it covers.
create_snapshots() {
    log "创建快照..."
    BACKUP_PATH="${BACKUP_BASE_DIR}/snapshot_${DATE}"
    mkdir -p ${BACKUP_PATH}
    # Snapshot manifest (valid JSON).
    cat > ${BACKUP_PATH}/snapshot_info.json << EOF
{
    "snapshot_id": "snapshot_${DATE}",
    "create_time": "$(date -Iseconds)",
    "components": {
        "config_servers": $(to_json_array "${CONFIG_SERVERS[@]}"),
        "shards": $(to_json_array "${SHARDS[@]}")
    }
}
EOF
    # In an LVM environment, create the block-level snapshot here:
    # lvcreate -L 50G -s -n mongodb_snapshot_${DATE} /dev/vg_data/mongodb
    log "快照创建完成: ${BACKUP_PATH}"
}

# 4. Release all fsync locks.
unlock_cluster() {
    log "解锁分片集群..."
    for cfg in "${CONFIG_SERVERS[@]}"; do
        mongosh --host ${cfg} --quiet --eval "db.adminCommand({fsyncUnlock:1})"
    done
    for shard in "${SHARDS[@]}"; do
        mongosh --host ${shard} --quiet --eval "db.adminCommand({fsyncUnlock:1})"
    done
    log "集群已解锁"
}

# 5. Re-enable the balancer.
start_balancer() {
    log "重启平衡器..."
    mongosh --host ${MONGO_HOST} --quiet --eval "sh.startBalancer()"
    log "平衡器已重启"
}

# ==================== Main ====================
main() {
    log "==================== 分片集群快照备份开始 ===================="
    stop_balancer
    lock_cluster
    create_snapshots
    unlock_cluster
    start_balancer
    log "==================== 分片集群快照备份完成 ===================="
}

# Run the backup.
main
4.4 分片集群恢复流程
javascript
// sharded_cluster_restore.js
// Sharded-cluster restore driver: verifies backup metadata, walks through
// restoring the config servers and every shard, then sanity-checks the
// cluster. Shell commands are left commented out — this is a runbook.
class ShardedClusterRestore {
constructor(backupPath, clusterConfig) {
this.backupPath = backupPath;
this.clusterConfig = clusterConfig;
}
// Check that backup_metadata.json exists and print its summary.
// Requires mongosh's Node fs integration via require().
verifyBackup() {
print("=== 验证备份完整性 ===");
// Load the backup metadata file.
const fs = require('fs');
const metadataFile = `${this.backupPath}/backup_metadata.json`;
if (!fs.existsSync(metadataFile)) {
print("✗ 未找到备份元数据文件");
return false;
}
const metadata = JSON.parse(fs.readFileSync(metadataFile, 'utf8'));
print(`备份时间: ${metadata.backup_time}`);
print(`备份类型: ${metadata.type}`);
print(`配置服务器备份: ${metadata.config_backup}`);
print(`分片备份数量: ${metadata.shard_backups.length}`);
// List each shard backup.
for (const shardBackup of metadata.shard_backups) {
print(` 分片 ${shardBackup.shard}: ${shardBackup.path}`);
}
return true;
}
// Print the config-server restore procedure.
restoreConfigServers() {
print("恢复配置服务器...");
// 1. Stop the config-server processes.
print("停止配置服务器服务...");
// systemctl stop mongod-config
// 2. Wipe the config-server data directory.
print("清空配置服务器数据目录...");
// rm -rf /var/lib/mongodb-config/*
// 3. Restore the dump (the glob matches the timestamped config dump dir).
const configBackup = `${this.backupPath}/config_*`;
const restoreCmd = `mongorestore --drop ${configBackup}`;
print(`执行: ${restoreCmd}`);
// 4. Restart the config servers.
print("启动配置服务器服务...");
// systemctl start mongod-config
print("配置服务器恢复完成");
}
// Print the restore procedure for one shard (with oplog replay).
restoreShard(shardName, backupPath) {
print(`恢复分片: ${shardName}`);
// 1. Stop the shard service.
// systemctl stop mongod-shard
// 2. Wipe the shard data directory.
// rm -rf /var/lib/mongodb-shard/*
// 3. Restore the dump, replaying its oplog for consistency.
const restoreCmd = `
mongorestore \
--oplogReplay \
--drop \
${backupPath}
`;
print(`执行: ${restoreCmd}`);
// 4. Restart the shard service.
// systemctl start mongod-shard
print(`分片 ${shardName} 恢复完成`);
}
// Restore every shard listed in the backup metadata.
restoreAllShards() {
print("恢复所有分片...");
const fs = require('fs');
const metadata = JSON.parse(fs.readFileSync(`${this.backupPath}/backup_metadata.json`, 'utf8'));
for (const shardBackup of metadata.shard_backups) {
this.restoreShard(shardBackup.shard, shardBackup.path);
}
}
// Post-restore sanity checks via mongos: registered shards, database
// sizes, and chunk distribution per shard.
verifyClusterRestore() {
print("=== 验证分片集群恢复 ===");
// Connect to mongos.
const mongos = new Mongo(this.clusterConfig.mongosHost);
// Registered shards.
const shardStatus = mongos.getDB("config").shards.find().toArray();
print("分片状态:");
shardStatus.forEach(shard => {
print(` ${shard._id}: ${shard.host} - ${shard.state}`);
});
// Database inventory and on-disk sizes.
const databases = mongos.getDB("admin").runCommand({ listDatabases: 1 });
print("\n数据库列表:");
databases.databases.forEach(db => {
print(` ${db.name}: ${(db.sizeOnDisk / 1024 / 1024).toFixed(2)} MB`);
});
// Chunk counts grouped by owning shard.
const chunks = mongos.getDB("config").chunks.aggregate([
{ $group: { _id: "$shard", count: { $sum: 1 } } }
]).toArray();
print("\n块分布:");
chunks.forEach(chunk => {
print(` 分片 ${chunk._id}: ${chunk.count} 个块`);
});
print("✓ 集群验证完成");
}
// End-to-end restore: verify -> config servers -> shards -> verify.
// The sleeps give the restored components time to come up and sync.
performFullRestore() {
print("=".repeat(60));
print("开始分片集群完整恢复");
print("=".repeat(60));
// 1. Validate the backup first.
if (!this.verifyBackup()) {
print("恢复中止:备份验证失败");
return false;
}
// 2. Restore the config servers.
this.restoreConfigServers();
// 3. Give the config server replica set time to sync.
print("等待配置服务器同步 (30秒)...");
sleep(30000);
// 4. Restore every shard.
this.restoreAllShards();
// 5. Give the shards time to sync.
print("等待分片同步 (60秒)...");
sleep(60000);
// 6. Verify the restored cluster.
this.verifyClusterRestore();
print("\n✓ 分片集群恢复完成!");
return true;
}
}
// Usage example
const restoreConfig = {
mongosHost: "mongos.example.com:27017",
configServers: ["cfg1:27019", "cfg2:27019", "cfg3:27019"],
shards: ["shard1:27018", "shard2:27018", "shard3:27018"]
};
const restore = new ShardedClusterRestore("/backup/mongodb/sharded_cluster/backup_20240101", restoreConfig);
restore.performFullRestore();
4.5 分片集群备份最佳实践总结
javascript
// sharded_cluster_best_practices.js
// Sharded-cluster backup best practices kept as structured data so they
// can be printed or consumed programmatically. The display strings are
// runtime output and are intentionally left in Chinese.
const ShardedClusterBackupBestPractices = {
// 1. Recommended backup frequency per cluster component.
backupFrequency: {
config_servers: "每次集群变更后立即备份",
shards: "每日全量备份 + 每小时增量备份",
mongos: "配置文件备份,无需数据备份"
},
// 2. Pre-backup checklist.
preBackupChecklist: [
"✓ 确认平衡器已停止",
"✓ 确认没有正在进行的块迁移",
"✓ 确认oplog窗口足够",
"✓ 确认有足够的磁盘空间",
"✓ 确认备份节点可用"
],
// 3. Post-backup verification checklist.
postBackupChecklist: [
"✓ 验证备份文件完整性",
"✓ 测试在测试环境恢复",
"✓ 记录备份元数据",
"✓ 上传到异地存储",
"✓ 清理过期备份"
],
// 4. Common problems and their remedies.
commonIssues: {
"备份不一致": "确保使用--oplog参数,并在备份期间停止写入",
"备份时间过长": "使用文件系统快照代替逻辑备份",
"恢复失败": "定期进行恢复演练,验证备份有效性",
"空间不足": "启用压缩,设置合理的保留策略"
},
// 5. Metrics worth monitoring around backup runs.
monitoringMetrics: [
"备份执行时间",
"备份文件大小",
"oplog窗口大小",
"平衡器状态",
"分片锁定状态"
]
};
// Dump the whole catalog as JSON.
print(JSON.stringify(ShardedClusterBackupBestPractices, null, 2));
总结
备份策略选择指南
| 场景 | 推荐备份方法 | 关键注意事项 |
|---|---|---|
| 开发/测试环境 | mongodump逻辑备份 | 简单易用,支持部分恢复 |
| 生产环境-小型 | mongodump + oplog | 支持时间点恢复 |
| 生产环境-大型 | 文件系统快照 | 速度快,影响小 |
| 副本集 | 从节点备份 | 使用--oplog确保一致性 |
| 分片集群 | 快照备份 + 停止平衡器 | 需要协调所有组件 |
关键命令速查
bash
# Logical backup (gzip-compressed, --oplog for point-in-time consistency)
mongodump --out /backup --gzip --oplog
# Logical restore (drop existing collections, then replay the dumped oplog)
mongorestore --drop --oplogReplay /backup
# Physical backup (cold — mongod must be stopped first)
cp -rp /var/lib/mongodb /backup
# Physical restore (cold)
cp -rp /backup/mongodb /var/lib/mongodb
# Validate a backup without writing anything
mongorestore --dryRun /backup