在 PyMongo 中使用 compact 命令进行 MongoDB 碎片回收的完整操作指南如下:
一、核心执行方法
python
from pymongo import MongoClient
import time
from pymongo.errors import PyMongoError

# 1. Connect to the MongoDB instance.
client = MongoClient("mongodb://username:password@host:27017/dbname?authSource=admin")
# 2. Select the target database and collection.
db = client["your_database"]
collection = db["your_collection"]
# 3. Run the compact command.
try:
    # db.command() only returns once the server has finished compacting, so
    # the reply is the final result. It carries no operation ID to poll:
    # "operationTime" is a cluster timestamp and is absent on standalone
    # servers, where reading it would raise KeyError after a successful run.
    result = db.command("compact", collection.name)
    # bytesFreed is only reported by newer server versions, hence .get().
    bytes_freed = result.get("bytesFreed")
    if bytes_freed is not None:
        print(f"Compact freed {bytes_freed} bytes")
    print("✅ Compact completed successfully!")
except PyMongoError as e:
    print(f"❌ Compact failed: {str(e)}")
finally:
    client.close()
二、关键参数配置
python
# Optional fields accepted by compact. `force` is what lets the command run
# on a replica set primary; it is harmless on a secondary.
# The command has no compression / tieredStorage fields, and the padding
# options belonged to the removed MMAPv1 storage engine, so none of them are
# sent here.
result = db.command(
    "compact",
    collection.name,
    force=True,  # allow the run even if the connected member is the primary
    comment="manual storage reclaim",  # tag shown in logs and currentOp output
)
三、集群环境操作方案
1. 副本集自动滚动执行
python
# Plain host:port addresses. The replica set name is not part of a host, so
# "rs1/mongo1:27017" would produce a malformed connection string.
rs_members = [
    "mongo1:27017",
    "mongo2:27017",
    "mongo3:27017",
]


def _compact_member(member, step_down_primary):
    """Compact your_db.your_collection on a single replica set member.

    Returns False when the member is the primary and ``step_down_primary`` is
    false (nothing is done in that case), True otherwise. Failures are
    reported on stdout rather than raised so the rollout continues with the
    remaining members.
    """
    # directConnection pins the client to this member. With replicaSet=...
    # the driver discovers the set and sends every command to the primary.
    member_client = MongoClient(
        f"mongodb://user:pass@{member}/admin?directConnection=true"
    )
    try:
        is_primary = member_client.admin.command("isMaster").get("ismaster")
        if is_primary:
            if not step_down_primary:
                return False
            # Hand over the primary role for 300 seconds before compacting.
            # NOTE(review): older servers drop connections on step-down, which
            # surfaces here as an exception and is reported below - confirm
            # against the server version in use.
            member_client.admin.command("replSetStepDown", 300)
        member_client["your_db"].command("compact", "your_collection", force=True)
        print(f"✅ Compact completed on {member}")
    except Exception as e:
        print(f"❌ Failed on {member}: {str(e)}")
    finally:
        member_client.close()
    return True


# Secondaries first, the primary last, so the set goes through one election
# instead of one per position of the primary in the list.
postponed = []
for member in rs_members:
    if not _compact_member(member, step_down_primary=False):
        postponed.append(member)
for member in postponed:
    _compact_member(member, step_down_primary=True)
2. 分片集群自动处理
python
# Read the shard list from the config server, then release that connection.
config_client = MongoClient("mongodb://config_server:27019")
try:
    shards = list(config_client.config.shards.find())
finally:
    config_client.close()

for shard in shards:
    shard_name = shard["_id"]
    # "host" looks like "rsName/h1:port,h2:port"; keep only the member list.
    members = shard["host"].split("/")[-1].split(",")
    # compact is not replicated, so every member has to be compacted itself.
    for member in members:
        shard_client = None  # lets the finally block see a failed constructor
        try:
            shard_client = MongoClient(
                f"mongodb://{member}/admin?directConnection=true"
            )
            # force=True is needed for the member that is currently primary;
            # without it the server refuses to compact a primary.
            shard_client["your_db"].command(
                "compact", "your_collection", force=True
            )
            print(f"✅ Compact on shard {shard_name} ({member}) completed")
        except Exception as e:
            print(f"❌ Shard {shard_name} failed: {str(e)}")
        finally:
            if shard_client is not None:
                shard_client.close()
四、操作结果验证
python
# Compare storage statistics before and after compaction.
# PyMongo collections have no stats() method; the numbers come from the
# collStats command run on the owning database.
MB = 1024**2


def _reusable_pct(stats):
    """Return the share (in %) of the collection file that is reusable free space."""
    storage_size = stats.get("storageSize", 0)
    if not storage_size:
        return 0.0  # empty collection: nothing allocated, nothing fragmented
    # freeStorageSize is missing on older servers; treat that as 0.
    return 100 * stats.get("freeStorageSize", 0) / storage_size


pre_stats = db.command("collStats", collection.name)
# ... run compact here ...
post_stats = db.command("collStats", collection.name)
print("存储优化报告:")
print(f"- 原始大小: {pre_stats['storageSize'] / MB:.2f} MB")
print(f"- 优化后: {post_stats['storageSize'] / MB:.2f} MB")
print(f"- 节省空间: {(pre_stats['storageSize'] - post_stats['storageSize']) / MB:.2f} MB")
print(f"- 碎片率: {_reusable_pct(pre_stats):.1f}% → {_reusable_pct(post_stats):.1f}%")
五、安全操作注意事项
- 阻塞机制处理
# Refuse to start while another compact is still running.
# Database.current_op() no longer exists in PyMongo 4, and it returned a dict
# that was always truthy; the $currentOp stage (run on the admin database)
# yields one document per matching operation instead.
running_compacts = list(
    client.admin.aggregate(
        [
            {"$currentOp": {}},
            {"$match": {"command.compact": {"$exists": True}}},
        ]
    )
)
if running_compacts:
    print("⚠️ Another compact already running")
    raise SystemExit
# Give up on connection setup after 30 s and on any single socket wait after
# one hour, so a stuck compact cannot block the script indefinitely.
client = MongoClient(connectTimeoutMS=30000, socketTimeoutMS=3600000)
- 磁盘空间保障
# Check free disk space before compacting.
# fsUsedSize / fsTotalSize are reported by dbStats (there is no fsInfo
# command), and free space is total minus used, not the other way round.
fs_stats = db.command("dbStats")
free_space = fs_stats["fsTotalSize"] - fs_stats["fsUsedSize"]
coll_size = db.command("collStats", collection.name)["storageSize"]
# Safety margin kept from the original guide: 1.5x the collection's on-disk size.
required_space = coll_size * 1.5
if free_space < required_space:
    print(f"❌ Insufficient disk space. Need {required_space} bytes, only {free_space} available")
    raise SystemExit
- Atlas 云服务专用
# MONGODB-AWS authenticates with AWS IAM credentials. Use it only when the
# Atlas database user is set up for AWS IAM; otherwise drop authMechanism and
# rely on the credentials in the connection string.
# NOTE(review): connect_string is not defined in this document - supply the
# cluster's connection string.
client = MongoClient(connect_string, authMechanism="MONGODB-AWS")
# The compact command has no tieredStorage / useRecycledSpace / reclaimSpace
# fields, so only documented options are kept here.
compact_opts = {"comment": "atlas maintenance compact"}
六、替代方案实现
无损在线重建方案:
python
def online_recompact(db_name, coll_name, batch_size=1000):
    """Rebuild a collection by copying it into a fresh one and swapping names.

    Args:
        db_name: Name of the database that holds the collection.
        coll_name: Name of the collection to rebuild.
        batch_size: Number of documents written per insert_many call.

    Raises:
        ValueError: If batch_size is not positive.

    Only documents are copied. Secondary indexes and collection options are
    not carried over, and writes that reach the source collection while the
    copy is running are not picked up, so pause writers and recreate indexes
    on the temporary collection before relying on the result.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Honour db_name instead of silently using whatever the module-level
    # `db` happens to point at; only its client connection is reused.
    database = db.client[db_name]
    temp_name = f"{coll_name}_compact_{int(time.time())}"

    # 1. Create the temporary collection explicitly so that the final rename
    #    also works when the source collection is empty.
    database.command("create", temp_name)
    source = database[coll_name]
    dest = database[temp_name]

    # 2. Copy in batches from a single cursor. skip/limit paging rescans the
    #    collection for every page and can repeat or miss documents.
    batch = []
    for doc in source.find():
        batch.append(doc)
        if len(batch) >= batch_size:
            dest.insert_many(batch)
            batch = []
    if batch:  # insert_many rejects an empty list
        dest.insert_many(batch)

    # 3. Swap in one rename: dropTarget replaces the old collection, so the
    #    live name never goes missing between two separate renames.
    dest.rename(coll_name, dropTarget=True)
最佳实践总结
- 执行窗口选择
# Only start compaction inside the early-morning maintenance window
# (00:00-04:59, taken from the local clock of the machine running the script).
from datetime import datetime

current_hour = datetime.now().hour
if not (0 <= current_hour < 5):
    print("⚠️ Operation declined: Not in maintenance window")
else:
    # NOTE(review): run_compact is not defined in this document - supply it.
    run_compact()
- 定时清理脚本框架
import time

import schedule


def weekly_compact():
    """Run the cluster-wide compaction job once."""
    # NOTE(review): shard_cluster_compact is not defined in this document;
    # presumably the sharded-cluster script wrapped in a function - confirm.
    shard_cluster_compact()


# Register the job for every Sunday at 01:00.
schedule.every().sunday.at("01:00").do(weekly_compact)

# Poll once a minute and fire whatever job has become due.
while True:
    schedule.run_pending()
    time.sleep(60)
- 健康检查指标
# Compact once more than this share of the collection file is reusable free space.
HEALTH_THRESHOLD = 0.8


def needs_compact(collection):
    """Return True when the collection's reusable free space exceeds HEALTH_THRESHOLD.

    Args:
        collection: A PyMongo Collection; its statistics are fetched with the
            collStats command on the owning database.
    """
    # PyMongo collections have no stats() method.
    stats = collection.database.command("collStats", collection.name)
    storage_size = stats.get("storageSize", 0)
    if not storage_size:
        return False  # nothing allocated yet, and avoids dividing by zero
    # size / storageSize compares uncompressed data with compressed storage,
    # so it says nothing about free space; freeStorageSize is the part of the
    # file that can be reused (treated as 0 on servers that do not report it).
    fragmentation = stats.get("freeStorageSize", 0) / storage_size
    return fragmentation > HEALTH_THRESHOLD


# Run the check and compact only when it is worthwhile.
# NOTE(review): run_compact is not defined in this document - supply it.
if needs_compact(collection):
    run_compact(collection)
关键提示:在 MongoDB Atlas 中,建议启用分层存储(参见 https://docs.atlas.mongodb.com/tiered-storage/ )替代手动 compact。对 10GB 以上的集合操作时,优先采用 `online_recompact` 方案确保业务连续性。