#!/usr/bin/python3
# encoding: utf-8
#filename: etcd-backups-restore-compress-defragmentation.py
#author: gaohaixiang
#writetime:202401161055
"""
脚本功能:
etcd 数据备份,使用备份数据进行数据库重构,数据压缩,碎片整理
数据压缩及碎片整理的原因:
etcd数据写入频繁,导致版本不断叠加,从而导致数据库不断变大 \
需要对其进行压缩,进行碎片整理,从而减小etcd数据库的大小
etcd默认的数据存储大小为2G,当超过这个存储大小,可能会限制数据写入 \
或者报错mvcc、NOSPACE,除了进行数据压缩碎片整理外,还可以进行参数调整 \
etcd启动添加参数 --quota-backend-bytes ,将etcd存储调整到多少 \
单位为B,10737418240 为10G
etcd启动示例:
/opt/etcd/etcd --quota-backend-bytes=10737418240 --auth-token jwt --config-file=/opt/etcd/nodefile.yml
注意:
备份恢复是选择最新的一个备份进行数据恢复,不是指定固定的备份来进行操作
"""
import datetime
import getpass
import json
import os
import shlex
import stat
import subprocess
# Command execution
def run_command(command):
    """Run ``command`` through the shell and collect its result.

    Args:
        command: Command line, passed to the shell as one string.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple. Both streams are the raw
        bytes captured from the child process.
    """
    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return completed.returncode, completed.stdout, completed.stderr
# Directory permission check
def check_directory_access(directory):
    """Make sure ``directory`` exists and is usable by the current process.

    A missing directory is created together with its parents and opened up to
    mode ``0o777`` (read/write/execute for everyone). An existing directory
    that the current process cannot read, write or traverse gets the same
    mode applied.

    Args:
        directory: Path of the directory to prepare.

    Returns:
        True when the directory exists and is readable, writable and
        searchable afterwards; False when it could not be created or its
        permissions could not be fixed (a message is printed in that case).
    """
    open_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # Creating files inside a directory needs search (execute) permission in
    # addition to read and write, so all three are required.
    needed = os.R_OK | os.W_OK | os.X_OK
    try:
        if not os.path.isdir(directory):
            # exist_ok avoids a race with a concurrent creator; a regular
            # file at this path still raises and is reported below.
            os.makedirs(directory, exist_ok=True)
            os.chmod(directory, open_mode)
        elif not os.access(directory, needed):
            os.chmod(directory, open_mode)
    except OSError as err:
        print(f"Cannot prepare directory {directory}: {err}")
        return False
    # Re-check: chmod succeeding does not by itself prove access (for example
    # a parent directory may still block traversal).
    if not os.access(directory, needed):
        print(f"Directory {directory} is not readable and writable")
        return False
    return True
# Backup
def backup_etcd(endpoints, backup_dir):
    """Save an etcd snapshot into ``backup_dir``.

    The snapshot is written with ``etcdctl snapshot save`` to a file named
    ``etcd_backup_<YYYYmmddHHMMSS>.db`` inside ``backup_dir``.

    Args:
        endpoints: Value passed to ``etcdctl --endpoints``.
        backup_dir: Directory that receives the snapshot file.

    Returns:
        True when the snapshot command exits with status 0, False when the
        backup directory is not usable or the command fails.
    """
    # Check access to the backup directory
    if not check_directory_access(backup_dir):
        return False
    # Build the snapshot file name; os.path.join avoids a doubled separator
    # when backup_dir already ends with "/".
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    backup_file = os.path.join(backup_dir, f"etcd_backup_{timestamp}.db")
    # Run the backup; values are shell-quoted because run_command goes
    # through the shell and paths may contain spaces.
    backup_command = (
        "ETCDCTL_API=3 etcdctl "
        f"--endpoints={shlex.quote(endpoints)} "
        f"snapshot save {shlex.quote(backup_file)}"
    )
    ret, _, stderr = run_command(backup_command)
    if ret != 0:
        # run_command returns bytes; decode so the message is readable.
        error_text = stderr.decode(errors="replace").strip()
        print(f"Failed to backup etcd. Error: {error_text}")
        return False
    print(f"Etcd backup completed successfully. Backup file is {backup_file}")
    return True
# Restore from backup / rebuild etcd
def restore_etcd(backup_dir, restore_dir, name, initial_cluster, initial_advertise_peer_urls):
    """Restore the most recent snapshot in ``backup_dir`` into ``restore_dir``.

    Snapshot files named ``etcd_backup_<timestamp>.db`` are preferred; the
    timestamp in the name makes the lexicographically largest name the newest
    one. If no file matches that pattern, every regular file in
    ``backup_dir`` is considered, again taking the largest name.

    Args:
        backup_dir: Directory holding the snapshot files.
        restore_dir: Data directory to restore into (``--data-dir``).
        name: Member name (``--name``).
        initial_cluster: Value for ``--initial-cluster``.
        initial_advertise_peer_urls: Value for
            ``--initial-advertise-peer-urls``.

    Returns:
        True when ``etcdctl snapshot restore`` exits with status 0, False
        when a directory is not usable, no backup exists, or the command
        fails.
    """
    # Only the parent of the restore target is prepared here; the target
    # itself is left for etcdctl to create.
    # NOTE(review): pre-creating the data directory is believed to make
    # `snapshot restore` fail on etcdctl releases that reject an existing
    # data-dir (etcd 3.4 "data-dir exists") — confirm for the deployed version.
    restore_parent = os.path.dirname(os.path.abspath(restore_dir))
    if not check_directory_access(backup_dir) or not check_directory_access(restore_parent):
        return False
    # Pick the newest backup file
    regular_files = [
        entry for entry in os.listdir(backup_dir)
        if os.path.isfile(os.path.join(backup_dir, entry))
    ]
    snapshots = [
        entry for entry in regular_files
        if entry.startswith("etcd_backup_") and entry.endswith(".db")
    ]
    # Fall back to every regular file so differently named backups still work.
    candidates = snapshots or regular_files
    if not candidates:
        print("No backup files found")
        return False
    backup_file = os.path.join(backup_dir, max(candidates))
    # Run the restore; values are shell-quoted because run_command goes
    # through the shell.
    restore_command = (
        "ETCDCTL_API=3 etcdctl snapshot restore "
        f"{shlex.quote(backup_file)} "
        f"--name {shlex.quote(name)} "
        f"--data-dir {shlex.quote(restore_dir)} "
        f"--initial-cluster {shlex.quote(initial_cluster)} "
        f"--initial-advertise-peer-urls {shlex.quote(initial_advertise_peer_urls)}"
    )
    ret, _, stderr = run_command(restore_command)
    if ret != 0:
        # run_command returns bytes; decode so the message is readable.
        error_text = stderr.decode(errors="replace").strip()
        print(f"Failed to restore etcd. Error: {error_text}")
        return False
    print(f"Etcd restore completed successfully. Restored data is in {restore_dir}")
    return True
# Data compaction and defragmentation
def compact_and_defrag(endpoints):
    """Compact etcd's key history and defragment the given endpoints.

    The current store revision is read from ``etcdctl endpoint status``
    (``Status.header.revision``), history up to that revision is compacted
    once, and then ``etcdctl defrag`` is run against ``endpoints``.

    Args:
        endpoints: Value passed to ``etcdctl --endpoints``.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    quoted_endpoints = shlex.quote(endpoints)
    status_command = (
        f"ETCDCTL_API=3 etcdctl --endpoints={quoted_endpoints} "
        "endpoint status --write-out=json"
    )
    ret, stdout, stderr = run_command(status_command)
    if ret != 0:
        error_text = stderr.decode(errors="replace").strip()
        print(f"Failed to get etcd status. Error: {error_text}")
        return
    try:
        status = json.loads(stdout)
        # `compact` takes a key-value store revision. The raft index is a
        # different counter and must not be used as the compaction target.
        revisions = [int(entry['Status']['header']['revision']) for entry in status]
    except (ValueError, KeyError, TypeError) as err:
        print(f"Failed to parse etcd status. Error: {err}")
        return
    if not revisions:
        print("Failed to get etcd status. Error: no endpoint status returned")
        return
    # Compaction is issued once with the highest reported revision; repeating
    # it per endpoint with the same revision would fail the second time.
    revision = max(revisions)
    # Run the compaction
    compact_command = f"ETCDCTL_API=3 etcdctl --endpoints={quoted_endpoints} compact {revision}"
    ret, _, stderr = run_command(compact_command)
    if ret != 0:
        error_text = stderr.decode(errors="replace").strip()
        print(f"Failed to compact etcd. Error: {error_text}")
        return
    # Run the defragmentation against the listed endpoints
    defrag_command = f"ETCDCTL_API=3 etcdctl --endpoints={quoted_endpoints} defrag"
    ret, _, stderr = run_command(defrag_command)
    if ret != 0:
        error_text = stderr.decode(errors="replace").strip()
        print(f"Failed to defrag etcd. Error: {error_text}")
        return
    print("Etcd compact and defrag completed successfully")
# etcd data restore: write the config file and (re)start etcd
def etcd_start(timenow, restore_dir):
    """Write a single-node etcd config for ``restore_dir`` and restart etcd.

    Steps: write ``/data/etcd/nodefile<timenow>.yml``, kill any process whose
    command line contains both ``etcd`` and ``config-file``, then launch
    ``etcd --config-file=<that file>`` detached in its own session with its
    output appended to ``/data/etcd/etcd<timenow>.log``.

    Args:
        timenow: Timestamp string used in the config and log file names.
        restore_dir: Data directory written into the config as ``data-dir``.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # NOTE(review): the client URLs below use port 2380 and the peer URLs
    # 2379, the reverse of etcd's conventional assignment (2379 client /
    # 2380 peer) — confirm this is intended for the deployment.
    etcdfiledata = """
name: node1
data-dir: %s
listen-client-urls: 'http://192.168.73.10:2380'
advertise-client-urls: 'http://192.168.73.10:2380'
listen-peer-urls: 'http://192.168.73.10:2379'
initial-advertise-peer-urls: 'http://192.168.73.10:2379'
initial-cluster: node1=http://192.168.73.10:2379
initial-cluster-token: etcd-cluster-1
initial-cluster-state: new
""" % restore_dir
    etcdfile = "/data/etcd/nodefile%s.yml" % timenow
    logfile = "/data/etcd/etcd%s.log" % timenow
    try:
        with open(etcdfile, "w") as config:
            config.write(etcdfiledata)
    except OSError as err:
        print(f"Failed to write etcd config {etcdfile}. Error: {err}")
        return
    # Stop etcd
    etcdKillCommand = "ps -ef |grep etcd|grep 'config-file'|grep -v grep|awk '{print $2}'|xargs kill -9"
    ret, _, stderr = run_command(etcdKillCommand)
    if ret != 0:
        # A non-zero status is also what the pipeline yields when no matching
        # process exists, so this is reported but not treated as fatal.
        error_text = stderr.decode(errors="replace").strip()
        print(f"Failed to stop etcd (it may not have been running). Error: {error_text}")
    # Start etcd in its own session so it outlives this script. Both stdout
    # and stderr go to the log file.
    try:
        with open(logfile, "ab") as log:
            process = subprocess.Popen(
                ["etcd", "--config-file=%s" % etcdfile],
                stdin=subprocess.DEVNULL,
                stdout=log,
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )
    except OSError as err:
        print(f"Failed to start etcd. Error: {err}")
        return
    # A Popen object is always truthy, so liveness is checked instead: if the
    # process exits within the grace period, the start is treated as failed.
    startup_grace_seconds = 2
    try:
        exit_code = process.wait(timeout=startup_grace_seconds)
    except subprocess.TimeoutExpired:
        return
    print(f"Failed to start etcd. Error: exited with code {exit_code}, see {logfile}")
def main():
    """Script entry point: configure the node settings and run the backup.

    Only the backup step is active. The compaction, restore and start steps
    are kept as commented-out calls that can be enabled by hand.
    """
    timenow = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

    endpoints = "http://192.168.73.10:2379"  # address of your etcd node
    backup_dir = "/data/etcd/etcddatabak/"  # your backup directory
    restore_dir = f"/data/etcd/etcddata{timenow}"  # your restore directory
    name = "node1"  # name of your etcd node
    initial_cluster = "node1=http://192.168.73.10:2379"  # your initial cluster configuration
    initial_advertise_peer_urls = "http://192.168.73.10:2379"  # your initial advertised peer URL

    # # Back up first, then compact and defragment
    # if backup_etcd(endpoints, backup_dir):
    #     compact_and_defrag(endpoints)

    # Data backup
    backup_etcd(endpoints, backup_dir)

    # # Restore data from the latest backup
    # restore_etcd(backup_dir, restore_dir, name, initial_cluster, initial_advertise_peer_urls)
    # # Start etcd
    # etcd_start(timenow, restore_dir)


# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()