一、工具对比

实际使用场景建议
使用原生 BACKUP 命令的情况:
简单备份需求 - 只需要基本备份功能
小数据量 - 数据量不大,资源消耗可控
测试环境 - 对性能影响不敏感的环境
快速原型 - 快速验证备份方案
使用 clickhouse-backup 的情况:
生产环境 - 对稳定性和性能要求高
大数据量 - TB级别数据需要备份
复杂备份策略 - 需要增量、压缩、加密等高级功能
多环境部署 - 需要统一的备份管理
资源敏感场景 - 不能接受高CPU使用率
二、写入测试数据到ClickHouse
ClickHouse的部署可以查看:ClickHouse部署,100%亲测成功!---单机版&集群版
提前向ClickHouse实例中插入一些测试数据,便于后续观察备份与恢复的效果
bash
#!/bin/bash
# Seed a ClickHouse instance with synthetic rows for backup/restore tests.
# Each run creates the database and table if they are missing and then
# appends another 1,000,000 generated rows.
#
# Connection and target settings default to the values below and can be
# overridden from the environment:
#   CH_HOST, CH_PORT, CH_USER, CH_PASSWORD, DATABASE_NAME, TABLE_NAME
CH_HOST="${CH_HOST:-localhost}"
CH_PORT="${CH_PORT:-9000}"
CH_USER="${CH_USER:-root}"
CH_PASSWORD="${CH_PASSWORD:-123456}"
DATABASE_NAME="${DATABASE_NAME:-test_db}"
TABLE_NAME="${TABLE_NAME:-test_table}"
echo "开始快速插入100万条测试数据..."
# One client call runs DDL, the bulk insert and a summary SELECT.
# The success message is only printed when that call exits with status 0.
# NOTE(review): the query holds several statements; some clickhouse-client
# releases may need --multiquery for that - confirm for the version in use.
if ! clickhouse-client \
  --host "$CH_HOST" \
  --port "$CH_PORT" \
  --user "$CH_USER" \
  --password "$CH_PASSWORD" \
  --query "
CREATE DATABASE IF NOT EXISTS $DATABASE_NAME;
CREATE TABLE IF NOT EXISTS $DATABASE_NAME.$TABLE_NAME
(
id UInt64,
name String,
age UInt8,
salary Float64,
created_date Date,
timestamp DateTime,
is_active Bool,
description String
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_date)
ORDER BY (id, created_date);
INSERT INTO $DATABASE_NAME.$TABLE_NAME
SELECT
number as id,
concat('user_', toString(number)) as name,
(number % 80) + 18 as age,
(rand() % 100000) / 100.0 + 3000.0 as salary,
today() - (number % 365) as created_date,
now() - (number % 86400) as timestamp,
(number % 2) = 0 as is_active,
concat('Description for user ', toString(number)) as description
FROM numbers(1000000);
SELECT
'数据插入完成!' as status,
count(*) as total_rows,
min(created_date) as earliest_date,
max(created_date) as latest_date
FROM $DATABASE_NAME.$TABLE_NAME;
"; then
  echo "测试数据插入失败!" >&2
  exit 1
fi
echo "测试数据插入完成!"
每执行一次该脚本,就会向 test_db.test_table 表中追加插入100万条数据

三、使用BACKUP备份
参照:阿里云ClickHouse数据保护秘籍:本地备份与恢复详解
本地编写备份脚本
bash
vim backup.sh
chmod +x backup.sh
./backup.sh
Backing up database: default
Backup completed for database: default
Backing up database: test_db
四、使用clickhouse-backup备份
1、安装clickhouse-backup
bash
wget https://github.com/Altinity/clickhouse-backup/releases/download/v2.6.39/clickhouse-backup-linux-amd64.tar.gz
tar -xf clickhouse-backup-linux-amd64.tar.gz
cd build/linux/amd64/
cp clickhouse-backup /usr/local/bin/
mkdir -p /etc/clickhouse-backup
2、编写备份脚本
bash
vim backup.sh
#!/bin/bash
set -e
# ---- Settings ----
# The backup name carries a timestamp, so every run gets its own name and its
# own log file under LOG_DIR.
RETENTION_DAYS=7
CONFIG_FILE="/etc/clickhouse-backup/config.yml"
BACKUP_DIR="/var/lib/clickhouse/backup"
LOG_DIR="/var/log/clickhouse-backup"
BACKUP_NAME="clickhouse_backup_$(date '+%Y%m%d_%H%M%S')"
LOG_FILE="${LOG_DIR}/${BACKUP_NAME}.log"
# ---- Colour definitions ----
# Stored as literal backslash sequences; they are turned into escape codes
# when printed with `echo -e`.
NC='\033[0m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Logging helpers
# Print an INFO line (green tag, timestamp, message) to stdout and append the
# same line to the log file.
# Globals:   GREEN, NC, LOG_FILE (read)
# Arguments: $1 - message text
log_info() {
  # LOG_FILE is quoted so a path containing spaces stays a single tee target.
  echo -e "${GREEN}[INFO]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}
# Print a WARN line (yellow tag, timestamp, message) to stdout and append the
# same line to the log file.
# Globals:   YELLOW, NC, LOG_FILE (read)
# Arguments: $1 - message text
log_warn() {
  # LOG_FILE is quoted so a path containing spaces stays a single tee target.
  echo -e "${YELLOW}[WARN]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}
# Print an ERROR line (red tag, timestamp, message) to stdout, append it to the
# log file, then terminate the script with exit status 1.
# Globals:   RED, NC, LOG_FILE (read)
# Arguments: $1 - message text
log_error() {
  # LOG_FILE is quoted so a path containing spaces stays a single tee target.
  echo -e "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
  exit 1
}
# ClickHouse client wrapper (carries the authentication options).
# Runs a single query against localhost:9000 with the fixed credentials below.
# The client's stderr is discarded; when the client exits non-zero, a
# "Query failed: <query>" line is written to stdout instead.
# Arguments: $1 - SQL text to execute
clickhouse_client() {
  local sql="$1"
  local -a conn_opts=(
    --host="localhost"
    --port="9000"
    --user="root"
    --password="123456"
  )
  if ! clickhouse-client "${conn_opts[@]}" --query="$sql" 2>/dev/null; then
    echo "Query failed: $sql"
  fi
}
# Dependency check
# Verifies that the shell can resolve both clickhouse-backup and
# clickhouse-client; log_error is called with a tool-specific message for each
# one that cannot be found.
check_dependencies() {
  log_info "检查系统依赖..."
  command -v clickhouse-backup > /dev/null 2>&1 \
    || log_error "clickhouse-backup 未安装,请先安装"
  command -v clickhouse-client > /dev/null 2>&1 \
    || log_error "clickhouse-client 未安装,请先安装 ClickHouse"
  log_info "依赖检查完成"
}
# Create the directory layout
# Ensures the log directory, the backup directory and the directory that will
# hold the config file all exist.
# Globals: LOG_DIR, BACKUP_DIR, CONFIG_FILE (read)
create_directories() {
  # The log directory is created before the first log call, because log_info
  # appends to a file inside it.
  mkdir -p "$LOG_DIR"
  log_info "创建必要的目录..."
  # All paths are quoted so values containing spaces are not split.
  mkdir -p "$BACKUP_DIR"
  mkdir -p "$(dirname "$CONFIG_FILE")"
  log_info "目录创建完成"
}
# Create the configuration file
# Writes the clickhouse-backup settings below to $CONFIG_FILE, replacing any
# existing file, and restricts it to the owner because it contains the
# ClickHouse password.
# Globals: CONFIG_FILE (read)
# NOTE(review): the heredoc body is reproduced exactly as found, with every key
# at column 0. YAML nesting depends on indentation, so confirm the intended
# section structure and key names against the clickhouse-backup configuration
# reference before relying on the generated file.
create_config() {
  log_info "创建 ClickHouse 备份配置文件..."
  # The quoted 'EOF' delimiter keeps the body literal (no shell expansion);
  # the quoted target keeps a path with spaces a single redirect target.
  cat > "$CONFIG_FILE" << 'EOF'
# ClickHouse 备份配置文件
general:
remote_storage: none
max_file_size: 1GiB
disable_progress_bar: false
backups_to_keep_local: 7
backups_to_keep_remote: 0
restore_schema_on_cluster: ""
upload_by_part: true
download_by_part: true
check_parts_columns: true
clickhouse:
username: root
password: "123456"
host: localhost
port: 9000
skip_tables:
- "system.*"
- "information_schema.*"
- "INFORMATION_SCHEMA.*"
- "_temporary_and_external_tables.*"
timeout: 30m
freeze_by_part: true
secure: false
skip_verify: false
sync_replicated_tables: true
log_sql_queries: false
dist_backup_restore: false
backup:
include_databases: []
exclude_databases: ["system", "information_schema", "INFORMATION_SCHEMA"]
include_tables: []
exclude_tables: []
skip_empty_databases: true
compression:
format: tar
level: 1
EOF
  chmod 600 "$CONFIG_FILE"
  log_info "配置文件已创建: $CONFIG_FILE"
}
# ClickHouse connectivity check
# Runs "SELECT 1" against localhost:9000 with the fixed credentials, discarding
# all client output; logs success, or calls log_error when the client exits
# non-zero.
check_clickhouse_connection() {
  local -a probe=(
    clickhouse-client
    --host=localhost
    --port=9000
    --user=root
    --password=123456
    --query="SELECT 1"
  )
  log_info "检查 ClickHouse 连接..."
  if "${probe[@]}" > /dev/null 2>&1; then
    log_info "ClickHouse 连接成功"
  else
    log_error "无法连接到 ClickHouse,请检查服务状态和认证信息"
  fi
}
# Show the current database status
# Prints the database list and a per-database summary (table count, readable
# size, row count) for everything except the system/information_schema
# databases. All output is shown on stdout and appended to the log file.
# Globals: BLUE, NC, LOG_FILE (read)
show_database_status() {
  log_info "当前数据库状态:"
  # LOG_FILE is quoted throughout so a path with spaces stays one tee target.
  echo -e "${BLUE}=== 数据库列表 ===${NC}" | tee -a "$LOG_FILE"
  clickhouse-client --host=localhost --port=9000 --user=root --password=123456 --query="SHOW DATABASES" | tee -a "$LOG_FILE"
  echo -e "\n${BLUE}=== 用户表统计 ===${NC}" | tee -a "$LOG_FILE"
  # The SQL is kept flush-left so the query text sent to the server is unchanged.
  clickhouse-client --host=localhost --port=9000 --user=root --password=123456 --query="
SELECT
database,
count() as table_count,
formatReadableSize(sum(total_bytes)) as total_size,
sum(total_rows) as total_rows
FROM system.tables
WHERE database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')
GROUP BY database
ORDER BY sum(total_bytes) DESC
" --format Pretty | tee -a "$LOG_FILE"
}
# Run the backup
# Creates a backup named $BACKUP_NAME with clickhouse-backup, sending the
# tool's output to the log file, reports the elapsed seconds, then prints the
# listing for that backup.
# Globals: BACKUP_NAME, CONFIG_FILE, LOG_FILE (read)
# NOTE(review): the backup name is passed as the positional argument of
# `clickhouse-backup list` - confirm that the installed version accepts it.
perform_backup() {
  log_info "开始创建备份: $BACKUP_NAME"
  local start_time end_time duration
  start_time=$(date +%s)
  # Paths and the backup name are quoted so values with spaces are not split.
  if clickhouse-backup create --config "$CONFIG_FILE" "$BACKUP_NAME" >> "$LOG_FILE" 2>&1; then
    end_time=$(date +%s)
    duration=$((end_time - start_time))
    log_info "✓ 备份创建成功: $BACKUP_NAME (耗时: ${duration}秒)"
  else
    log_error "备份创建失败,请检查日志: $LOG_FILE"
  fi
  # Show the backup details
  log_info "备份详情:"
  clickhouse-backup list "$BACKUP_NAME" | tee -a "$LOG_FILE"
}
# Remove old backups
# Collects the names matching clickhouse_backup_<digits/underscores> from
# `clickhouse-backup list`, sorts them in reverse order (newest timestamp
# first) and deletes every local backup after the first RETENTION_DAYS names.
# Retention is therefore by number of backups kept, not by age in days.
# Globals: RETENTION_DAYS, LOG_FILE (read)
cleanup_old_backups() {
  log_info "清理超过 ${RETENTION_DAYS} 天的旧备份..."
  local backups_to_delete backup
  # -u drops repeated names so a backup is neither counted nor deleted twice.
  backups_to_delete=$(clickhouse-backup list \
    | grep -o 'clickhouse_backup_[0-9_]*' \
    | sort -ru \
    | tail -n +$((RETENTION_DAYS + 1)))
  if [ -n "$backups_to_delete" ]; then
    while IFS= read -r backup; do
      log_info "删除旧备份: $backup"
      # stdin comes from /dev/null so the delete command cannot swallow the
      # remaining names being fed to this loop.
      clickhouse-backup delete local "$backup" >> "$LOG_FILE" 2>&1 < /dev/null
    done <<< "$backups_to_delete"
    log_info "旧备份清理完成"
  else
    log_info "没有需要清理的旧备份"
  fi
}
# Verify the backup
# Checks that the backup directory exists under BACKUP_DIR and logs its
# human-readable size as reported by `du -sh`; a warning is logged when no
# size can be obtained.
# Globals: BACKUP_DIR, BACKUP_NAME (read)
verify_backup() {
  local target="$BACKUP_DIR/$BACKUP_NAME"
  local size
  log_info "验证备份完整性..."
  # The backup must be present on disk as a directory.
  if [[ ! -d "$target" ]]; then
    log_error "备份目录不存在: $BACKUP_DIR/$BACKUP_NAME"
  fi
  # First tab-separated field of du's output is the size.
  size=$(du -sh "$target" 2>/dev/null | cut -f1)
  if [[ -z "$size" ]]; then
    log_warn "无法获取备份大小"
  else
    log_info "备份大小: $size"
  fi
  log_info "备份验证完成"
}
# Main function
# Runs the whole backup workflow in order: dependency check, directory setup,
# config creation (only when the config file is missing), connection check,
# status report, backup, verification and cleanup of old backups.
# Globals: LOG_DIR, CONFIG_FILE, BACKUP_NAME, BACKUP_DIR, LOG_FILE, BLUE, NC (read)
main() {
  # The logging helpers append to $LOG_FILE, so its directory has to exist
  # before the first log call; otherwise that call fails and, with `set -e`
  # active, a first run stops right there.
  mkdir -p "$LOG_DIR"
  echo -e "${BLUE}========================================${NC}"
  echo -e "${BLUE} ClickHouse 完整备份脚本${NC}"
  echo -e "${BLUE}========================================${NC}"
  # Main workflow
  check_dependencies
  create_directories
  # An existing config file is left untouched.
  if [ ! -f "$CONFIG_FILE" ]; then
    create_config
  fi
  check_clickhouse_connection
  show_database_status
  perform_backup
  verify_backup
  cleanup_old_backups
  echo -e "${BLUE}========================================${NC}"
  log_info "备份流程完成!"
  log_info "备份名称: $BACKUP_NAME"
  log_info "备份位置: $BACKUP_DIR/$BACKUP_NAME"
  log_info "日志文件: $LOG_FILE"
  echo -e "${BLUE}========================================${NC}"
}
# Run the main function, forwarding all command-line arguments to it
main "$@"
3、执行备份脚本
bash
chmod +x backup.sh
./backup.sh
4、数据恢复
编写数据恢复脚本
bash
vim restore.sh
#!/bin/bash
set -e
# ---- Settings ----
# BACKUP_DIR is where backups are looked up on disk; CONFIG_FILE is the
# clickhouse-backup configuration path.
CONFIG_FILE="/etc/clickhouse-backup/config.yml"
BACKUP_DIR="/var/lib/clickhouse/backup"
# ---- Colour definitions ----
# Stored as literal backslash sequences; they are turned into escape codes
# when printed with `echo -e`.
NC='\033[0m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Logging helpers
# Print an INFO line to stdout: green tag, current timestamp, then the message.
# Backslash escapes in the whole line (colour codes included) are expanded.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
log_info() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%b\n' "${GREEN}[INFO]${NC} ${stamp} - $1"
}
# Print a WARN line to stdout: yellow tag, current timestamp, then the message.
# Backslash escapes in the whole line (colour codes included) are expanded.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
log_warn() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%b\n' "${YELLOW}[WARN]${NC} ${stamp} - $1"
}
# Print an ERROR line to stdout (red tag, current timestamp, message) and then
# terminate the script with exit status 1.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
log_error() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%b\n' "${RED}[ERROR]${NC} ${stamp} - $1"
  exit 1
}
# Dependency check
# Calls log_error when the shell cannot resolve the clickhouse-backup command.
check_dependencies() {
  command -v clickhouse-backup > /dev/null 2>&1 \
    || log_error "clickhouse-backup 未安装"
}
# List the available backups
# Runs `clickhouse-backup list` once, takes the first column of every line that
# starts with a letter as a backup name, prints a numbered listing and stores
# the names in the global BACKUP_ARRAY for select_backup.
# Globals: BLUE, GREEN, NC (read); BACKUP_ARRAY (written)
list_backups() {
  log_info "可用的备份文件:"
  echo -e "${BLUE}========================================${NC}"
  local backup_list
  # stderr is dropped so only the listing is parsed; a failing call leaves the
  # list empty and is reported through the check below.
  backup_list=$(clickhouse-backup list 2>/dev/null) || true
  if [ -z "$backup_list" ]; then
    log_error "没有找到可用的备份文件"
  fi
  # Backup names: first column, lines starting with a letter. mapfile reads one
  # name per line, so no word splitting or globbing is applied to the output.
  local -a backups=()
  mapfile -t backups < <(awk '$1 ~ /^[a-zA-Z]/ {print $1}' <<< "$backup_list")
  if [ "${#backups[@]}" -eq 0 ]; then
    log_error "无法解析备份列表"
  fi
  local count=1
  local name info
  for name in "${backups[@]}"; do
    # Detail line for this backup, taken from the listing already fetched
    # instead of invoking clickhouse-backup again for every entry.
    info=$(awk -v n="$name" '$1 == n {print; exit}' <<< "$backup_list")
    echo -e "${GREEN}$count${NC}. $info"
    count=$((count + 1))
  done
  echo -e "${BLUE}========================================${NC}"
  # Hand the names back to the caller through the global array
  BACKUP_ARRAY=("${backups[@]}")
}
# Choose a backup
# Prints the entries of BACKUP_ARRAY as a numbered menu and keeps prompting on
# stdin until a number within range is entered; the chosen name is stored in
# the global SELECTED_BACKUP.
# Globals: BACKUP_ARRAY (read); SELECTED_BACKUP (written)
select_backup() {
  if [ "${#BACKUP_ARRAY[@]}" -eq 0 ]; then
    log_error "没有可用的备份"
  fi
  echo "请选择要恢复的备份 (输入数字):"
  local count=1
  local backup choice index
  for backup in "${BACKUP_ARRAY[@]}"; do
    echo "$count) $backup"
    count=$((count + 1))
  done
  while true; do
    read -r -p "请输入选择 (1-${#BACKUP_ARRAY[@]}): " choice
    if [[ "$choice" =~ ^[0-9]+$ ]]; then
      # Force base 10: a leading zero would otherwise make the shell read the
      # number as octal, and inputs such as "08" or "09" would be an error.
      index=$((10#$choice))
      if ((index >= 1 && index <= ${#BACKUP_ARRAY[@]})); then
        SELECTED_BACKUP="${BACKUP_ARRAY[index - 1]}"
        break
      fi
    fi
    echo "无效选择,请重新输入"
  done
  log_info "已选择备份: $SELECTED_BACKUP"
}
# Show the tables contained in the selected backup
# Method 1 prints the output of `clickhouse-backup tables <backup>` with a line
# count. When that yields nothing, method 2 walks
# $BACKUP_DIR/<backup>/metadata and lists every *.sql file as db.table,
# skipping the system/information_schema databases.
# Globals: SELECTED_BACKUP, BACKUP_DIR, BLUE, YELLOW, NC (read)
# Returns: 1 when the metadata directory or any user database is missing
# NOTE(review): whether `clickhouse-backup tables` accepts a backup name as a
# positional argument depends on the installed version - confirm.
show_backup_tables() {
  log_info "备份 $SELECTED_BACKUP 中的表:"
  echo -e "${BLUE}========================================${NC}"
  local tables
  local total_tables=0
  # Method 1: ask clickhouse-backup; a failing call just leaves the list empty
  tables=$(clickhouse-backup tables "$SELECTED_BACKUP" 2>/dev/null) || true
  if [ -n "$tables" ]; then
    echo "$tables"
    total_tables=$(echo "$tables" | wc -l)
    echo -e "\n总表数: $total_tables"
  else
    # Method 2: read the table list straight from the backup directory
    local metadata_dir="$BACKUP_DIR/$SELECTED_BACKUP/metadata"
    if [ ! -d "$metadata_dir" ]; then
      log_warn "无法获取备份表信息"
      return 1
    fi
    # One sub-directory per database
    local databases
    databases=$(find "$metadata_dir" -maxdepth 1 -type d ! -path "$metadata_dir" -exec basename {} \; \
      | grep -v -E '^(system|information_schema|INFORMATION_SCHEMA)$') || true
    if [ -z "$databases" ]; then
      log_warn "备份中没有找到用户数据库"
      return 1
    fi
    local db db_tables table
    # Both loops read from here-strings rather than a pipeline, so they run in
    # the current shell and the running total is still set after the loop.
    while IFS= read -r db; do
      db_tables=$(find "$metadata_dir/$db" -name "*.sql" -type f -exec basename {} .sql \;)
      if [ -n "$db_tables" ]; then
        echo -e "${YELLOW}数据库: $db${NC}"
        while IFS= read -r table; do
          echo " - $db.$table"
          # Plain assignment: ((total_tables++)) returns a failure status when
          # the old value is 0, which would abort the script under `set -e`.
          total_tables=$((total_tables + 1))
        done <<< "$db_tables"
        echo ""
      fi
    done <<< "$databases"
    echo -e "总表数: $total_tables"
  fi
  echo -e "${BLUE}========================================${NC}"
}
# Choose the restore mode
# Prints the four-entry menu and keeps prompting on stdin until 1-4 is entered.
# The result is stored in the global RESTORE_MODE as one of:
# full, tables, schema, newdb.
# Globals: GREEN, NC (read); RESTORE_MODE (written)
select_restore_mode() {
  local -a labels=(
    "恢复整个备份"
    "恢复特定表"
    "仅恢复表结构"
    "恢复到新数据库(不覆盖现有数据)"
  )
  local i
  echo "请选择恢复模式:"
  for i in "${!labels[@]}"; do
    echo -e "${GREEN}$((i + 1))${NC}. ${labels[i]}"
  done
  while true; do
    read -p "请输入选择 (1-4): " mode
    case $mode in
      1) RESTORE_MODE="full" ;;
      2) RESTORE_MODE="tables" ;;
      3) RESTORE_MODE="schema" ;;
      4) RESTORE_MODE="newdb" ;;
      *)
        # Anything else: report it and ask again
        echo "无效选择,请重新输入"
        continue
        ;;
    esac
    break
  done
}
# Ask which tables to restore
# Shows the tables of the selected backup, prints the input format with
# examples, then reads one line from stdin. The literal answer "all" is stored
# as an empty RESTORE_TABLES; any other answer is stored unchanged.
# Globals: RESTORE_TABLES (written)
input_tables_to_restore() {
  show_backup_tables
  printf '%s\n' \
    "" \
    "请输入要恢复的表(格式: database.table_name):" \
    "示例:" \
    " - 恢复单个表: test_db.test_table" \
    " - 恢复多个表: test_db.table1 test_db.table2" \
    " - 使用通配符: test_db.*" \
    " - 恢复所有表: all" \
    ""
  read -p "请输入表名: " tables_input
  case "$tables_input" in
    all) RESTORE_TABLES="" ;;
    *) RESTORE_TABLES="$tables_input" ;;
  esac
}
# Ask for the name of the new target database
# Reads the name from stdin, rejects an empty value, and asks for confirmation
# when a database with exactly that name already exists on the server. The
# accepted name is stored in the global NEW_DATABASE.
# Globals: NEW_DATABASE (written)
# Exits:   with status 0 when the user declines to reuse an existing database
input_new_database_name() {
  echo ""
  echo "注意:这将创建一个新的数据库,不会覆盖现有数据"
  local new_db confirm
  read -r -p "请输入新数据库名称: " new_db
  if [ -z "$new_db" ]; then
    log_error "数据库名称不能为空"
  fi
  # Check whether the database already exists. The name is sent as a bound
  # query parameter rather than pasted into the SQL text, so quotes in the
  # input cannot change the statement, and both the query and the grep compare
  # the whole name literally.
  if clickhouse-client --host=localhost --port=9000 --user=root --password=123456 \
    --param_db="$new_db" \
    --query="SELECT name FROM system.databases WHERE name = {db:String}" 2>/dev/null \
    | grep -qxF -- "$new_db"; then
    log_warn "数据库 $new_db 已存在"
    read -r -p "是否继续?可能会覆盖现有表 (y/N): " confirm
    if [ "$confirm" != "y" ] && [ "$confirm" != "Y" ]; then
      exit 0
    fi
  fi
  NEW_DATABASE="$new_db"
}
# Confirm the restore operation
# Prints a summary of the selected backup and restore mode, with mode-specific
# detail lines, then asks for confirmation on stdin. Any answer other than
# "y" or "Y" logs a cancellation message and exits the script with status 0.
# Globals: SELECTED_BACKUP, RESTORE_MODE, RESTORE_TABLES, NEW_DATABASE,
#          YELLOW, RED, NC (read)
confirm_restore() {
  echo ""
  echo -e "${YELLOW}=== 恢复操作确认 ===${NC}"
  echo "备份文件: $SELECTED_BACKUP"
  echo "恢复模式: $RESTORE_MODE"
  if [[ "$RESTORE_MODE" == "full" ]]; then
    echo "恢复内容: 完整备份(所有数据库和表)"
    echo -e "${RED}警告: 这将覆盖现有数据!${NC}"
  elif [[ "$RESTORE_MODE" == "tables" ]]; then
    # An empty selection and the literal "all" both mean every table
    if [[ -z "$RESTORE_TABLES" || "$RESTORE_TABLES" == "all" ]]; then
      echo "恢复表: 所有表"
    else
      echo "恢复表: $RESTORE_TABLES"
    fi
  elif [[ "$RESTORE_MODE" == "schema" ]]; then
    echo "恢复内容: 仅表结构"
  elif [[ "$RESTORE_MODE" == "newdb" ]]; then
    echo "新数据库: $NEW_DATABASE"
  fi
  read -p "是否继续恢复操作? (y/N): " confirm
  case "$confirm" in
    y | Y) ;;
    *)
      log_info "恢复操作已取消"
      exit 0
      ;;
  esac
}
# Run the restore
# Invokes `clickhouse-backup restore` according to RESTORE_MODE:
#   full   - restore everything in the backup
#   tables - restore only the patterns in RESTORE_TABLES (empty or "all"
#            restores everything)
#   schema - restore table definitions only (--schema)
#   newdb  - restore one database from the backup under the name NEW_DATABASE
# Globals: RESTORE_MODE, RESTORE_TABLES, SELECTED_BACKUP, BACKUP_DIR,
#          NEW_DATABASE (read)
perform_restore() {
  log_info "开始执行恢复操作..."
  local start_time end_time duration
  start_time=$(date +%s)
  case "$RESTORE_MODE" in
    "full")
      log_info "执行完整恢复..."
      clickhouse-backup restore "$SELECTED_BACKUP"
      ;;
    "tables")
      if [ -z "$RESTORE_TABLES" ] || [ "$RESTORE_TABLES" = "all" ]; then
        log_info "恢复所有表..."
        clickhouse-backup restore "$SELECTED_BACKUP"
      else
        log_info "恢复指定表: $RESTORE_TABLES"
        # The prompt invites a space-separated list, but --table expects the
        # patterns joined by commas; split on blanks/commas and re-join.
        local -a table_list=()
        local table_patterns
        IFS=$' \t,' read -r -a table_list <<< "$RESTORE_TABLES"
        printf -v table_patterns '%s,' "${table_list[@]}"
        table_patterns="${table_patterns%,}"
        clickhouse-backup restore --table "$table_patterns" "$SELECTED_BACKUP"
      fi
      ;;
    "schema")
      log_info "仅恢复表结构..."
      clickhouse-backup restore --schema "$SELECTED_BACKUP"
      ;;
    "newdb")
      log_info "恢复到新数据库: $NEW_DATABASE"
      # Source database: the first (sorted) directory under metadata/ that is
      # not a system database, so the choice does not depend on the order in
      # which find happens to return entries.
      local metadata_dir="$BACKUP_DIR/$SELECTED_BACKUP/metadata"
      local original_db
      original_db=$(find "$metadata_dir" -maxdepth 1 -type d ! -path "$metadata_dir" -exec basename {} \; \
        | grep -v -E '^(system|information_schema|INFORMATION_SCHEMA)$' \
        | sort \
        | head -n 1) || true
      if [ -n "$original_db" ]; then
        # The restore sub-command names this option --restore-database-mapping.
        clickhouse-backup restore --rm=false --restore-database-mapping "$original_db:$NEW_DATABASE" "$SELECTED_BACKUP"
      else
        log_error "无法确定原数据库名"
      fi
      ;;
  esac
  end_time=$(date +%s)
  duration=$((end_time - start_time))
  log_info "✓ 恢复操作完成 (耗时: ${duration}秒)"
}
# Verify the restore result
# For full/tables/schema restores only a reminder to verify manually is logged.
# For a restore into a new database, the server is asked whether the database
# is listed and how many lines `SHOW TABLES` returns for it; log_error is
# called when the database is not found.
# Globals: RESTORE_MODE, NEW_DATABASE (read)
verify_restore() {
  local -a client=(
    clickhouse-client
    --host=localhost
    --port=9000
    --user=root
    --password=123456
  )
  local table_count
  log_info "验证恢复结果..."
  case "$RESTORE_MODE" in
    full | tables)
      log_info "恢复完成,请手动验证数据完整性"
      ;;
    schema)
      log_info "表结构恢复完成,请验证表结构"
      ;;
    newdb)
      if "${client[@]}" --query="SHOW DATABASES LIKE '$NEW_DATABASE'" 2>/dev/null | grep -q "$NEW_DATABASE"; then
        table_count=$("${client[@]}" --query="SHOW TABLES FROM $NEW_DATABASE" 2>/dev/null | wc -l)
        log_info "✓ 新数据库 $NEW_DATABASE 创建成功,包含 $table_count 个表"
      else
        log_error "新数据库创建失败"
      fi
      ;;
  esac
}
# Main function
# Drives the interactive restore: dependency check, backup listing and
# selection, restore-mode selection, the extra prompt needed by the "tables"
# and "newdb" modes, confirmation, the restore itself and verification.
# Globals: RESTORE_MODE, BLUE, NC (read)
main() {
  local rule="${BLUE}========================================${NC}"
  echo -e "$rule"
  echo -e "${BLUE} ClickHouse 数据恢复脚本${NC}"
  echo -e "$rule"
  # Main workflow
  check_dependencies
  list_backups
  select_backup
  select_restore_mode
  # Only two modes need additional input
  if [[ "$RESTORE_MODE" == "tables" ]]; then
    input_tables_to_restore
  elif [[ "$RESTORE_MODE" == "newdb" ]]; then
    input_new_database_name
  fi
  # Confirm, restore, verify
  confirm_restore
  perform_restore
  verify_restore
  echo -e "$rule"
  log_info "恢复流程完成!"
  echo -e "$rule"
}
# Run the main function, forwarding all command-line arguments to it
main "$@"