解决 ClickHouse 备份性能问题:从原生 BACKUP 迁移到 clickhouse-backup 的实战经验

一、工具对比

实际使用场景建议

使用原生 BACKUP 命令的情况:

简单备份需求 - 只需要基本备份功能

小数据量 - 数据量不大,资源消耗可控

测试环境 - 对性能影响不敏感的环境

快速原型 - 快速验证备份方案

使用 clickhouse-backup 的情况:

生产环境 - 对稳定性和性能要求高

大数据量 - TB级别数据需要备份

复杂备份策略 - 需要增量、压缩、加密等高级功能

多环境部署 - 需要统一的备份管理

资源敏感场景 - 不能接受高CPU使用率

二、写入测试数据到ClickHouse

ClickHouse 的部署步骤可以参考另一篇文章:《ClickHouse部署,100%亲测成功!——单机版&集群版》

先向 ClickHouse 实例中写入一批测试数据,便于后续观察备份和恢复的效果。

bash 复制代码
#!/bin/bash
#
# Seed a ClickHouse instance with one million rows of synthetic test data.
#
# Creates the database and the MergeTree table when they are missing, inserts
# rows derived from numbers(1000000), then prints a row-count / date-range
# summary. Each run appends another million rows.
#
# Every connection setting below can be overridden from the environment; the
# defaults are the values used throughout this article.

# Abort on the first failing command so the final success message is only
# printed when clickhouse-client really succeeded.
set -euo pipefail

# ClickHouse connection settings
CH_HOST="${CH_HOST:-localhost}"
CH_PORT="${CH_PORT:-9000}"
CH_USER="${CH_USER:-root}"
CH_PASSWORD="${CH_PASSWORD:-123456}"
DATABASE_NAME="${DATABASE_NAME:-test_db}"
TABLE_NAME="${TABLE_NAME:-test_table}"

echo "开始快速插入100万条测试数据..."

# Create the schema, insert one million rows and print a summary in a single
# client invocation.
# NOTE(review): several statements are passed through one --query; older
# clickhouse-client releases may need --multiquery for that - confirm against
# the installed version.
if ! clickhouse-client \
    --host "$CH_HOST" \
    --port "$CH_PORT" \
    --user "$CH_USER" \
    --password "$CH_PASSWORD" \
    --query "
    CREATE DATABASE IF NOT EXISTS $DATABASE_NAME;

    CREATE TABLE IF NOT EXISTS $DATABASE_NAME.$TABLE_NAME
    (
        id UInt64,
        name String,
        age UInt8,
        salary Float64,
        created_date Date,
        timestamp DateTime,
        is_active Bool,
        description String
    ) ENGINE = MergeTree()
    PARTITION BY toYYYYMM(created_date)
    ORDER BY (id, created_date);

    INSERT INTO $DATABASE_NAME.$TABLE_NAME
    SELECT
        number as id,
        concat('user_', toString(number)) as name,
        (number % 80) + 18 as age,
        (rand() % 100000) / 100.0 + 3000.0 as salary,
        today() - (number % 365) as created_date,
        now() - (number % 86400) as timestamp,
        (number % 2) = 0 as is_active,
        concat('Description for user ', toString(number)) as description
    FROM numbers(1000000);

    SELECT
        '数据插入完成!' as status,
        count(*) as total_rows,
        min(created_date) as earliest_date,
        max(created_date) as latest_date
    FROM $DATABASE_NAME.$TABLE_NAME;
    "; then
    echo "测试数据插入失败,请检查 ClickHouse 连接和上面的错误信息" >&2
    exit 1
fi

echo "测试数据插入完成!"

每执行一次该脚本,就会向 test_db.test_table 追加 100 万条数据。

三、使用BACKUP备份

参照:阿里云ClickHouse数据保护秘籍:本地备份与恢复详解

本地编写备份脚本

bash 复制代码
vim backup.sh
chmod +x backup.sh 
./backup.sh 
Backing up database: default
Backup completed for database: default
Backing up database: test_db

四、使用clickhouse-backup备份

1、安装clickhouse-backup

bash 复制代码
# Download the clickhouse-backup v2.6.39 linux-amd64 release tarball from the
# Altinity GitHub releases page.
wget https://github.com/Altinity/clickhouse-backup/releases/download/v2.6.39/clickhouse-backup-linux-amd64.tar.gz

# Unpack the tarball into the current directory.
tar -xf clickhouse-backup-linux-amd64.tar.gz 

# Copy the binary onto the PATH.
# NOTE(review): assumes the tarball unpacks to build/linux/amd64/ - confirm
# for other release versions.
cd build/linux/amd64/
cp clickhouse-backup /usr/local/bin/

# Directory that will hold config.yml (the path the backup script uses).
mkdir -p /etc/clickhouse-backup

2、编写备份脚本

bash 复制代码
vim backup.sh
#!/bin/bash
#
# backup.sh - create a local ClickHouse backup with clickhouse-backup,
# check that the backup directory exists, and prune older backups.

# Abort as soon as any unchecked command fails.
set -e

# Configuration variables
# Backup name carries a timestamp, e.g. clickhouse_backup_20240101_120000.
BACKUP_NAME="clickhouse_backup_$(date +'%Y%m%d_%H%M%S')"
CONFIG_FILE="/etc/clickhouse-backup/config.yml"
BACKUP_DIR="/var/lib/clickhouse/backup"
LOG_DIR="/var/log/clickhouse-backup"
# One log file per run, named after the backup.
LOG_FILE="$LOG_DIR/${BACKUP_NAME}.log"
# cleanup_old_backups uses this as the NUMBER of newest backups to keep.
RETENTION_DAYS=7

# ANSI colour escape sequences (interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers
#
# Print an INFO line to stdout and append the same line to $LOG_FILE.
# Globals:   GREEN, NC, LOG_FILE (read)
# Arguments: $1 - message text
# Returns:   non-zero if the log directory or file cannot be written
log_info() {
    # The first log call happens before create_directories has run. Without
    # the directory, tee fails and `set -e` would abort the whole script.
    mkdir -p "$(dirname "$LOG_FILE")"
    echo -e "${GREEN}[INFO]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Print a WARN line to stdout and append the same line to $LOG_FILE.
# Globals:   YELLOW, NC, LOG_FILE (read)
# Arguments: $1 - message text
# Returns:   non-zero if the log directory or file cannot be written
log_warn() {
    # Create the log directory on demand so that tee cannot fail (and trip
    # `set -e`) just because the directory has not been created yet.
    mkdir -p "$(dirname "$LOG_FILE")"
    echo -e "${YELLOW}[WARN]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Print an ERROR line to stdout, append it to $LOG_FILE, then terminate the
# script with exit status 1.
# Globals:   RED, NC, LOG_FILE (read)
# Arguments: $1 - message text
# Returns:   never returns; always exits 1
log_error() {
    # Logging to the file is best effort here: the message must reach the
    # terminal and the script must exit 1 even if the log cannot be written.
    mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null || true
    echo -e "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE" || true
    exit 1
}

# ClickHouse client wrapper (works around the authentication problem).
#
# Runs one query against the local server with the fixed credentials and
# hides the client's stderr. When the client fails, "Query failed: <query>"
# is written to stdout and the function still returns 0.
# Arguments: $1 - SQL text to execute
clickhouse_client() {
    local sql="$1"
    local -a conn=(
        --host="localhost"
        --port="9000"
        --user="root"
        --password="123456"
    )

    if ! clickhouse-client "${conn[@]}" --query="$sql" 2>/dev/null; then
        echo "Query failed: $sql"
    fi
}

# Check dependencies: both clickhouse-backup and clickhouse-client must be
# resolvable as commands, otherwise log_error is called for the missing one.
check_dependencies() {
    log_info "检查系统依赖..."

    command -v clickhouse-backup > /dev/null 2>&1 \
        || log_error "clickhouse-backup 未安装,请先安装"

    command -v clickhouse-client > /dev/null 2>&1 \
        || log_error "clickhouse-client 未安装,请先安装 ClickHouse"

    log_info "依赖检查完成"
}

# Create the directory layout the script needs: the backup directory, the
# log directory and the directory that will contain the config file.
# Globals: BACKUP_DIR, LOG_DIR, CONFIG_FILE (read)
create_directories() {
    log_info "创建必要的目录..."

    # Quoted so that paths containing whitespace or glob characters are
    # created as one directory instead of being word-split.
    mkdir -p -- "$BACKUP_DIR" "$LOG_DIR" "$(dirname "$CONFIG_FILE")"

    log_info "目录创建完成"
}

# Write the clickhouse-backup configuration to $CONFIG_FILE, replacing any
# existing file at that path.
# Globals: CONFIG_FILE (read)
create_config() {
    log_info "创建 ClickHouse 备份配置文件..."

    # The delimiter is quoted ('EOF'), so the text below is written verbatim
    # with no variable expansion.
    cat > "$CONFIG_FILE" << 'EOF'
# ClickHouse 备份配置文件
general:
  remote_storage: none
  max_file_size: 1GiB
  disable_progress_bar: false
  backups_to_keep_local: 7
  backups_to_keep_remote: 0
  restore_schema_on_cluster: ""
  upload_by_part: true
  download_by_part: true
  check_parts_columns: true

clickhouse:
  username: root
  password: "123456"
  host: localhost
  port: 9000
  skip_tables:
    - "system.*"
    - "information_schema.*"
    - "INFORMATION_SCHEMA.*"
    - "_temporary_and_external_tables.*"
  timeout: 30m
  freeze_by_part: true
  secure: false
  skip_verify: false
  sync_replicated_tables: true
  log_sql_queries: false
  dist_backup_restore: false

backup:
  include_databases: []
  exclude_databases: ["system", "information_schema", "INFORMATION_SCHEMA"]
  include_tables: []
  exclude_tables: []
  skip_empty_databases: true

compression:
  format: tar
  level: 1
EOF

    # The file contains the ClickHouse password in clear text, so restrict
    # it to the owning user.
    chmod 600 "$CONFIG_FILE"

    log_info "配置文件已创建: $CONFIG_FILE"
}

# Check the ClickHouse connection by running "SELECT 1" with the fixed local
# credentials; the probe's output is discarded and only its exit status
# decides between the success log line and log_error.
check_clickhouse_connection() {
    local -a probe=(
        clickhouse-client
        --host=localhost
        --port=9000
        --user=root
        --password=123456
        --query="SELECT 1"
    )

    log_info "检查 ClickHouse 连接..."

    if ! "${probe[@]}" > /dev/null 2>&1; then
        log_error "无法连接到 ClickHouse,请检查服务状态和认证信息"
    else
        log_info "ClickHouse 连接成功"
    fi
}

# Show the current database state: the list of databases, then per-database
# table count, size and row totals for everything except the system schemas.
# All output goes to stdout and is appended to $LOG_FILE.
# Globals: BLUE, NC, LOG_FILE (read)
show_database_status() {
    # Connection arguments shared by both queries.
    local -a ch=(
        clickhouse-client
        --host=localhost
        --port=9000
        --user=root
        --password=123456
    )

    log_info "当前数据库状态:"

    # $LOG_FILE is quoted everywhere so a path with whitespace is not split
    # into several tee targets.
    echo -e "${BLUE}=== 数据库列表 ===${NC}" | tee -a "$LOG_FILE"
    "${ch[@]}" --query="SHOW DATABASES" | tee -a "$LOG_FILE"

    echo -e "\n${BLUE}=== 用户表统计 ===${NC}" | tee -a "$LOG_FILE"
    "${ch[@]}" --query="
    SELECT
        database,
        count() as table_count,
        formatReadableSize(sum(total_bytes)) as total_size,
        sum(total_rows) as total_rows
    FROM system.tables
    WHERE database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')
    GROUP BY database
    ORDER BY sum(total_bytes) DESC
    " --format Pretty | tee -a "$LOG_FILE"
}

# Run the backup: create $BACKUP_NAME with clickhouse-backup, sending the
# tool's output to $LOG_FILE, then print the matching line of the local
# backup list.
# Globals: BACKUP_NAME, CONFIG_FILE, LOG_FILE (read)
# Exits via log_error when the backup cannot be created.
perform_backup() {
    log_info "开始创建备份: $BACKUP_NAME"

    local start_time end_time duration
    start_time=$(date +%s)

    # All expansions are quoted so paths or names with whitespace survive.
    if clickhouse-backup create --config "$CONFIG_FILE" "$BACKUP_NAME" >> "$LOG_FILE" 2>&1; then
        end_time=$(date +%s)
        duration=$((end_time - start_time))
        log_info "✓ 备份创建成功: $BACKUP_NAME (耗时: ${duration}秒)"
    else
        log_error "备份创建失败,请检查日志: $LOG_FILE"
    fi

    # Show the details of the backup that was just created. `list` takes a
    # location selector (all|local|remote), not a backup name, so list the
    # local backups and keep the line for this backup.
    log_info "备份详情:"
    clickhouse-backup list local | grep -F -- "$BACKUP_NAME" | tee -a "$LOG_FILE"
}

# Clean up old backups created by this script (names matching
# clickhouse_backup_<digits/underscores>).
#
# Despite the wording of the log message, RETENTION_DAYS is applied as a
# count: the newest RETENTION_DAYS backups are kept and the rest are deleted.
# Globals: RETENTION_DAYS, LOG_FILE (read)
cleanup_old_backups() {
    log_info "清理超过 ${RETENTION_DAYS} 天的旧备份..."

    local backups_to_delete backup
    # Only local backups are deleted below, so only list local ones. `sort -u`
    # collapses repeated names so that duplicates cannot skew the keep count.
    # The timestamped names sort chronologically, so reverse order is
    # newest-first and `tail` drops the newest RETENTION_DAYS entries.
    backups_to_delete=$(
        clickhouse-backup list local \
            | grep -o 'clickhouse_backup_[0-9_]*' \
            | sort -ru \
            | tail -n "+$((RETENTION_DAYS + 1))"
    ) || true

    if [ -n "$backups_to_delete" ]; then
        while IFS= read -r backup; do
            log_info "删除旧备份: $backup"
            # stdin is detached so the tool cannot consume the loop's input.
            clickhouse-backup delete local "$backup" >> "$LOG_FILE" 2>&1 < /dev/null
        done <<< "$backups_to_delete"
        log_info "旧备份清理完成"
    else
        log_info "没有需要清理的旧备份"
    fi
}

# Verify the backup: the directory $BACKUP_DIR/$BACKUP_NAME must exist
# (otherwise log_error is called), and its size as reported by `du -sh` is
# logged, with a warning when no size could be obtained.
verify_backup() {
    local target="$BACKUP_DIR/$BACKUP_NAME"
    local size

    log_info "验证备份完整性..."

    # The backup is expected to be a directory under BACKUP_DIR.
    [ -d "$target" ] || log_error "备份目录不存在: $target"

    # Human-readable size is the first tab-separated field of du's output.
    size=$(du -sh "$target" 2>/dev/null | cut -f1)
    if [ -z "$size" ]; then
        log_warn "无法获取备份大小"
    else
        log_info "备份大小: $size"
    fi

    log_info "备份验证完成"
}

# Main entry point: prints a banner, runs the backup steps in order, and
# creates the config file only when $CONFIG_FILE does not exist yet.
main() {
    local rule="${BLUE}========================================${NC}"
    local step

    echo -e "$rule"
    echo -e "${BLUE}    ClickHouse 完整备份脚本${NC}"
    echo -e "$rule"

    # Preparation
    check_dependencies
    create_directories
    [ -f "$CONFIG_FILE" ] || create_config

    # Backup pipeline, in execution order
    for step in \
        check_clickhouse_connection \
        show_database_status \
        perform_backup \
        verify_backup \
        cleanup_old_backups; do
        "$step"
    done

    # Closing summary
    echo -e "$rule"
    log_info "备份流程完成!"
    log_info "备份名称: $BACKUP_NAME"
    log_info "备份位置: $BACKUP_DIR/$BACKUP_NAME"
    log_info "日志文件: $LOG_FILE"
    echo -e "$rule"
}

# Run the main function, passing the script's command-line arguments through
main "$@"

3、执行备份脚本

bash 复制代码
chmod +x backup.sh
./backup.sh

4、数据恢复

编写数据恢复脚本

bash 复制代码
vim restore.sh 
#!/bin/bash
#
# restore.sh - interactive restore of a clickhouse-backup backup: pick a
# backup, pick a restore mode, confirm, restore, then verify.

# Abort as soon as any unchecked command fails.
set -e

# Configuration variables
BACKUP_DIR="/var/lib/clickhouse/backup"
# NOTE(review): CONFIG_FILE is not referenced anywhere else in the visible
# part of this script; clickhouse-backup is invoked without --config.
CONFIG_FILE="/etc/clickhouse-backup/config.yml"

# ANSI colour escape sequences (interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers
#
# Write a timestamped, colour-tagged INFO line to stdout.
# Arguments: $1 - message text (backslash escapes are interpreted)
log_info() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%b\n' "${GREEN}[INFO]${NC} ${stamp} - $1"
}

# Write a timestamped, colour-tagged WARN line to stdout.
# Arguments: $1 - message text (backslash escapes are interpreted)
log_warn() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%b\n' "${YELLOW}[WARN]${NC} ${stamp} - $1"
}

# Write a timestamped, colour-tagged ERROR line to stdout, then terminate the
# script with exit status 1.
# Arguments: $1 - message text (backslash escapes are interpreted)
log_error() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%b\n' "${RED}[ERROR]${NC} ${stamp} - $1"
    exit 1
}

# Check dependencies: clickhouse-backup must be resolvable as a command,
# otherwise log_error is called.
check_dependencies() {
    command -v clickhouse-backup > /dev/null 2>&1 \
        || log_error "clickhouse-backup 未安装"
}

# List the available backups and publish their names.
#
# Runs `clickhouse-backup list` once, keeps every line whose first field
# starts with a letter, prints those lines numbered from 1, and stores the
# names in the global array BACKUP_ARRAY in the same order.
# Globals: BLUE, GREEN, NC (read); BACKUP_ARRAY (written)
# Exits via log_error when the list is empty or no name can be parsed.
list_backups() {
    log_info "可用的备份文件:"
    echo -e "${BLUE}========================================${NC}"

    # A failing `list` is treated the same as an empty list.
    local backup_list
    backup_list=$(clickhouse-backup list 2>/dev/null) || true

    if [ -z "$backup_list" ]; then
        log_error "没有找到可用的备份文件"
    fi

    # Collect backup names (first column) together with their full list line.
    # The line shown for each backup comes from this single listing, so no
    # further clickhouse-backup call is needed per backup (`list` accepts a
    # location selector, not a backup name).
    # NOTE(review): names that do not start with a letter (e.g. purely
    # timestamp-named backups) are skipped by this filter - confirm intended.
    local -a names=() details=()
    local line name
    while IFS= read -r line; do
        name=""
        read -r name _ <<< "$line" || true
        if [[ "$name" =~ ^[a-zA-Z] ]]; then
            names+=("$name")
            details+=("$line")
        fi
    done <<< "$backup_list"

    if [ "${#names[@]}" -eq 0 ]; then
        log_error "无法解析备份列表"
    fi

    local i
    for i in "${!names[@]}"; do
        # %b expands the colour codes only; the list line is printed verbatim.
        printf '%b. %s\n' "${GREEN}$((i + 1))${NC}" "${details[$i]}"
    done

    echo -e "${BLUE}========================================${NC}"

    # Hand the backup names over to select_backup.
    BACKUP_ARRAY=("${names[@]}")
}

# Let the operator choose one backup from BACKUP_ARRAY by number.
#
# Prompts until a number between 1 and the array length is entered and
# stores the chosen name in the global SELECTED_BACKUP.
# Globals: BACKUP_ARRAY (read); SELECTED_BACKUP (written)
# Exits via log_error when there are no backups or stdin reaches EOF.
select_backup() {
    if [ "${#BACKUP_ARRAY[@]}" -eq 0 ]; then
        log_error "没有可用的备份"
    fi

    echo "请选择要恢复的备份 (输入数字):"

    local i
    for i in "${!BACKUP_ARRAY[@]}"; do
        printf '%d) %s\n' "$((i + 1))" "${BACKUP_ARRAY[$i]}"
    done

    local choice
    while true; do
        # Stop instead of looping forever when no more input is available.
        if ! read -r -p "请输入选择 (1-${#BACKUP_ARRAY[@]}): " choice; then
            log_error "未读取到输入,已取消选择"
            return 1
        fi

        # At most 9 digits keeps the value inside shell integer range.
        if [[ "$choice" =~ ^[0-9]{1,9}$ ]]; then
            # Force base 10: a leading zero ("08") would otherwise be read as
            # an invalid octal number by shell arithmetic.
            choice=$((10#$choice))
            if (( choice >= 1 && choice <= ${#BACKUP_ARRAY[@]} )); then
                SELECTED_BACKUP="${BACKUP_ARRAY[choice - 1]}"
                break
            fi
        fi

        echo "无效选择,请重新输入"
    done

    log_info "已选择备份: $SELECTED_BACKUP"
}

# Show the tables contained in $SELECTED_BACKUP.
#
# Method 1 prints the output of `clickhouse-backup tables`. When that yields
# nothing, method 2 walks $BACKUP_DIR/$SELECTED_BACKUP/metadata and lists
# every *.sql file per database directory, skipping the system schemas.
# Globals: BLUE, YELLOW, NC, BACKUP_DIR, SELECTED_BACKUP (read)
# Returns: 1 (after a warning) when no table information can be found
show_backup_tables() {
    log_info "备份 $SELECTED_BACKUP 中的表:"
    echo -e "${BLUE}========================================${NC}"

    # Method 1: ask clickhouse-backup.
    # NOTE(review): confirm that `tables <backup_name>` reports the tables of
    # the backup rather than those of the live server.
    local tables
    tables=$(clickhouse-backup tables "$SELECTED_BACKUP" 2>/dev/null) || true

    local total_tables=0
    if [ -n "$tables" ]; then
        echo "$tables"
        total_tables=$(echo "$tables" | wc -l)
        echo -e "\n总表数: $total_tables"
    else
        # Method 2: read the table list straight from the backup directory.
        local metadata_dir="$BACKUP_DIR/$SELECTED_BACKUP/metadata"

        if [ ! -d "$metadata_dir" ]; then
            log_warn "无法获取备份表信息"
            return 1
        fi

        # One directory per database; `|| true` because grep exits 1 when
        # nothing is left after filtering.
        local databases
        databases=$(
            find "$metadata_dir" -maxdepth 1 -type d ! -path "$metadata_dir" -exec basename {} \; \
                | grep -v -E '^(system|information_schema|INFORMATION_SCHEMA)$' \
                | LC_ALL=C sort
        ) || true

        if [ -z "$databases" ]; then
            log_warn "备份中没有找到用户数据库"
            return 1
        fi

        # The loops read from here-strings instead of pipes so that the
        # counter is updated in this shell and not lost in a subshell.
        local db table db_tables
        while IFS= read -r db; do
            db_tables=$(find "$metadata_dir/$db" -name "*.sql" -type f -exec basename {} .sql \;)
            if [ -n "$db_tables" ]; then
                echo -e "${YELLOW}数据库: $db${NC}"
                while IFS= read -r table; do
                    echo "  - $db.$table"
                    total_tables=$((total_tables + 1))
                done <<< "$db_tables"
                echo ""
            fi
        done <<< "$databases"

        echo -e "总表数: $total_tables"
    fi

    echo -e "${BLUE}========================================${NC}"
}

# Ask the operator for a restore mode and store it in the global
# RESTORE_MODE: 1 -> full, 2 -> tables, 3 -> schema, 4 -> newdb.
# Prompts again until one of 1-4 is entered.
# Globals: GREEN, NC (read); RESTORE_MODE (written)
# Exits via log_error when stdin reaches EOF.
select_restore_mode() {
    echo "请选择恢复模式:"
    echo -e "${GREEN}1${NC}. 恢复整个备份"
    echo -e "${GREEN}2${NC}. 恢复特定表"
    echo -e "${GREEN}3${NC}. 仅恢复表结构"
    echo -e "${GREEN}4${NC}. 恢复到新数据库(不覆盖现有数据)"

    # Menu number N maps to modes[N-1].
    local -a modes=(full tables schema newdb)
    local mode

    while true; do
        # Stop instead of looping forever when no more input is available.
        if ! read -r -p "请输入选择 (1-4): " mode; then
            log_error "未读取到输入,已取消操作"
            return 1
        fi

        case "$mode" in
            [1-4])
                RESTORE_MODE="${modes[mode - 1]}"
                break
                ;;
            *)
                echo "无效选择,请重新输入"
                ;;
        esac
    done
}

# Ask which tables to restore and store the answer in the global
# RESTORE_TABLES.
#
# The answer may list several patterns separated by spaces and/or commas;
# they are stored comma-separated, which is the form clickhouse-backup's
# --table option takes. "all" or an empty answer stores an empty string,
# meaning every table.
# Globals: RESTORE_TABLES (written)
input_tables_to_restore() {
    # Listing the tables is only a convenience: a failure there (it returns 1
    # after a warning) must not abort the restore under `set -e`.
    show_backup_tables || true

    echo ""
    echo "请输入要恢复的表(格式: database.table_name):"
    echo "示例:"
    echo "  - 恢复单个表: test_db.test_table"
    echo "  - 恢复多个表: test_db.table1 test_db.table2"
    echo "  - 使用通配符: test_db.*"
    echo "  - 恢复所有表: all"
    echo ""

    local tables_input=""
    read -r -p "请输入表名: " tables_input || true

    # Split on whitespace and commas; no glob expansion happens here, so a
    # pattern such as test_db.* is kept literally.
    local -a patterns=()
    read -r -a patterns <<< "${tables_input//,/ }" || true

    if [ "${#patterns[@]}" -eq 0 ] || [ "${patterns[*]}" = "all" ]; then
        RESTORE_TABLES=""
    else
        RESTORE_TABLES=$(IFS=,; printf '%s' "${patterns[*]}")
    fi
}

# Ask for the name of the database to restore into and store it in the
# global NEW_DATABASE.
#
# The name must be a plain identifier (letters, digits, underscore, not
# starting with a digit). When a database with exactly that name already
# exists, the operator has to confirm; declining ends the script with
# status 0.
# Globals: NEW_DATABASE (written)
# Exits via log_error on an empty or invalid name.
input_new_database_name() {
    echo ""
    echo "注意:这将创建一个新的数据库,不会覆盖现有数据"

    local new_db=""
    read -r -p "请输入新数据库名称: " new_db || true

    if [ -z "$new_db" ]; then
        log_error "数据库名称不能为空"
    fi

    # The name is interpolated into SQL text below, so reject anything that
    # is not a plain identifier instead of passing it to the server.
    if [[ ! "$new_db" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        log_error "数据库名称只能包含字母、数字和下划线,且不能以数字开头"
    fi

    # Check whether the database already exists. -x/-F make grep compare the
    # whole line literally, so "foo" is not mistaken for "foo_old".
    if clickhouse-client --host=localhost --port=9000 --user=root --password=123456 \
        --query="SHOW DATABASES LIKE '$new_db'" 2>/dev/null | grep -qxF -- "$new_db"; then
        log_warn "数据库 $new_db 已存在"
        local confirm=""
        read -r -p "是否继续?可能会覆盖现有表 (y/N): " confirm || true
        if [ "$confirm" != "y" ] && [ "$confirm" != "Y" ]; then
            exit 0
        fi
    fi

    NEW_DATABASE="$new_db"
}

# Confirm the restore: print a summary of the selected backup and mode, then
# ask for a y/N answer. Anything other than "y" or "Y" logs a cancellation
# message and ends the script with status 0.
confirm_restore() {
    echo ""
    echo -e "${YELLOW}=== 恢复操作确认 ===${NC}"
    printf '%s\n' "备份文件: $SELECTED_BACKUP" "恢复模式: $RESTORE_MODE"

    # Mode-specific detail lines
    if [[ "$RESTORE_MODE" == "full" ]]; then
        echo "恢复内容: 完整备份(所有数据库和表)"
        echo -e "${RED}警告: 这将覆盖现有数据!${NC}"
    elif [[ "$RESTORE_MODE" == "tables" ]]; then
        if [[ -n "$RESTORE_TABLES" && "$RESTORE_TABLES" != "all" ]]; then
            echo "恢复表: $RESTORE_TABLES"
        else
            echo "恢复表: 所有表"
        fi
    elif [[ "$RESTORE_MODE" == "schema" ]]; then
        echo "恢复内容: 仅表结构"
    elif [[ "$RESTORE_MODE" == "newdb" ]]; then
        echo "新数据库: $NEW_DATABASE"
    fi

    read -p "是否继续恢复操作? (y/N): " confirm

    case "$confirm" in
        y | Y) ;;
        *)
            log_info "恢复操作已取消"
            exit 0
            ;;
    esac
}

# Run the restore selected by RESTORE_MODE against $SELECTED_BACKUP.
#
#   full    restore everything
#   tables  restore only the patterns in RESTORE_TABLES (empty or "all"
#           restores everything)
#   schema  restore table definitions only
#   newdb   restore one database of the backup under the name NEW_DATABASE
#
# Globals: RESTORE_MODE, RESTORE_TABLES, NEW_DATABASE, SELECTED_BACKUP,
#          BACKUP_DIR (read)
# Exits via log_error on an unknown mode, when the source database cannot be
# determined, or when clickhouse-backup fails.
perform_restore() {
    log_info "开始执行恢复操作..."
    local start_time end_time duration
    start_time=$(date +%s)

    # Options placed between `restore` and the backup name.
    local -a restore_args=()

    case "$RESTORE_MODE" in
        "full")
            log_info "执行完整恢复..."
            ;;
        "tables")
            # --table takes comma-separated patterns; accept spaces and/or
            # commas as separators and join the patterns with commas.
            local -a patterns=()
            if [ "$RESTORE_TABLES" != "all" ]; then
                read -r -a patterns <<< "${RESTORE_TABLES//,/ }" || true
            fi
            if [ "${#patterns[@]}" -eq 0 ]; then
                log_info "恢复所有表..."
            else
                local table_list
                table_list=$(IFS=,; printf '%s' "${patterns[*]}")
                log_info "恢复指定表: $table_list"
                restore_args+=(--table "$table_list")
            fi
            ;;
        "schema")
            log_info "仅恢复表结构..."
            restore_args+=(--schema)
            ;;
        "newdb")
            log_info "恢复到新数据库: $NEW_DATABASE"
            # Take the source database name from the backup metadata: the
            # first user database in sorted order, so the choice does not
            # depend on the order in which find returns directories.
            # NOTE(review): only this one database is mapped; a backup with
            # several user databases needs one mapping per database.
            local metadata_dir="$BACKUP_DIR/$SELECTED_BACKUP/metadata"
            local original_db
            original_db=$(
                find "$metadata_dir" -maxdepth 1 -type d ! -path "$metadata_dir" -exec basename {} \; 2>/dev/null \
                    | grep -v -E '^(system|information_schema|INFORMATION_SCHEMA)$' \
                    | LC_ALL=C sort \
                    | head -n 1
            ) || true
            if [ -z "$original_db" ]; then
                log_error "无法确定原数据库名"
                return 1
            fi
            # The documented option name is --restore-database-mapping.
            restore_args+=(--rm=false --restore-database-mapping "$original_db:$NEW_DATABASE")
            ;;
        *)
            # Never fall through to an unrestricted restore on a bad mode.
            log_error "未知的恢复模式: $RESTORE_MODE"
            return 1
            ;;
    esac

    # Checked explicitly so that success is never reported after a failure,
    # whether or not errexit is active.
    if ! clickhouse-backup restore "${restore_args[@]}" "$SELECTED_BACKUP"; then
        log_error "恢复操作失败,请检查上面的 clickhouse-backup 输出"
        return 1
    fi

    end_time=$(date +%s)
    duration=$((end_time - start_time))
    log_info "✓ 恢复操作完成 (耗时: ${duration}秒)"
}

# Verify the restore result. For full/tables/schema modes only a reminder to
# check manually is logged. For newdb mode the server is asked whether
# $NEW_DATABASE exists; if so its table count is logged, otherwise log_error
# is called.
verify_restore() {
    log_info "验证恢复结果..."

    if [[ "$RESTORE_MODE" == "full" || "$RESTORE_MODE" == "tables" ]]; then
        log_info "恢复完成,请手动验证数据完整性"
    elif [[ "$RESTORE_MODE" == "schema" ]]; then
        log_info "表结构恢复完成,请验证表结构"
    elif [[ "$RESTORE_MODE" == "newdb" ]]; then
        # Connection arguments shared by both queries.
        local -a ch=(
            clickhouse-client
            --host=localhost
            --port=9000
            --user=root
            --password=123456
        )

        if ! "${ch[@]}" --query="SHOW DATABASES LIKE '$NEW_DATABASE'" 2>/dev/null | grep -q "$NEW_DATABASE"; then
            log_error "新数据库创建失败"
        else
            # One output line per table.
            local table_count
            table_count=$("${ch[@]}" --query="SHOW TABLES FROM $NEW_DATABASE" 2>/dev/null | wc -l)
            log_info "✓ 新数据库 $NEW_DATABASE 创建成功,包含 $table_count 个表"
        fi
    fi
}

# Main entry point: prints a banner, lets the operator choose a backup and a
# restore mode, collects the extra input that the "tables" and "newdb" modes
# need, then confirms, restores and verifies.
main() {
    local rule="${BLUE}========================================${NC}"

    echo -e "$rule"
    echo -e "${BLUE}    ClickHouse 数据恢复脚本${NC}"
    echo -e "$rule"

    # Selection phase
    check_dependencies
    list_backups
    select_backup
    select_restore_mode

    # Mode-specific input; the other modes need none.
    if [[ "$RESTORE_MODE" == "tables" ]]; then
        input_tables_to_restore
    elif [[ "$RESTORE_MODE" == "newdb" ]]; then
        input_new_database_name
    fi

    # Confirm, then run and verify the restore
    confirm_restore
    perform_restore
    verify_restore

    echo -e "$rule"
    log_info "恢复流程完成!"
    echo -e "$rule"
}

# Run the main function, passing the script's command-line arguments through
main "$@"
相关推荐
wending-Y1 小时前
clickhouse 物化视图数据查询不稳定分析
clickhouse
l1t3 天前
PostgreSQL pg_clickhouse插件的安装和使用
数据库·clickhouse·postgresql·插件
honder试试4 天前
Springboot实现Clickhouse连接池的配置和接口查询
spring boot·后端·clickhouse
Mr_wilson_liu4 天前
通过DBeaver22.0.5 连接数据库ck(clickhouse)、pg(postgres)
数据库·clickhouse
波波仔866 天前
clickhouse表存储引擎
clickhouse·表存储引擎
波波仔866 天前
clickhouse存储和分区
clickhouse·排序·分区
波波仔866 天前
clickhouse insert与update区别
clickhouse·insert·update
波波仔866 天前
clickhouse简介
数据库·clickhouse
深色風信子6 天前
ClickHouse 快速入门
clickhouse·列式存储
波波仔866 天前
行存储与列存储的区别
数据库·clickhouse·行存储·列储存