解决 ClickHouse 备份性能问题:从原生 BACKUP 迁移到 clickhouse-backup 的实战经验

一、工具对比

实际使用场景建议

使用原生 BACKUP 命令的情况:

简单备份需求 - 只需要基本备份功能

小数据量 - 数据量不大,资源消耗可控

测试环境 - 对性能影响不敏感的环境

快速原型 - 快速验证备份方案

使用 clickhouse-backup 的情况:

生产环境 - 对稳定性和性能要求高

大数据量 - TB级别数据需要备份

复杂备份策略 - 需要增量、压缩、加密等高级功能

多环境部署 - 需要统一的备份管理

资源敏感场景 - 不能接受高CPU使用率

二、写入测试数据到ClickHouse

ClickHouse的部署可以查看:ClickHouse部署,100%亲测成功!---单机版&集群版

提前向 ClickHouse 实例中插入一些测试数据,便于后续观察备份与恢复的效果。

bash 复制代码
#!/bin/bash
#
# Seed a ClickHouse instance with synthetic rows so that later backup and
# restore runs have data to work on. Each run creates the database/table if
# they do not exist yet and appends another 1,000,000 rows.

# Abort on the first failing command so the final success message is only
# printed when clickhouse-client actually succeeded.
set -euo pipefail

# ClickHouse connection settings
CH_HOST="localhost"
CH_PORT="9000"
CH_USER="root"
CH_PASSWORD="123456"
DATABASE_NAME="test_db"
TABLE_NAME="test_table"

echo "开始快速插入100万条测试数据..."

# Create the schema, bulk-insert 1,000,000 generated rows with a single
# INSERT ... SELECT, then print a summary of the table contents.
# NOTE(review): some clickhouse-client releases need --multiquery to accept
# several statements in one --query; confirm against the installed version.
clickhouse-client \
    --host "$CH_HOST" \
    --port "$CH_PORT" \
    --user "$CH_USER" \
    --password "$CH_PASSWORD" \
    --query "
    CREATE DATABASE IF NOT EXISTS $DATABASE_NAME;

    CREATE TABLE IF NOT EXISTS $DATABASE_NAME.$TABLE_NAME
    (
        id UInt64,
        name String,
        age UInt8,
        salary Float64,
        created_date Date,
        timestamp DateTime,
        is_active Bool,
        description String
    ) ENGINE = MergeTree()
    PARTITION BY toYYYYMM(created_date)
    ORDER BY (id, created_date);

    INSERT INTO $DATABASE_NAME.$TABLE_NAME
    SELECT
        number as id,
        concat('user_', toString(number)) as name,
        (number % 80) + 18 as age,
        (rand() % 100000) / 100.0 + 3000.0 as salary,
        today() - (number % 365) as created_date,
        now() - (number % 86400) as timestamp,
        (number % 2) = 0 as is_active,
        concat('Description for user ', toString(number)) as description
    FROM numbers(1000000);

    SELECT
        '数据插入完成!' as status,
        count(*) as total_rows,
        min(created_date) as earliest_date,
        max(created_date) as latest_date
    FROM $DATABASE_NAME.$TABLE_NAME;
    "

echo "测试数据插入完成!"

该脚本每执行一次,就会向 test_db.test_table 追加写入 100 万条数据。

三、使用BACKUP备份

参照:阿里云ClickHouse数据保护秘籍:本地备份与恢复详解

在本地编写备份脚本 backup.sh,赋予执行权限后运行;执行过程及输出如下:

bash 复制代码
vim backup.sh
chmod +x backup.sh 
./backup.sh 
Backing up database: default
Backup completed for database: default
Backing up database: test_db

四、使用clickhouse-backup备份

1、安装clickhouse-backup

bash 复制代码
# Download the clickhouse-backup v2.6.39 linux-amd64 release archive from GitHub.
wget https://github.com/Altinity/clickhouse-backup/releases/download/v2.6.39/clickhouse-backup-linux-amd64.tar.gz

# Unpack the archive into the current directory.
tar -xf clickhouse-backup-linux-amd64.tar.gz 

# Copy the binary from the unpacked build directory into /usr/local/bin.
# Note: the shell stays inside build/linux/amd64/ after these two commands.
cd build/linux/amd64/
cp clickhouse-backup /usr/local/bin/

# Directory that will hold the clickhouse-backup configuration file.
mkdir -p /etc/clickhouse-backup

2、编写备份脚本

bash 复制代码
vim backup.sh
#!/bin/bash

# Stop at the first command that exits with a non-zero status.
set -o errexit

# Locations and retention setting used by the backup workflow.
RETENTION_DAYS=7
CONFIG_FILE='/etc/clickhouse-backup/config.yml'
BACKUP_DIR='/var/lib/clickhouse/backup'
LOG_DIR='/var/log/clickhouse-backup'

# The backup name carries a timestamp; the log file is named after the backup.
BACKUP_NAME="clickhouse_backup_$(date '+%Y%m%d_%H%M%S')"
LOG_FILE="${LOG_DIR}/${BACKUP_NAME}.log"

# ANSI colour escape sequences (kept as literal text, expanded when printed).
NC="\033[0m"
BLUE="\033[0;34m"
YELLOW="\033[1;33m"
GREEN="\033[0;32m"
RED="\033[0;31m"

# Logging helper: print a green "[INFO]" line with a timestamp to stdout and
# append the same line to the log file.
# Globals:   GREEN, NC, LOG_FILE (read)
# Arguments: $1 - message text
log_info() {
    # LOG_FILE is quoted so that a log path containing spaces is not split
    # into several tee targets.
    echo -e "${GREEN}[INFO]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Print a yellow "[WARN]" line with a timestamp to stdout and append the same
# line to the log file.
# Globals:   YELLOW, NC, LOG_FILE (read)
# Arguments: $1 - message text
log_warn() {
    # LOG_FILE is quoted so that a log path containing spaces is not split
    # into several tee targets.
    echo -e "${YELLOW}[WARN]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Print a red "[ERROR]" line with a timestamp to stdout, append it to the log
# file, then terminate the script with exit status 1.
# Globals:   RED, NC, LOG_FILE (read)
# Arguments: $1 - message text
log_error() {
    # LOG_FILE is quoted so that a log path containing spaces is not split
    # into several tee targets.
    echo -e "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
    exit 1
}

# Run one SQL statement through clickhouse-client using the script's fixed
# connection settings.
# Arguments: $1 - SQL text passed to --query
# Outputs:   the query result on stdout; a failure notice on stderr
# Returns:   0 on success, 1 when clickhouse-client exits non-zero
clickhouse_client() {
    local query="$1"

    # The failure notice goes to stderr and the status is propagated, so a
    # caller capturing stdout never mistakes the notice for query output.
    if ! clickhouse-client \
        --host="localhost" \
        --port="9000" \
        --user="root" \
        --password="123456" \
        --query="$query" 2>/dev/null; then
        echo "Query failed: $query" >&2
        return 1
    fi
}

# Make sure the two external tools driven by this script can be resolved;
# log_error is invoked for the first one that is missing.
check_dependencies() {
    log_info "检查系统依赖..."

    command -v clickhouse-backup > /dev/null 2>&1 \
        || log_error "clickhouse-backup 未安装,请先安装"

    command -v clickhouse-client > /dev/null 2>&1 \
        || log_error "clickhouse-client 未安装,请先安装 ClickHouse"

    log_info "依赖检查完成"
}

# Create the backup, log and configuration directories if they are missing.
# Globals: BACKUP_DIR, LOG_DIR, CONFIG_FILE (read)
create_directories() {
    # The log directory is created before the first log line is written, and
    # every path is quoted so directories containing spaces work.
    mkdir -p "$LOG_DIR"

    log_info "创建必要的目录..."

    mkdir -p "$BACKUP_DIR"
    mkdir -p "$(dirname "$CONFIG_FILE")"

    log_info "目录创建完成"
}

# Write the clickhouse-backup configuration file to $CONFIG_FILE.
# Globals: CONFIG_FILE (read)
#
# NOTE(review): confirm the keys below against `clickhouse-backup
# default-config`; in particular the `backup:` and `compression:` sections and
# `check_parts_columns` under `general:` may not be recognised there.
create_config() {
    log_info "创建 ClickHouse 备份配置文件..."

    # The file embeds the ClickHouse password, so it is created readable by
    # the owner only; the umask change is confined to the subshell.
    (
        umask 077
        cat > "$CONFIG_FILE" << 'EOF'
# ClickHouse 备份配置文件
general:
  remote_storage: none
  max_file_size: 1GiB
  disable_progress_bar: false
  backups_to_keep_local: 7
  backups_to_keep_remote: 0
  restore_schema_on_cluster: ""
  upload_by_part: true
  download_by_part: true
  check_parts_columns: true

clickhouse:
  username: root
  password: "123456"
  host: localhost
  port: 9000
  skip_tables:
    - "system.*"
    - "information_schema.*"
    - "INFORMATION_SCHEMA.*"
    - "_temporary_and_external_tables.*"
  timeout: 30m
  freeze_by_part: true
  secure: false
  skip_verify: false
  sync_replicated_tables: true
  log_sql_queries: false
  dist_backup_restore: false

backup:
  include_databases: []
  exclude_databases: ["system", "information_schema", "INFORMATION_SCHEMA"]
  include_tables: []
  exclude_tables: []
  skip_empty_databases: true

compression:
  format: tar
  level: 1
EOF
    )
    # Also tighten the mode when an existing file was overwritten.
    chmod 600 "$CONFIG_FILE"

    log_info "配置文件已创建: $CONFIG_FILE"
}

# Probe the ClickHouse server with a trivial "SELECT 1"; log success, or call
# log_error when the probe command fails.
check_clickhouse_connection() {
    local -a probe=(clickhouse-client --host=localhost --port=9000 --user=root --password=123456 --query="SELECT 1")

    log_info "检查 ClickHouse 连接..."

    if ! "${probe[@]}" > /dev/null 2>&1; then
        log_error "无法连接到 ClickHouse,请检查服务状态和认证信息"
    else
        log_info "ClickHouse 连接成功"
    fi
}

# Print the database list and per-database table statistics (table count,
# size, rows) to stdout and append both to the log file.
# Globals: BLUE, NC, LOG_FILE (read)
show_database_status() {
    # Connection flags are kept in one array instead of being repeated, and
    # LOG_FILE is quoted so a log path with spaces is handled correctly.
    local -a ch_client=(clickhouse-client --host=localhost --port=9000 --user=root --password=123456)

    log_info "当前数据库状态:"

    echo -e "${BLUE}=== 数据库列表 ===${NC}" | tee -a "$LOG_FILE"
    "${ch_client[@]}" --query="SHOW DATABASES" | tee -a "$LOG_FILE"

    echo -e "\n${BLUE}=== 用户表统计 ===${NC}" | tee -a "$LOG_FILE"
    "${ch_client[@]}" --query="
    SELECT
        database,
        count() as table_count,
        formatReadableSize(sum(total_bytes)) as total_size,
        sum(total_rows) as total_rows
    FROM system.tables
    WHERE database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')
    GROUP BY database
    ORDER BY sum(total_bytes) DESC
    " --format Pretty | tee -a "$LOG_FILE"
}

# Create the backup named $BACKUP_NAME with clickhouse-backup, log how long it
# took, then print the listing entry of the new backup.
# Globals: BACKUP_NAME, CONFIG_FILE, LOG_FILE (read)
# Exits (through log_error) when the create command fails.
perform_backup() {
    log_info "开始创建备份: $BACKUP_NAME"

    # Declaration and assignment are split so a failing command substitution
    # is not masked by `local`.
    local start_time end_time duration
    start_time=$(date +%s)

    if clickhouse-backup create --config "$CONFIG_FILE" "$BACKUP_NAME" >> "$LOG_FILE" 2>&1; then
        end_time=$(date +%s)
        duration=$((end_time - start_time))
        log_info "✓ 备份创建成功: $BACKUP_NAME (耗时: ${duration}秒)"
    else
        log_error "备份创建失败,请检查日志: $LOG_FILE"
    fi

    # `clickhouse-backup list` takes a location (all|local|remote) as its
    # argument, not a backup name, so the entry for this backup is filtered
    # out of the local listing. A missing match must not abort the script.
    log_info "备份详情:"
    clickhouse-backup list --config "$CONFIG_FILE" local \
        | { grep -F -- "$BACKUP_NAME" || true; } \
        | tee -a "$LOG_FILE"
}

# Delete old local backups created by this script, keeping only the newest
# $RETENTION_DAYS of them.
#
# Despite its name, RETENTION_DAYS is applied as a COUNT of backups to keep
# (newest first by timestamped name), not as an age in days; the log message
# states this so it matches what is actually deleted.
# Globals: RETENTION_DAYS, LOG_FILE (read)
cleanup_old_backups() {
    log_info "清理旧备份(仅保留最近 ${RETENTION_DAYS} 个)..."

    # Names embed a sortable timestamp, so a reverse sort puts the newest
    # first; everything after the first RETENTION_DAYS entries is deleted.
    local backups_to_delete
    backups_to_delete=$(clickhouse-backup list local \
        | grep -o 'clickhouse_backup_[0-9_]*' \
        | sort -ru \
        | tail -n +"$((RETENTION_DAYS + 1))") || true

    if [ -z "$backups_to_delete" ]; then
        log_info "没有需要清理的旧备份"
        return 0
    fi

    # Reading from a here-string keeps the loop in the current shell; a failed
    # delete is reported and skipped rather than silently ending the script.
    local backup
    while IFS= read -r backup; do
        log_info "删除旧备份: $backup"
        if ! clickhouse-backup delete local "$backup" >> "$LOG_FILE" 2>&1; then
            log_warn "删除旧备份失败: $backup"
        fi
    done <<< "$backups_to_delete"

    log_info "旧备份清理完成"
}

# Sanity-check the backup that was just created: its directory must exist
# under $BACKUP_DIR, and its on-disk size is logged when du can report it.
verify_backup() {
    local target="$BACKUP_DIR/$BACKUP_NAME"
    local size_text

    log_info "验证备份完整性..."

    # A missing backup directory is reported through log_error.
    [ -d "$target" ] || log_error "备份目录不存在: $target"

    # Human-readable size (first column of `du -sh`); empty when du fails.
    size_text=$(du -sh "$target" 2> /dev/null | cut -f1)
    case "$size_text" in
        "") log_warn "无法获取备份大小" ;;
        *) log_info "备份大小: $size_text" ;;
    esac

    log_info "备份验证完成"
}

# Entry point of the backup script: checks prerequisites, makes sure the
# configuration exists, creates and verifies a backup, then prunes old ones.
# Globals: LOG_DIR, CONFIG_FILE, BACKUP_NAME, BACKUP_DIR, LOG_FILE, BLUE, NC (read)
main() {
    # Every log_* helper appends to $LOG_FILE through `tee -a`. With `set -e`
    # active, a missing log directory makes the very first log line fail and
    # aborts the script, so the directory is created before anything logs.
    mkdir -p "$LOG_DIR"

    echo -e "${BLUE}========================================${NC}"
    echo -e "${BLUE}    ClickHouse 完整备份脚本${NC}"
    echo -e "${BLUE}========================================${NC}"

    # Main workflow
    check_dependencies
    create_directories

    # An existing configuration file is never overwritten.
    if [ ! -f "$CONFIG_FILE" ]; then
        create_config
    fi

    check_clickhouse_connection
    show_database_status
    perform_backup
    verify_backup
    cleanup_old_backups

    echo -e "${BLUE}========================================${NC}"
    log_info "备份流程完成!"
    log_info "备份名称: $BACKUP_NAME"
    log_info "备份位置: $BACKUP_DIR/$BACKUP_NAME"
    log_info "日志文件: $LOG_FILE"
    echo -e "${BLUE}========================================${NC}"
}

# Run the main function, forwarding the script's command-line arguments.
main "$@"

3、执行备份脚本

bash 复制代码
chmod +x backup.sh
./backup.sh

4、数据恢复

编写数据恢复脚本

bash 复制代码
vim restore.sh 
#!/bin/bash

# Stop at the first command that exits with a non-zero status.
set -o errexit

# Locations used by the restore workflow.
CONFIG_FILE='/etc/clickhouse-backup/config.yml'
BACKUP_DIR='/var/lib/clickhouse/backup'

# ANSI colour escape sequences (kept as literal text, expanded when printed).
NC="\033[0m"
BLUE="\033[0;34m"
YELLOW="\033[1;33m"
GREEN="\033[0;32m"
RED="\033[0;31m"

# Logging helper: write a green "[INFO]" line with a timestamp to stdout.
# Arguments: $1 - message text (backslash escapes are expanded)
log_info() {
    local stamp
    stamp=$(date '+%F %T')
    # %b expands backslash escapes in the argument the same way `echo -e` does.
    printf '%b\n' "${GREEN}[INFO]${NC} ${stamp} - $1"
}

# Write a yellow "[WARN]" line with a timestamp to stdout.
# Arguments: $1 - message text (backslash escapes are expanded)
log_warn() {
    local stamp
    stamp=$(date '+%F %T')
    # %b expands backslash escapes in the argument the same way `echo -e` does.
    printf '%b\n' "${YELLOW}[WARN]${NC} ${stamp} - $1"
}

# Write a red "[ERROR]" line with a timestamp to stdout, then terminate the
# script with exit status 1.
# Arguments: $1 - message text (backslash escapes are expanded)
log_error() {
    local stamp
    stamp=$(date '+%F %T')
    # %b expands backslash escapes in the argument the same way `echo -e` does.
    printf '%b\n' "${RED}[ERROR]${NC} ${stamp} - $1"
    exit 1
}

# Check the external tools used by the restore script.
# clickhouse-backup is mandatory: log_error is called when it is missing.
# clickhouse-client is only used by the "restore into a new database" mode,
# so its absence is reported as a warning instead of stopping the script.
check_dependencies() {
    if ! command -v clickhouse-backup &> /dev/null; then
        log_error "clickhouse-backup 未安装"
    fi

    if ! command -v clickhouse-client &> /dev/null; then
        log_warn "clickhouse-client 未安装,恢复到新数据库时无法检查和验证数据库"
    fi
}

# Print a numbered list of the local backups and store their names in the
# global array BACKUP_ARRAY, in listing order.
# Globals: BLUE, GREEN, NC (read); BACKUP_ARRAY (written)
# Exits (through log_error) when no backup is listed or none can be parsed.
list_backups() {
    log_info "可用的备份文件:"
    echo -e "${BLUE}========================================${NC}"

    # Only local backups are listed because this script restores with
    # `clickhouse-backup restore`, which is given a local backup name.
    local backup_list
    backup_list=$(clickhouse-backup list local 2>/dev/null) || true

    if [ -z "$backup_list" ]; then
        log_error "没有找到可用的备份文件"
    fi

    # The backup name is the first column; lines not starting with a letter
    # are skipped. The full line is kept as the detail text, because
    # `clickhouse-backup list` takes a location (all|local|remote) and cannot
    # be queried per backup name.
    local -a backups=() details=()
    local line name
    while IFS= read -r line; do
        read -r name _ <<< "$line"
        if [[ "$name" =~ ^[a-zA-Z] ]]; then
            backups+=("$name")
            details+=("$line")
        fi
    done <<< "$backup_list"

    if [ ${#backups[@]} -eq 0 ]; then
        log_error "无法解析备份列表"
    fi

    local index
    for index in "${!backups[@]}"; do
        echo -e "${GREEN}$((index + 1))${NC}. ${details[index]}"
    done

    echo -e "${BLUE}========================================${NC}"

    # Hand the names over to select_backup.
    BACKUP_ARRAY=("${backups[@]}")
}

# Ask the operator to pick one entry of BACKUP_ARRAY by number and store the
# chosen name in SELECTED_BACKUP. Invalid input is rejected and asked again.
# Globals: BACKUP_ARRAY (read); SELECTED_BACKUP (written)
# Exits (through log_error) when BACKUP_ARRAY is empty or input ends early.
select_backup() {
    if [ ${#BACKUP_ARRAY[@]} -eq 0 ]; then
        log_error "没有可用的备份"
    fi

    echo "请选择要恢复的备份 (输入数字):"

    local index
    for index in "${!BACKUP_ARRAY[@]}"; do
        printf '%d) %s\n' "$((index + 1))" "${BACKUP_ARRAY[index]}"
    done

    local choice
    while true; do
        # Stop instead of looping forever when stdin is closed.
        if ! read -r -p "请输入选择 (1-${#BACKUP_ARRAY[@]}): " choice; then
            log_error "未读取到输入,无法选择备份"
        fi

        # Up to 9 digits only, forced to base 10: an entry such as "08" would
        # otherwise be parsed as an invalid octal number in the arithmetic.
        if [[ "$choice" =~ ^[0-9]{1,9}$ ]]; then
            choice=$((10#$choice))
            if ((choice >= 1 && choice <= ${#BACKUP_ARRAY[@]})); then
                SELECTED_BACKUP="${BACKUP_ARRAY[choice - 1]}"
                break
            fi
        fi
        echo "无效选择,请重新输入"
    done

    log_info "已选择备份: $SELECTED_BACKUP"
}

# Show the tables belonging to $SELECTED_BACKUP together with a total count.
# First the output of `clickhouse-backup tables` is used; when that is empty
# the table names are derived from the *.sql files below the backup's
# metadata directory.
# Globals: SELECTED_BACKUP, BACKUP_DIR, BLUE, YELLOW, NC (read)
# Returns: 1 when neither source yields any user database, 0 otherwise
#
# NOTE(review): confirm that `clickhouse-backup tables <name>` reports the
# tables of the backup rather than those of the running server.
show_backup_tables() {
    log_info "备份 $SELECTED_BACKUP 中的表:"
    echo -e "${BLUE}========================================${NC}"

    # Method 1: ask clickhouse-backup.
    local tables
    tables=$(clickhouse-backup tables "$SELECTED_BACKUP" 2>/dev/null) || true
    local total_tables=0

    if [ -n "$tables" ]; then
        echo "$tables"
        total_tables=$(echo "$tables" | wc -l)
        echo -e "\n总表数: $total_tables"
    else
        # Method 2: read the table definitions from the backup directory.
        local metadata_dir="$BACKUP_DIR/$SELECTED_BACKUP/metadata"

        if [ ! -d "$metadata_dir" ]; then
            log_warn "无法获取备份表信息"
            return 1
        fi

        # One sub-directory per database; system databases are left out.
        local databases
        databases=$(find "$metadata_dir" -maxdepth 1 -type d ! -path "$metadata_dir" -exec basename {} \; \
            | grep -v -E '^(system|information_schema|INFORMATION_SCHEMA)$') || true

        if [ -z "$databases" ]; then
            log_warn "备份中没有找到用户数据库"
            return 1
        fi

        # Both loops read from here-strings so they run in the current shell;
        # a counter incremented inside a pipeline would be lost.
        local db table db_tables
        while IFS= read -r db; do
            db_tables=$(find "$metadata_dir/$db" -name "*.sql" -type f -exec basename {} .sql \;)
            if [ -n "$db_tables" ]; then
                echo -e "${YELLOW}数据库: $db${NC}"
                while IFS= read -r table; do
                    echo "  - $db.$table"
                    total_tables=$((total_tables + 1))
                done <<< "$db_tables"
                echo ""
            fi
        done <<< "$databases"

        echo -e "总表数: $total_tables"
    fi

    echo -e "${BLUE}========================================${NC}"
}

# Show the restore-mode menu and keep asking until the operator enters a
# number from 1 to 4; the matching mode keyword is stored in RESTORE_MODE
# (1 = full, 2 = tables, 3 = schema, 4 = newdb).
select_restore_mode() {
    local -a mode_names=("full" "tables" "schema" "newdb")
    local -a menu_labels=("恢复整个备份" "恢复特定表" "仅恢复表结构" "恢复到新数据库(不覆盖现有数据)")
    local idx

    echo "请选择恢复模式:"
    for idx in 0 1 2 3; do
        echo -e "${GREEN}$((idx + 1))${NC}. ${menu_labels[idx]}"
    done

    while true; do
        read -p "请输入选择 (1-4): " mode
        if [[ "$mode" =~ ^[1-4]$ ]]; then
            # Menu numbers are 1-based, the lookup array is 0-based.
            RESTORE_MODE="${mode_names[mode - 1]}"
            break
        fi
        echo "无效选择,请重新输入"
    done
}

# Show the tables contained in the selected backup, then read the table
# pattern(s) to restore from the operator.
# Globals: RESTORE_TABLES (written; empty when the operator enters "all")
input_tables_to_restore() {
    # The listing is only a hint for the operator. Its non-zero return value
    # must not end the script under `set -e`, so it is downgraded to a warning.
    show_backup_tables || log_warn "无法列出备份中的表,请直接输入要恢复的表名"

    echo ""
    echo "请输入要恢复的表(格式: database.table_name):"
    echo "示例:"
    echo "  - 恢复单个表: test_db.test_table"
    echo "  - 恢复多个表: test_db.table1 test_db.table2"
    echo "  - 使用通配符: test_db.*"
    echo "  - 恢复所有表: all"
    echo ""

    # -r keeps backslashes in the entered pattern literal.
    local tables_input
    read -r -p "请输入表名: " tables_input

    if [ "$tables_input" = "all" ]; then
        RESTORE_TABLES=""
    else
        RESTORE_TABLES="$tables_input"
    fi
}

# Read the name of the database to restore into and store it in NEW_DATABASE.
# When a database with that name already exists the operator must confirm;
# declining ends the script with status 0.
# Globals: NEW_DATABASE (written)
# Exits (through log_error) on an empty or invalid name.
input_new_database_name() {
    echo ""
    echo "注意:这将创建一个新的数据库,不会覆盖现有数据"

    local new_db confirm
    read -r -p "请输入新数据库名称: " new_db

    if [ -z "$new_db" ]; then
        log_error "数据库名称不能为空"
    fi

    # The name is interpolated into SQL text, so only plain identifiers are
    # accepted; quotes, semicolons or spaces are rejected up front.
    if [[ ! "$new_db" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        log_error "数据库名称只能包含字母,数字和下划线,且不能以数字开头"
    fi

    # Check whether the database already exists. The result is compared as an
    # exact fixed string, so "db" does not match "db_old".
    if clickhouse-client --host=localhost --port=9000 --user=root --password=123456 --query="SHOW DATABASES LIKE '$new_db'" 2>/dev/null | grep -qxF -- "$new_db"; then
        log_warn "数据库 $new_db 已存在"
        read -r -p "是否继续?可能会覆盖现有表 (y/N): " confirm
        if [ "$confirm" != "y" ] && [ "$confirm" != "Y" ]; then
            exit 0
        fi
    fi

    NEW_DATABASE="$new_db"
}

# Print a summary of the pending restore (backup, mode and mode-specific
# details) and ask for confirmation. Any answer other than "y" or "Y"
# cancels: a notice is logged and the script exits with status 0.
confirm_restore() {
    echo ""
    echo -e "${YELLOW}=== 恢复操作确认 ===${NC}"
    echo "备份文件: $SELECTED_BACKUP"
    echo "恢复模式: $RESTORE_MODE"

    if [[ "$RESTORE_MODE" == "full" ]]; then
        echo "恢复内容: 完整备份(所有数据库和表)"
        echo -e "${RED}警告: 这将覆盖现有数据!${NC}"
    elif [[ "$RESTORE_MODE" == "tables" ]]; then
        # An empty selection or the keyword "all" means every table.
        if [[ -n "$RESTORE_TABLES" && "$RESTORE_TABLES" != "all" ]]; then
            echo "恢复表: $RESTORE_TABLES"
        else
            echo "恢复表: 所有表"
        fi
    elif [[ "$RESTORE_MODE" == "schema" ]]; then
        echo "恢复内容: 仅表结构"
    elif [[ "$RESTORE_MODE" == "newdb" ]]; then
        echo "新数据库: $NEW_DATABASE"
    fi

    read -p "是否继续恢复操作? (y/N): " confirm

    case "$confirm" in
        y | Y) ;;
        *)
            log_info "恢复操作已取消"
            exit 0
            ;;
    esac
}

# Run `clickhouse-backup restore` for $SELECTED_BACKUP according to
# $RESTORE_MODE and log the elapsed time.
#   full   - restore everything
#   tables - restore only the patterns in $RESTORE_TABLES (everything when
#            that is empty or "all")
#   schema - restore table definitions only
#   newdb  - restore one backed-up database under the name $NEW_DATABASE
# Globals: RESTORE_MODE, RESTORE_TABLES, SELECTED_BACKUP, BACKUP_DIR,
#          NEW_DATABASE (read)
# Exits (through log_error) when the restore command fails or, in newdb mode,
# when no source database can be found in the backup.
perform_restore() {
    log_info "开始执行恢复操作..."
    local start_time end_time duration
    start_time=$(date +%s)

    # Mode-specific options are collected here and the command runs once.
    local -a restore_args=()

    case "$RESTORE_MODE" in
        "full")
            log_info "执行完整恢复..."
            ;;
        "tables")
            if [ -z "$RESTORE_TABLES" ] || [ "$RESTORE_TABLES" = "all" ]; then
                log_info "恢复所有表..."
            else
                # --table expects comma-separated patterns, while the prompt
                # lets the operator separate them with spaces.
                local table_patterns
                table_patterns=$(tr -s '[:space:],' ',' <<< "$RESTORE_TABLES")
                table_patterns=${table_patterns#,}
                table_patterns=${table_patterns%,}
                log_info "恢复指定表: $table_patterns"
                restore_args+=(--table "$table_patterns")
            fi
            ;;
        "schema")
            log_info "仅恢复表结构..."
            restore_args+=(--schema)
            ;;
        "newdb")
            log_info "恢复到新数据库: $NEW_DATABASE"
            # Take the source database from the backup metadata: system
            # databases are skipped and the first name in sorted order is
            # used, so the choice is deterministic.
            local metadata_dir="$BACKUP_DIR/$SELECTED_BACKUP/metadata"
            local original_db
            original_db=$(find "$metadata_dir" -maxdepth 1 -type d ! -path "$metadata_dir" -exec basename {} \; 2>/dev/null \
                | grep -v -E '^(system|information_schema|INFORMATION_SCHEMA)$' \
                | sort \
                | head -1) || true
            if [ -z "$original_db" ]; then
                log_error "无法确定原数据库名"
            fi
            # The option is named --restore-database-mapping in clickhouse-backup.
            restore_args+=(--rm=false --restore-database-mapping "$original_db:$NEW_DATABASE")
            ;;
    esac

    if ! clickhouse-backup restore "${restore_args[@]}" "$SELECTED_BACKUP"; then
        log_error "恢复操作失败: $SELECTED_BACKUP"
    fi

    end_time=$(date +%s)
    duration=$((end_time - start_time))
    log_info "✓ 恢复操作完成 (耗时: ${duration}秒)"
}

# Post-restore check. For the full, tables and schema modes only a reminder
# to verify manually is logged. For the newdb mode the server is asked
# whether $NEW_DATABASE exists; its table count is logged on success and
# log_error is called otherwise.
verify_restore() {
    local -a ch=(clickhouse-client --host=localhost --port=9000 --user=root --password=123456)
    local table_count

    log_info "验证恢复结果..."

    if [[ "$RESTORE_MODE" == "full" || "$RESTORE_MODE" == "tables" ]]; then
        log_info "恢复完成,请手动验证数据完整性"
    elif [[ "$RESTORE_MODE" == "schema" ]]; then
        log_info "表结构恢复完成,请验证表结构"
    elif [[ "$RESTORE_MODE" == "newdb" ]]; then
        if "${ch[@]}" --query="SHOW DATABASES LIKE '$NEW_DATABASE'" 2> /dev/null | grep -q "$NEW_DATABASE"; then
            # One output line per table.
            table_count=$("${ch[@]}" --query="SHOW TABLES FROM $NEW_DATABASE" 2> /dev/null | wc -l)
            log_info "✓ 新数据库 $NEW_DATABASE 创建成功,包含 $table_count 个表"
        else
            log_error "新数据库创建失败"
        fi
    fi
}

# Entry point of the restore script: pick a backup and a restore mode,
# collect the extra input that mode needs, confirm, restore and verify.
main() {
    local rule="${BLUE}========================================${NC}"

    echo -e "$rule"
    echo -e "${BLUE}    ClickHouse 数据恢复脚本${NC}"
    echo -e "$rule"

    # Selection phase
    check_dependencies
    list_backups
    select_backup
    select_restore_mode

    # Only the "tables" and "newdb" modes need additional operator input.
    if [[ "$RESTORE_MODE" == "tables" ]]; then
        input_tables_to_restore
    elif [[ "$RESTORE_MODE" == "newdb" ]]; then
        input_new_database_name
    fi

    # Confirmation and execution phase
    confirm_restore
    perform_restore
    verify_restore

    echo -e "$rule"
    log_info "恢复流程完成!"
    echo -e "$rule"
}

# Run the main function, forwarding the script's command-line arguments.
main "$@"
相关推荐
2301_807583234 天前
ubuntu22.04集群部署clickhouse详细步骤
linux·clickhouse·zookeeper
Azure++6 天前
Centos安装clickhouse
linux·clickhouse·centos
阳爱铭9 天前
ClickHouse 中至关重要的两类复制表引擎——ReplicatedMergeTree和 ReplicatedReplacingMergeTree
大数据·hive·hadoop·sql·clickhouse·spark·hbase
liao__ran14 天前
ClickHouse CPU 排查快速参考指南
运维·服务器·clickhouse
XueminXu14 天前
ClickHouse查看数据库、表、列等元数据信息
clickhouse·system·元数据·databases·system.tables·system.columns·system.settings
liao__ran14 天前
ClickHouse CPU 排查详细指南
运维·clickhouse
现在,此刻15 天前
高可用与高性能数据库配置实践分析(pgSql && clickhouse)
数据库·clickhouse
yourkin66616 天前
clickhouse
clickhouse
努力成为一个程序猿.16 天前
Clickhouse数据副本和分片
运维·clickhouse·debian