解读广告数仓(四) - 指标计算与应用实现

目录

  1. 实时指标计算
  2. Doris应用表设计
  3. BI看板实现
  4. 决策API开发
  5. 自动化优化

实时指标计算

1.1 Flink实时指标计算

sql 复制代码
-- Flink SQL作业: 实时广告指标计算

-- Kafka source: raw ad impression events, decoded from JSON.
CREATE TEMPORARY TABLE source_impression (
    impr_id STRING,
    -- TIMESTAMP is a reserved keyword in Flink SQL, so the column name must be back-quoted.
    -- NOTE(review): downstream SQL divides this value by 1000, so it is presumably epoch milliseconds - confirm.
    `timestamp` BIGINT,
    advertiser_id INT,
    campaign_id INT,
    adgroup_id INT,
    creative_id INT,
    platform_id INT,
    region_id INT,
    cost DECIMAL(10, 4),
    -- Processing-time attribute generated by Flink; it is not read from the Kafka record.
    proctime AS PROCTIME()
) WITH (
    'connector' = 'kafka',
    'topic' = 'raw_impression',
    'properties.bootstrap.servers' = 'kafka:9092',
    'properties.group.id' = 'flink-impression',
    'format' = 'json'
);

-- Kafka source: raw ad click events, decoded from JSON. Each click carries the impr_id it belongs to.
CREATE TEMPORARY TABLE source_click (
    click_id STRING,
    impr_id STRING,
    -- TIMESTAMP is a reserved keyword in Flink SQL, so the column name must be back-quoted.
    -- NOTE(review): presumably epoch milliseconds, like the impression stream - confirm.
    `timestamp` BIGINT,
    advertiser_id INT,
    campaign_id INT,
    cost DECIMAL(10, 4),
    -- Processing-time attribute generated by Flink; it is not read from the Kafka record.
    proctime AS PROCTIME()
) WITH (
    'connector' = 'kafka',
    'topic' = 'raw_click',
    'properties.bootstrap.servers' = 'kafka:9092',
    -- Explicit consumer group, mirroring source_impression, so committed offsets are attributable to this job.
    'properties.group.id' = 'flink-click',
    'format' = 'json'
);

-- Real-time metrics, one row per (5-minute bucket, advertiser, campaign).
--
-- Defined as a TEMPORARY VIEW: Flink's CREATE TABLE ... AS SELECT cannot create temporary tables,
-- and a view keeps the name real_time_metrics usable by the INSERT that follows.
--
-- Every column is qualified because impr_id, `timestamp`, advertiser_id, campaign_id and cost
-- exist in both joined sources and would otherwise be ambiguous.
--
-- NOTE(review): total_cost sums the impression-side cost; an impression with several clicks is
-- repeated by the join and its cost is counted once per click. Confirm whether that is acceptable.
-- NOTE(review): this is a regular (non-windowed) streaming join producing an updating result;
-- configure a state TTL and make sure the sink can absorb updates.
CREATE TEMPORARY VIEW real_time_metrics AS
SELECT
    -- Floor the epoch-millisecond event time to a 5-minute boundary (300000 ms), then convert
    -- seconds -> 'yyyy-MM-dd HH:mm:ss' string -> TIMESTAMP.
    TO_TIMESTAMP(FROM_UNIXTIME(
        CAST((i.`timestamp` - MOD(i.`timestamp`, 300000)) / 1000 AS BIGINT)
    )) AS stat_time,
    i.advertiser_id,
    i.campaign_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    COUNT(DISTINCT c.click_id) AS click_cnt,
    -- Cast before dividing: integer / integer would truncate the ratio to 0.
    -- NULLIF turns an empty denominator into NULL instead of a division error.
    CAST(
        CAST(COUNT(DISTINCT c.click_id) AS DECIMAL(20, 4))
            / NULLIF(COUNT(DISTINCT i.impr_id), 0)
        AS DECIMAL(5, 4)
    ) AS ctr,
    CAST(SUM(i.cost) AS DECIMAL(10, 4)) AS total_cost,
    -- Click-side cost per distinct click; NULL when the bucket has no clicks.
    CAST(
        SUM(c.cost) / NULLIF(COUNT(DISTINCT c.click_id), 0)
        AS DECIMAL(10, 4)
    ) AS cpc
FROM source_impression AS i
LEFT JOIN source_click AS c
    ON i.impr_id = c.impr_id
GROUP BY
    TO_TIMESTAMP(FROM_UNIXTIME(
        CAST((i.`timestamp` - MOD(i.`timestamp`, 300000)) / 1000 AS BIGINT)
    )),
    i.advertiser_id,
    i.campaign_id;

-- Doris sink for the real-time metrics stream.
-- The Flink Doris connector addresses its target with a single 'table.identifier' ("db.table")
-- and needs credentials; 'database.name' / 'table.name' are not options it recognises.
CREATE TEMPORARY TABLE sink_rt_metrics (
    stat_time TIMESTAMP,
    advertiser_id INT,
    campaign_id INT,
    pv BIGINT,
    click_cnt BIGINT,
    ctr DECIMAL(5, 4),
    total_cost DECIMAL(10, 4),
    cpc DECIMAL(10, 4)
) WITH (
    'connector' = 'doris',
    'fenodes' = 'doris-fe:8030',
    'table.identifier' = 'adtech_dwh.rt_metrics',
    -- Placeholders: supply real credentials through your deployment's secret mechanism.
    'username' = '<doris_user>',
    'password' = '<doris_password>',
    -- Stream Load label prefix; it should be unique per job.
    'sink.label-prefix' = 'flink_rt_metrics',
    'sink.properties.format' = 'json',
    -- Required together with format=json so that each line is parsed as one JSON row.
    'sink.properties.read_json_by_line' = 'true'
);

-- Ship the aggregated metrics to Doris. Columns are listed explicitly so that a change in
-- real_time_metrics cannot silently shift values into the wrong sink column.
INSERT INTO sink_rt_metrics
SELECT
    stat_time,
    advertiser_id,
    campaign_id,
    pv,
    click_cnt,
    ctr,
    total_cost,
    cpc
FROM real_time_metrics;

1.2 Doris汇总指标计算

sql 复制代码
-- 每小时执行一次的汇总计算

-- 1. Hourly rollup per advertiser / campaign / ad group.
--
-- impr_id exists in both fact_impression and fact_click, and click_id in both fact_click and
-- fact_conversion (they are the join keys), so every column is qualified to avoid ambiguity.
-- NOTE(review): event_time, advertiser_id, campaign_id, adgroup_id and cost are assumed to come
-- from fact_impression - confirm against the fact table schemas.
-- NOTE(review): an impression with several clicks/conversions is repeated by the joins, so
-- SUM(i.cost) counts its cost more than once; pre-aggregate clicks if that matters.
-- NOTE(review): each run re-inserts the last hours; this is only idempotent if
-- hourly_campaign_stats is a UNIQUE KEY table - confirm.
INSERT INTO hourly_campaign_stats
SELECT
    -- Doris documents DATE_TRUNC as (datetime, unit).
    DATE_TRUNC(FROM_UNIXTIME(i.event_time / 1000), 'hour') AS hour_time,
    i.advertiser_id,
    i.campaign_id,
    i.adgroup_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    -- Impressions that received at least one click.
    COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) AS click_cnt,
    ROUND(COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) /
        NULLIF(COUNT(DISTINCT i.impr_id), 0), 4) AS ctr,
    SUM(i.cost) AS total_cost,
    -- Clicks that led to at least one conversion.
    COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) AS conv_cnt,
    ROUND(COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) /
        NULLIF(COUNT(DISTINCT c.click_id), 0), 4) AS cvr,
    SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) AS revenue,
    -- NULL (not an error) when the bucket has no conversions.
    SUM(i.cost) / NULLIF(COUNT(DISTINCT v.conversion_id), 0) AS cpa
FROM fact_impression AS i
LEFT JOIN fact_click AS c
    ON i.impr_id = c.impr_id
LEFT JOIN fact_conversion AS v
    ON c.click_id = v.click_id
-- Start the window on an hour boundary; starting at "now - 2h" mid-hour would aggregate only
-- part of the oldest hour and write an understated row for it.
WHERE i.event_time >= UNIX_TIMESTAMP(DATE_TRUNC(DATE_SUB(NOW(), INTERVAL 2 HOUR), 'hour')) * 1000
GROUP BY
    DATE_TRUNC(FROM_UNIXTIME(i.event_time / 1000), 'hour'),
    i.advertiser_id,
    i.campaign_id,
    i.adgroup_id;

-- 2. Daily rollup per advertiser / campaign for yesterday.
--
-- Doris requires the TABLE keyword after INSERT OVERWRITE. Without a partition spec the statement
-- replaces the ENTIRE table, which would wipe all history and keep only yesterday's rows.
-- PARTITION(*) restricts the overwrite to the partitions the new rows fall into.
-- NOTE(review): PARTITION(*) assumes daily_campaign_stats is partitioned by stat_date and that the
-- Doris version supports auto-detect partition overwrite (2.1+). Otherwise name the partition
-- explicitly or use INSERT INTO on a UNIQUE KEY table.
-- NOTE(review): event_time, advertiser_id, campaign_id and cost are assumed to come from
-- fact_impression; impr_id / click_id are qualified because they exist on both sides of the joins.
INSERT OVERWRITE TABLE daily_campaign_stats PARTITION(*)
SELECT
    DATE(FROM_UNIXTIME(i.event_time / 1000)) AS stat_date,
    i.advertiser_id,
    i.campaign_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    -- Impressions that received at least one click.
    COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) AS click_cnt,
    ROUND(COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) /
        NULLIF(COUNT(DISTINCT i.impr_id), 0), 4) AS ctr,
    SUM(i.cost) AS total_cost,
    -- Clicks that led to at least one conversion.
    COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) AS conv_cnt,
    SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) AS revenue,
    -- NULL (not an error) when there are no conversions.
    SUM(i.cost) / NULLIF(COUNT(DISTINCT v.conversion_id), 0) AS cpa,
    -- ROI = (revenue - cost) / cost; NULL when nothing was spent.
    (SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) - SUM(i.cost))
        / NULLIF(SUM(i.cost), 0) AS roi
FROM fact_impression AS i
LEFT JOIN fact_click AS c
    ON i.impr_id = c.impr_id
LEFT JOIN fact_conversion AS v
    ON c.click_id = v.click_id
-- Half-open range on the raw column (yesterday 00:00 inclusive to today 00:00 exclusive) so that
-- partition/index pruning on event_time stays possible.
WHERE i.event_time >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 1 DAY)) * 1000
  AND i.event_time < UNIX_TIMESTAMP(CURDATE()) * 1000
GROUP BY
    DATE(FROM_UNIXTIME(i.event_time / 1000)),
    i.advertiser_id,
    i.campaign_id;

-- 3. Platform benchmark: yesterday's spend and CPA per (platform, advertiser), with each
-- advertiser's platforms ranked by spend.
--
-- The aggregation runs in a derived table and the ranking is applied on top of it: a window
-- function cannot ORDER BY a select-list alias (total_cost) of the same query level, and it must
-- rank the aggregated rows rather than the raw joined rows.
-- NOTE(review): event_time, platform_id, advertiser_id and cost are assumed to come from
-- fact_impression; impr_id / click_id are qualified because they exist on both sides of the joins.
INSERT INTO platform_comparison_stats
SELECT
    s.stat_date,
    s.platform_id,
    s.advertiser_id,
    s.pv,
    s.total_cost,
    s.conv_cnt,
    s.cpa,
    -- platform_id is a tie-breaker so equal spends always rank in the same order.
    ROW_NUMBER() OVER (
        PARTITION BY s.stat_date, s.advertiser_id
        ORDER BY s.total_cost DESC, s.platform_id
    ) AS platform_rank
FROM (
    SELECT
        DATE(FROM_UNIXTIME(i.event_time / 1000)) AS stat_date,
        i.platform_id,
        i.advertiser_id,
        COUNT(DISTINCT i.impr_id) AS pv,
        SUM(i.cost) AS total_cost,
        COUNT(DISTINCT v.conversion_id) AS conv_cnt,
        -- NULL (not an error) when there are no conversions.
        SUM(i.cost) / NULLIF(COUNT(DISTINCT v.conversion_id), 0) AS cpa
    FROM fact_impression AS i
    LEFT JOIN fact_click AS c
        ON i.impr_id = c.impr_id
    LEFT JOIN fact_conversion AS v
        ON c.click_id = v.click_id
    -- Yesterday as a half-open range on the raw column.
    WHERE i.event_time >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 1 DAY)) * 1000
      AND i.event_time < UNIX_TIMESTAMP(CURDATE()) * 1000
    GROUP BY
        DATE(FROM_UNIXTIME(i.event_time / 1000)),
        i.platform_id,
        i.advertiser_id
) AS s;

Doris应用表设计

2.1 核心应用表

sql 复制代码
-- Real-time dashboard metrics (refreshed every 5 minutes).
-- ENGINE must be OLAP for a native Doris table; "DORIS" is not a table engine.
CREATE TABLE rt_dashboard_metrics (
    stat_time DATETIME,
    advertiser_id INT,
    campaign_id INT,
    pv BIGINT,
    click_cnt BIGINT,
    ctr DECIMAL(5, 4),
    cost DECIMAL(12, 2),
    roi DECIMAL(6, 2)
) ENGINE=OLAP
DUPLICATE KEY (stat_time, advertiser_id, campaign_id)
PARTITION BY RANGE (stat_time) (
    -- Catch-all for everything before 2025-12-08. On its own, rows on or after that date would
    -- be rejected for lack of a partition, so dynamic partitioning creates the later ones.
    PARTITION p_latest VALUES LESS THAN ("2025-12-08")
)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64
PROPERTIES (
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    -- Pre-create partitions a few days ahead. No "start" is set, so nothing is dropped automatically.
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "64"
);

-- Advertiser daily report (refreshed once a day), one row per (report_date, advertiser_id).
-- ENGINE must be OLAP for a native Doris table; "DORIS" is not a table engine.
CREATE TABLE daily_advertiser_report (
    report_date DATE,
    advertiser_id INT,
    advertiser_name STRING,
    total_cost DECIMAL(12, 2),
    total_revenue DECIMAL(12, 2),
    roi DECIMAL(6, 2),
    cpa DECIMAL(8, 2),
    top_campaign STRING,
    budget_used_pct DECIMAL(5, 2),
    forecast_exceed_budget INT  -- 0: not expected to exceed budget, 1: may exceed
) ENGINE=OLAP
-- UNIQUE KEY: re-loading a day replaces the existing row instead of duplicating it.
UNIQUE KEY (report_date, advertiser_id)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64;

-- Channel benchmark table (refreshed hourly): one wide row per (hour, advertiser) with the CPA
-- of each channel side by side.
-- ENGINE must be OLAP for a native Doris table; "DORIS" is not a table engine.
CREATE TABLE hourly_channel_compare (
    compare_hour DATETIME,
    advertiser_id INT,
    baidu_cpa DECIMAL(8, 2),
    douyin_cpa DECIMAL(8, 2),
    kuaishou_cpa DECIMAL(8, 2),
    xiaohongshu_cpa DECIMAL(8, 2),
    best_channel STRING,
    worst_channel STRING,
    avg_cpa DECIMAL(8, 2),
    cpa_variance DECIMAL(8, 2)  -- holds the standard deviation, despite the column name
) ENGINE=OLAP
DUPLICATE KEY (compare_hour, advertiser_id)
-- RANGE partitioning needs a partition list; it is left empty here and filled by the dynamic
-- partition scheduler configured in PROPERTIES.
PARTITION BY RANGE (compare_hour) ()
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 32
PROPERTIES (
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    -- Pre-create partitions a few days ahead. No "start" is set, so nothing is dropped automatically.
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "32"
);

-- User conversion funnel, one row per (funnel_date, advertiser_id, campaign_id):
-- stage counts from impression to purchase plus the step-to-step rates.
-- ENGINE must be OLAP for a native Doris table; "DORIS" is not a table engine.
CREATE TABLE user_conversion_funnel (
    funnel_date DATE,
    advertiser_id INT,
    campaign_id INT,
    impression_count BIGINT,
    click_count BIGINT,
    landing_count BIGINT,
    add_to_cart_count BIGINT,
    purchase_count BIGINT,
    imp_to_click_rate DECIMAL(5, 4),
    click_to_landing_rate DECIMAL(5, 4),
    landing_to_purchase_rate DECIMAL(5, 4),
    overall_conversion_rate DECIMAL(5, 4)
) ENGINE=OLAP
-- UNIQUE KEY: re-loading a day replaces the existing row instead of duplicating it.
UNIQUE KEY (funnel_date, advertiser_id, campaign_id)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64;

BI看板实现

3.1 实时看板结构

ini 复制代码
┌─────────────────────────────────────────────────────┐
│            广告数仓实时监控看板                      │
├─────────────────────────────────────────────────────┤
│                                                     │
│ [实时数据概览卡]                                     │
│ ├─ 当日投放成本: ¥1,234,567 (↑15% vs 昨日)        │
│ ├─ 当日点击数: 12,345,678 (↑8%)                     │
│ ├─ 当日转化数: 123,456 (↑12%)                       │
│ └─ 实时ROI: 4.2x (目标: 4.0x)  ✓                   │
│                                                     │
│ [时间序列图表]                                      │
│ ├─ 小时成本趋势 (折线图)                            │
│ │  └─ 显示成本在一天内的波动                        │
│ ├─ 小时点击数 (柱状图)                              │
│ │  └─ 显示不同时段的流量差异                        │
│ └─ 实时ROI (仪表板)                                 │
│    └─ 实时显示当日平均ROI值                         │
│                                                     │
│ [渠道对比分析]                                      │
│ ├─ 各平台成本分布 (饼图)                            │
│ │  └─ 百度(25%) | 抖音(35%) | 快手(20%) | 其他(20%)│
│ ├─ 各平台CPA对比 (条形图)                           │
│ │  └─ 左排序: 最好的CPA在最左                       │
│ └─ 各平台转化率对比                                 │
│    └─ 抖音>快手>百度                               │
│                                                     │
│ [采购商排行榜]                                      │
│ ├─ 消费金额Top 10                                   │
│ ├─ ROI Top 10                                       │
│ ├─ CPA最低Top 10                                    │
│ └─ 转化数Top 10                                     │
│                                                     │
│ [告警与异常检测]                                    │
│ ├─ 红色告警区域:                                    │
│ │  ├─ 某采购商成本超额                              │
│ │  ├─ 某渠道CPA突增>30%                             │
│ │  └─ 数据延迟>15分钟                               │
│ └─ 黄色预警区域:                                    │
│    ├─ 某采购商成本接近限额                          │
│    └─ 某渠道流量异常波动                            │
│                                                     │
│ [查询过滤器]                                        │
│ ├─ 日期范围选择                                     │
│ ├─ 采购商过滤 (单选/多选)                           │
│ ├─ 渠道过滤                                         │
│ ├─ 活动过滤                                         │
│ └─ 自定义指标组合                                   │
│                                                     │
└─────────────────────────────────────────────────────┘

3.2 BI工具配置 (Metabase示例)

json 复制代码
{
  "dashboards": [
    {
      "name": "广告实时监控看板",
      "description": "实时广告投放效果监控 (每5分钟刷新)",
      "refresh_rate": "300s",
      "cards": [
        {
          "name": "当日投放成本",
          "type": "scalar",
          "query": "SELECT SUM(total_cost) FROM daily_advertiser_report WHERE report_date = CURDATE()",
          "compare_with": "SELECT SUM(total_cost) FROM daily_advertiser_report WHERE report_date = DATE_SUB(CURDATE(), INTERVAL 1 DAY)",
          "threshold": {
            "good": 0,
            "bad": 10000000
          }
        },
        {
          "name": "小时成本趋势",
          "type": "line_chart",
          "query": "SELECT hour_time, SUM(total_cost) AS total_cost FROM hourly_campaign_stats WHERE hour_time >= CURDATE() GROUP BY hour_time ORDER BY hour_time",
          "x_axis": "hour_time",
          "y_axis": "total_cost"
        },
        {
          "name": "渠道CPA对比",
          "type": "bar_chart",
          "query": "SELECT best_channel, baidu_cpa, douyin_cpa, kuaishou_cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR)",
          "x_axis": "channel",
          "y_axis": "cpa"
        },
        {
          "name": "转化漏斗",
          "type": "funnel_chart",
          "query": "SELECT impression_count, click_count, landing_count, add_to_cart_count, purchase_count FROM user_conversion_funnel WHERE funnel_date = CURDATE()",
          "funnel_stages": ["展示", "点击", "访问", "加购", "支付"]
        }
      ]
    }
  ]
}

决策API开发

4.1 核心API设计

python 复制代码
# api_adtech.py - FastAPI应用

from fastapi import FastAPI, Query
from typing import List, Optional
from datetime import datetime, timedelta
import doris_client

# FastAPI application object; the route decorators below register their handlers on it.
app = FastAPI(title="广告数仓决策API")
# Module-level Doris client shared by the handlers.
# NOTE(review): 8030 is the port used for the Doris FE HTTP endpoint elsewhere in this article;
# SQL over the MySQL protocol conventionally uses 9030 - confirm which one doris_client expects.
doris = doris_client.DorisClient(host="doris-fe", port=8030)

# API 1: real-time cost monitoring
@app.get("/v1/advertiser/{advertiser_id}/current_cost")
async def get_current_cost(
    advertiser_id: int,
    time_range: str = Query("today", enum=["today", "week", "month"])
):
    """
    Return an advertiser's accumulated spend over the requested time range.

    Args:
        advertiser_id: Advertiser ID (path parameter).
        time_range: One of "today", "week" or "month"; selects how many days
            of daily_advertiser_report are summed (0, 7 or 30 days back).

    Returns:
        Whatever ``doris.query`` returns for the aggregation query: one row
        per advertiser with ``advertiser_id``, ``current_cost`` (sum of
        total_cost) and ``budget_used_pct`` (maximum over the range).
        NOTE(review): the result shape depends on doris_client - confirm it
        is JSON-serialisable, and extend it if fields such as budget_limit or
        forecast_total_cost are needed (daily_advertiser_report has no budget
        column to read them from).

    Raises:
        HTTPException: 422 when time_range is not one of the supported values
            (the ``enum`` hint on Query only documents the values, so the
            check is done here).
    """
    # Local import keeps this handler self-contained; fastapi is already a dependency of the module.
    from fastapi import HTTPException

    lookback_days = {"today": 0, "week": 7, "month": 30}.get(time_range)
    if lookback_days is None:
        raise HTTPException(status_code=422, detail=f"unsupported time_range: {time_range}")

    # Both interpolated values are integers (FastAPI has already validated advertiser_id as int,
    # and lookback_days comes from the fixed map above), so no untrusted text reaches the SQL.
    # GROUP BY is required because advertiser_id is selected next to aggregates.
    query = f"""
    SELECT 
        advertiser_id,
        SUM(total_cost) as current_cost,
        MAX(budget_used_pct) as budget_used_pct
    FROM daily_advertiser_report
    WHERE advertiser_id = {int(advertiser_id)}
    AND report_date >= DATE_SUB(CURDATE(), INTERVAL {lookback_days} DAY)
    GROUP BY advertiser_id
    """
    result = doris.query(query)
    return result

# API 2: channel comparison / optimisation input
@app.get("/v1/advertiser/{advertiser_id}/channel_optimization")
async def get_channel_optimization(
    advertiser_id: int,
    optimization_goal: str = Query("roi", enum=["roi", "cpa", "volume"])
):
    """
    Return the advertiser's average CPA per channel over the last 7 days.

    Args:
        advertiser_id: Advertiser ID (path parameter).
        optimization_goal: Optimisation target ("roi", "cpa" or "volume").
            NOTE(review): accepted for interface compatibility but not yet
            used; hourly_channel_compare only stores CPA figures, so ROI- or
            volume-driven advice needs another data source.

    Returns:
        Whatever ``doris.query`` returns for the aggregation query: a single
        row with the 7-day average of ``baidu_cpa``, ``douyin_cpa``,
        ``kuaishou_cpa`` and ``xiaohongshu_cpa`` plus ``data_points`` (number
        of hourly rows averaged).
        NOTE(review): turning these figures into the "recommend_actions"
        structure is still to be implemented.
    """
    # hourly_channel_compare is a wide table (one CPA column per channel); it has no
    # platform_id, cpa or roi columns, so the averages are taken per channel column.
    # advertiser_id is already validated as int by FastAPI; int() makes that explicit.
    query = f"""
    SELECT 
        AVG(baidu_cpa) as baidu_cpa,
        AVG(douyin_cpa) as douyin_cpa,
        AVG(kuaishou_cpa) as kuaishou_cpa,
        AVG(xiaohongshu_cpa) as xiaohongshu_cpa,
        COUNT(*) as data_points
    FROM hourly_channel_compare
    WHERE advertiser_id = {int(advertiser_id)}
    AND compare_hour >= DATE_SUB(NOW(), INTERVAL 7 DAY)
    """
    result = doris.query(query)
    return result

# API 3: budget alerting
@app.get("/v1/advertiser/{advertiser_id}/budget_forecast")
async def get_budget_forecast(advertiser_id: int):
    """
    Forecast whether the advertiser will exceed this month's budget.

    Not implemented yet: the handler currently returns None.

    Intended response shape:
        {
          "forecast_status": "warning",      (normal / warning / critical)
          "monthly_budget": 1000000,
          "current_spent": 625000,
          "projected_end_month": 950000,
          "days_remaining": 8,
          "daily_burn_rate": 75000,
          "recommended_action": "保持当前投放"
        }
    """
    return None

# API 4: A/B test data
@app.get("/v1/advertiser/{advertiser_id}/ab_test_results/{test_id}")
async def get_ab_test_results(advertiser_id: int, test_id: str):
    """
    Fetch the live results of an A/B test.

    Not implemented yet: the handler currently returns None.

    Intended response shape:
        {
          "test_id": "test_creative_v1",
          "control_group": {
            "sample_size": 50000,
            "conversion_rate": 0.0245,
            "cpa": 48.5
          },
          "test_group": {
            "sample_size": 50000,
            "conversion_rate": 0.0312,
            "cpa": 38.2
          },
          "statistical_significance": 0.95,   (95% confidence)
          "recommendation": "test_group 胜出, 建议全量投放"
        }
    """
    return None

# API 5: automatic bidding suggestion
@app.post("/v1/advertiser/{advertiser_id}/auto_bidding_suggestion")
async def get_auto_bidding_suggestion(
    advertiser_id: int,
    target_cpa: Optional[float] = None,
    target_roi: Optional[float] = None
):
    """
    Produce a bid suggestion for the advertiser.

    Not implemented yet: the handler currently returns None.

    Args:
        advertiser_id: Advertiser ID (path parameter).
        target_cpa: Target CPA, in yuan.
        target_roi: Target ROI, as a multiple.

    Intended response shape:
        {
          "suggested_bid": 12.5,
          "current_bid": 10.0,
          "adjustment": "+25%",
          "expected_daily_volume": 15000,
          "expected_daily_cost": 187500,
          "expected_cpa": 42.3,
          "execution_recommendation": "立即执行"   (or "谨慎调整" / "保持不变")
        }
    """
    return None

自动化优化

5.1 自动化规则引擎

sql 复制代码
-- 自动竞价调整规则

-- Rule 1: adjust the bid when CPA drifts away from its target.
-- Run frequency: hourly.
--
-- recommended_bid and action are derived from the same thresholds so they can never disagree:
-- every band that lowers the bid is labelled 'reduce' (the 1.1x-1.2x band lowers the bid by 8%
-- and must not be reported as 'hold').
-- Rows whose target_cpa is NULL match no WHEN branch and therefore keep the current bid / 'hold'.
SELECT 
    advertiser_id,
    campaign_id,
    CASE 
        WHEN current_cpa > target_cpa * 1.2 THEN ROUND(current_bid * 0.85, 2)  -- lower by 15%
        WHEN current_cpa > target_cpa * 1.1 THEN ROUND(current_bid * 0.92, 2)  -- lower by 8%
        WHEN current_cpa < target_cpa * 0.8 THEN ROUND(current_bid * 1.15, 2)  -- raise by 15%
        ELSE current_bid
    END AS recommended_bid,
    CASE 
        WHEN current_cpa > target_cpa * 1.1 THEN 'reduce'  -- covers both the 8% and the 15% cut
        WHEN current_cpa < target_cpa * 0.8 THEN 'increase'
        ELSE 'hold'
    END AS action
FROM campaign_daily_stats
WHERE stat_date = CURDATE()
AND current_cpa IS NOT NULL;

-- Rule 2: flag under-performing creatives for pausing or budget reduction.
-- Run frequency: daily.
-- A creative is only judged once it has enough impressions; the stricter
-- (pause) threshold is evaluated first. Worst conversion rates are listed first.
SELECT
    cp.advertiser_id,
    cp.campaign_id,
    cp.creative_id,
    cp.conversion_rate,
    CASE
        WHEN cp.impression_count > 100000 AND cp.conversion_rate < 0.001
            THEN 'pause'
        WHEN cp.impression_count > 50000 AND cp.conversion_rate < 0.002
            THEN 'reduce_budget'
        ELSE 'keep'
    END AS action
FROM creative_performance AS cp
WHERE cp.stat_date = CURDATE()
ORDER BY cp.conversion_rate ASC;

-- Rule 3: propose a larger daily budget for high-performing creatives.
-- Run frequency: daily.
-- Only today's creatives with ROI above 3 are considered, best ROI first, capped at 100 rows.
-- The +30% tier is checked before the +15% tier; anything else keeps its current budget.
SELECT
    cp.advertiser_id,
    cp.campaign_id,
    cp.creative_id,
    cp.current_daily_budget,
    CASE
        WHEN cp.conversion_rate > 0.005 AND cp.roi > 5
            THEN ROUND(cp.current_daily_budget * 1.3, 0)
        WHEN cp.conversion_rate > 0.003 AND cp.roi > 4
            THEN ROUND(cp.current_daily_budget * 1.15, 0)
        ELSE cp.current_daily_budget
    END AS recommended_budget
FROM creative_performance AS cp
WHERE cp.roi > 3
AND cp.stat_date = CURDATE()
ORDER BY cp.roi DESC
LIMIT 100;

总结

该层实现了:

  • 实时指标: 5分钟级指标更新
  • 可视化: 完整的BI看板体系
  • API服务: 5个核心决策API
  • 自动化: 规则驱动的自动优化
相关推荐
自不量力的A同学31 分钟前
Redisson 4.2.0 发布,官方推荐的 Redis 客户端
数据库·redis·缓存
Exquisite.33 分钟前
Mysql
数据库·mysql
全栈前端老曹1 小时前
【MongoDB】深入研究副本集与高可用性——Replica Set 架构、故障转移、读写分离
前端·javascript·数据库·mongodb·架构·nosql·副本集
R1nG8631 小时前
CANN资源泄漏检测工具源码深度解读 实战设备内存泄漏排查
数据库·算法·cann
阿钱真强道1 小时前
12 JetLinks MQTT直连设备事件上报实战(继电器场景)
linux·服务器·网络·数据库·网络协议
逍遥德2 小时前
Sring事务详解之02.如何使用编程式事务?
java·服务器·数据库·后端·sql·spring
笨蛋不要掉眼泪2 小时前
Redis哨兵机制全解析:原理、配置与实战故障转移演示
java·数据库·redis·缓存·bootstrap
Coder_Boy_2 小时前
基于SpringAI的在线考试系统-整体架构优化设计方案
java·数据库·人工智能·spring boot·架构·ddd
fen_fen10 小时前
Oracle建表语句示例
数据库·oracle
砚边数影12 小时前
数据可视化入门:Matplotlib 基础语法与折线图绘制
数据库·信息可视化·matplotlib·数据可视化·kingbase·数据库平替用金仓·金仓数据库