解读广告数仓(四) - 指标计算与应用实现

目录

  1. 实时指标计算
  2. Doris应用表设计
  3. BI看板实现
  4. 决策API开发
  5. 自动化优化

实时指标计算

1.1 Flink实时指标计算

sql 复制代码
-- Flink SQL作业: 实时广告指标计算

-- Kafka source for raw ad impression events (JSON), read from topic
-- 'raw_impression'.
-- `timestamp` is a reserved keyword in Flink SQL, so the column name is
-- escaped with backticks. Downstream code divides it by 1000 before calling
-- FROM_UNIXTIME, i.e. it is handled as epoch milliseconds.
CREATE TEMPORARY TABLE source_impression (
    impr_id STRING,
    `timestamp` BIGINT,
    advertiser_id INT,
    campaign_id INT,
    adgroup_id INT,
    creative_id INT,
    platform_id INT,
    region_id INT,
    cost DECIMAL(10, 4),
    proctime AS PROCTIME()  -- computed processing-time attribute
) WITH (
    'connector' = 'kafka',
    'topic' = 'raw_impression',
    'properties.bootstrap.servers' = 'kafka:9092',
    'properties.group.id' = 'flink-impression',
    'format' = 'json'
);

-- Kafka source for raw ad click events (JSON), read from topic 'raw_click'.
-- `timestamp` is a reserved keyword in Flink SQL and must be backtick-escaped.
-- A consumer group id is set explicitly: the Kafka connector's default
-- startup mode ('group-offsets') needs one, and the impression source in this
-- job already declares its own.
CREATE TEMPORARY TABLE source_click (
    click_id STRING,
    impr_id STRING,  -- joins back to source_impression.impr_id
    `timestamp` BIGINT,
    advertiser_id INT,
    campaign_id INT,
    cost DECIMAL(10, 4),
    proctime AS PROCTIME()  -- computed processing-time attribute
) WITH (
    'connector' = 'kafka',
    'topic' = 'raw_click',
    'properties.bootstrap.servers' = 'kafka:9092',
    'properties.group.id' = 'flink-click',
    'format' = 'json'
);

-- Real-time campaign metrics, one row per (minute, advertiser, campaign).
-- NOTE(review): the original header says "updated every 5 minutes" while the
-- bucketing below is per minute; confirm the intended granularity.
--
-- Declared as a TEMPORARY VIEW: Flink's CREATE TABLE ... AS SELECT does not
-- support temporary tables, and the downstream INSERT only needs a named query.
--
-- Every column is qualified because `timestamp`, advertiser_id, campaign_id
-- and cost exist on both sides of the join.
--   total_cost uses the impression cost; it is summed over joined rows, so an
--   impression matched by more than one click row contributes once per click.
--   cpc uses the click cost.
-- Ratios are computed in DECIMAL (integer / integer would truncate) and are
-- NULL when the denominator is zero.
CREATE TEMPORARY VIEW real_time_metrics AS
SELECT
    FLOOR(TO_TIMESTAMP(FROM_UNIXTIME(i.`timestamp` / 1000)) TO MINUTE) AS stat_time,
    i.advertiser_id,
    i.campaign_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    COUNT(DISTINCT c.click_id) AS click_cnt,
    CAST(
        CAST(SUM(CASE WHEN c.click_id IS NOT NULL THEN 1 ELSE 0 END) AS DECIMAL(20, 4))
            / NULLIF(COUNT(DISTINCT i.impr_id), 0)
        AS DECIMAL(5, 4)
    ) AS ctr,
    SUM(i.cost) AS total_cost,
    CAST(
        SUM(CASE WHEN c.click_id IS NOT NULL THEN c.cost ELSE 0 END)
            / NULLIF(COUNT(DISTINCT c.click_id), 0)
        AS DECIMAL(10, 4)
    ) AS cpc
FROM source_impression AS i
LEFT JOIN source_click AS c
    ON i.impr_id = c.impr_id
GROUP BY
    FLOOR(TO_TIMESTAMP(FROM_UNIXTIME(i.`timestamp` / 1000)) TO MINUTE),
    i.advertiser_id,
    i.campaign_id;

-- Doris sink for the real-time metrics (target: adtech_dwh.rt_metrics).
-- The Flink Doris connector addresses the target with a single
-- 'table.identifier' = '<db>.<table>' option and requires credentials.
-- The username / password values below are placeholders to be replaced per
-- environment.
-- 'sink.label-prefix' names the Stream Load labels; with the JSON format the
-- rows are sent one JSON object per line, hence 'read_json_by_line'.
CREATE TEMPORARY TABLE sink_rt_metrics (
    stat_time TIMESTAMP,
    advertiser_id INT,
    campaign_id INT,
    pv BIGINT,
    click_cnt BIGINT,
    ctr DECIMAL(5, 4),
    total_cost DECIMAL(10, 4),
    cpc DECIMAL(10, 4)
) WITH (
    'connector' = 'doris',
    'fenodes' = 'doris-fe:8030',
    'table.identifier' = 'adtech_dwh.rt_metrics',
    'username' = '<doris_user>',
    'password' = '<doris_password>',
    'sink.label-prefix' = 'flink_rt_metrics',
    'sink.properties.format' = 'json',
    'sink.properties.read_json_by_line' = 'true'
);

-- Stream the aggregated metrics into the Doris sink.
-- Columns are listed explicitly (instead of SELECT *) so that a change to the
-- upstream query's column order cannot silently shift values between sink
-- columns.
INSERT INTO sink_rt_metrics
SELECT
    stat_time,
    advertiser_id,
    campaign_id,
    pv,
    click_cnt,
    ctr,
    total_cost,
    cpc
FROM real_time_metrics;

1.2 Doris汇总指标计算

sql 复制代码
-- 每小时执行一次的汇总计算

-- 1. Hourly rollup per advertiser / campaign / ad group.
-- impr_id exists in both fact_impression and fact_click, and click_id in both
-- fact_click and fact_conversion (they are the join keys), so every column is
-- qualified to avoid ambiguous-column errors.
-- NOTE(review): event_time, cost and the advertiser/campaign/adgroup ids are
-- assumed to come from fact_impression (the driving table); confirm.
-- NOTE(review): SUM(i.cost) is taken over joined rows, so an impression with
-- several clicks or conversions contributes its cost more than once.
-- NOTE(review): the filter looks back 2 hours from NOW(), so the oldest and the
-- current hour buckets are partial, and hourly runs overlap; how overlapping
-- inserts are reconciled depends on the target table's model.
INSERT INTO hourly_campaign_stats
SELECT
    -- Doris documents DATE_TRUNC as (datetime, unit).
    DATE_TRUNC(FROM_UNIXTIME(i.event_time / 1000), 'hour') AS hour_time,
    i.advertiser_id,
    i.campaign_id,
    i.adgroup_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    -- Number of impressions that received at least one click.
    COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) AS click_cnt,
    ROUND(COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) /
        COUNT(DISTINCT i.impr_id), 4) AS ctr,
    SUM(i.cost) AS total_cost,
    -- Number of clicks that led to at least one conversion.
    COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) AS conv_cnt,
    ROUND(COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) /
        COUNT(DISTINCT c.click_id), 4) AS cvr,
    SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) AS revenue,
    -- COUNT(DISTINCT x) already ignores NULLs, so no CASE wrapper is needed.
    SUM(i.cost) / COUNT(DISTINCT v.conversion_id) AS cpa
FROM fact_impression AS i
LEFT JOIN fact_click AS c
    ON i.impr_id = c.impr_id
LEFT JOIN fact_conversion AS v
    ON c.click_id = v.click_id
WHERE i.event_time >= UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 2 HOUR)) * 1000
GROUP BY
    DATE_TRUNC(FROM_UNIXTIME(i.event_time / 1000), 'hour'),
    i.advertiser_id,
    i.campaign_id,
    i.adgroup_id;

-- 2. Daily rollup per advertiser / campaign for yesterday.
-- Columns are qualified because impr_id and click_id exist on both sides of
-- their joins.
-- NOTE(review): event_time, cost and the advertiser/campaign ids are assumed
-- to come from fact_impression; confirm.
-- NOTE(review): INSERT OVERWRITE without a PARTITION clause replaces the whole
-- table with the rows selected here (yesterday only). If history must be kept,
-- overwrite only yesterday's partition.
-- NOTE(review): SUM(i.cost) is taken over joined rows, so an impression with
-- several clicks or conversions contributes its cost more than once.
INSERT OVERWRITE TABLE daily_campaign_stats
SELECT
    DATE(FROM_UNIXTIME(i.event_time / 1000)) AS stat_date,
    i.advertiser_id,
    i.campaign_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    -- Number of impressions that received at least one click.
    COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) AS click_cnt,
    ROUND(COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) /
        COUNT(DISTINCT i.impr_id), 4) AS ctr,
    SUM(i.cost) AS total_cost,
    -- Number of clicks that led to at least one conversion.
    COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) AS conv_cnt,
    SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) AS revenue,
    SUM(i.cost) / COUNT(DISTINCT v.conversion_id) AS cpa,
    -- ROI = (revenue - cost) / cost
    (SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) - SUM(i.cost))
        / SUM(i.cost) AS roi
FROM fact_impression AS i
LEFT JOIN fact_click AS c
    ON i.impr_id = c.impr_id
LEFT JOIN fact_conversion AS v
    ON c.click_id = v.click_id
-- Half-open range on the raw column (yesterday 00:00 inclusive to today 00:00
-- exclusive, epoch milliseconds) instead of wrapping event_time in functions.
WHERE i.event_time >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 1 DAY)) * 1000
  AND i.event_time < UNIX_TIMESTAMP(CURDATE()) * 1000
GROUP BY
    DATE(FROM_UNIXTIME(i.event_time / 1000)),
    i.advertiser_id,
    i.campaign_id;

-- 3. Per-platform comparison for yesterday, ranked by spend within each
--    (date, advertiser).
-- The aggregation runs in an inner query and the ranking in the outer one:
-- a window function cannot order by a select-list alias (total_cost) of the
-- same query level, nor partition by a non-grouped expression.
-- platform_id is added as a tie-breaker so equal-cost platforms rank
-- deterministically.
-- NOTE(review): event_time, cost, platform_id and advertiser_id are assumed to
-- come from fact_impression; confirm.
-- NOTE(review): SUM(i.cost) is taken over joined rows, so an impression with
-- several clicks or conversions contributes its cost more than once.
INSERT INTO platform_comparison_stats
SELECT
    s.stat_date,
    s.platform_id,
    s.advertiser_id,
    s.pv,
    s.total_cost,
    s.conv_cnt,
    s.cpa,
    ROW_NUMBER() OVER (
        PARTITION BY s.stat_date, s.advertiser_id
        ORDER BY s.total_cost DESC, s.platform_id
    ) AS platform_rank
FROM (
    SELECT
        DATE(FROM_UNIXTIME(i.event_time / 1000)) AS stat_date,
        i.platform_id,
        i.advertiser_id,
        COUNT(DISTINCT i.impr_id) AS pv,
        SUM(i.cost) AS total_cost,
        COUNT(DISTINCT v.conversion_id) AS conv_cnt,
        SUM(i.cost) / COUNT(DISTINCT v.conversion_id) AS cpa
    FROM fact_impression AS i
    LEFT JOIN fact_click AS c
        ON i.impr_id = c.impr_id
    LEFT JOIN fact_conversion AS v
        ON c.click_id = v.click_id
    WHERE DATE(FROM_UNIXTIME(i.event_time / 1000)) = DATE_SUB(CURDATE(), INTERVAL 1 DAY)
    GROUP BY
        DATE(FROM_UNIXTIME(i.event_time / 1000)),
        i.platform_id,
        i.advertiser_id
) AS s;

Doris应用表设计

2.1 核心应用表

sql 复制代码
-- Real-time dashboard table (refresh interval: 5 minutes).
-- ENGINE is OLAP: Doris internal tables use the OLAP engine ("DORIS" is not an
-- engine name).
-- The only static partition ends at 2025-12-08, so rows on or after that date
-- would have no partition to land in. Dynamic partitioning is enabled so that
-- daily partitions are created ahead of time (3 days forward).
-- NOTE(review): this assumes the table is created on or after 2025-12-08;
-- before that, the generated daily ranges would overlap p_latest.
-- No "dynamic_partition.start" is set, so old partitions are not dropped
-- automatically.
CREATE TABLE rt_dashboard_metrics (
    stat_time DATETIME,
    advertiser_id INT,
    campaign_id INT,
    pv BIGINT,
    click_cnt BIGINT,
    ctr DECIMAL(5, 4),
    cost DECIMAL(12, 2),
    roi DECIMAL(6, 2)
) ENGINE=OLAP
DUPLICATE KEY (stat_time, advertiser_id, campaign_id)
PARTITION BY RANGE (stat_time) (
    PARTITION p_latest VALUES LESS THAN ("2025-12-08")
)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64
PROPERTIES (
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "64"
);

-- Advertiser daily report table (refresh interval: daily).
-- One row per (report_date, advertiser_id), enforced by the UNIQUE KEY model.
-- ENGINE is OLAP: Doris internal tables use the OLAP engine ("DORIS" is not an
-- engine name).
CREATE TABLE daily_advertiser_report (
    report_date DATE,
    advertiser_id INT,
    advertiser_name STRING,
    total_cost DECIMAL(12, 2),
    total_revenue DECIMAL(12, 2),
    roi DECIMAL(6, 2),
    cpa DECIMAL(8, 2),
    top_campaign STRING,
    budget_used_pct DECIMAL(5, 2),
    forecast_exceed_budget INT  -- 0: not expected to exceed, 1: may exceed
) ENGINE=OLAP
UNIQUE KEY (report_date, advertiser_id)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64;

-- Channel comparison table (refresh interval: hourly).
-- One CPA column per channel (wide layout) plus summary columns.
-- ENGINE is OLAP: Doris internal tables use the OLAP engine ("DORIS" is not an
-- engine name).
-- PARTITION BY RANGE needs a partition list; it is left empty here and daily
-- partitions are created by dynamic partitioning (3 days ahead). No
-- "dynamic_partition.start" is set, so old partitions are not dropped
-- automatically.
CREATE TABLE hourly_channel_compare (
    compare_hour DATETIME,
    advertiser_id INT,
    baidu_cpa DECIMAL(8, 2),
    douyin_cpa DECIMAL(8, 2),
    kuaishou_cpa DECIMAL(8, 2),
    xiaohongshu_cpa DECIMAL(8, 2),
    best_channel STRING,
    worst_channel STRING,
    avg_cpa DECIMAL(8, 2),
    -- NOTE(review): the original comment called this "standard deviation"
    -- while the column is named variance; confirm which statistic is stored.
    cpa_variance DECIMAL(8, 2)
) ENGINE=OLAP
DUPLICATE KEY (compare_hour, advertiser_id)
PARTITION BY RANGE (compare_hour) ()
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 32
PROPERTIES (
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "32"
);

-- User conversion funnel table.
-- One row per (funnel_date, advertiser_id, campaign_id), enforced by the
-- UNIQUE KEY model. Stage counts run impression -> click -> landing ->
-- add-to-cart -> purchase; the rate columns cover impression->click,
-- click->landing, landing->purchase and the overall rate (there is no
-- dedicated add-to-cart rate column).
-- ENGINE is OLAP: Doris internal tables use the OLAP engine ("DORIS" is not an
-- engine name).
CREATE TABLE user_conversion_funnel (
    funnel_date DATE,
    advertiser_id INT,
    campaign_id INT,
    impression_count BIGINT,
    click_count BIGINT,
    landing_count BIGINT,
    add_to_cart_count BIGINT,
    purchase_count BIGINT,
    imp_to_click_rate DECIMAL(5, 4),
    click_to_landing_rate DECIMAL(5, 4),
    landing_to_purchase_rate DECIMAL(5, 4),
    overall_conversion_rate DECIMAL(5, 4)
) ENGINE=OLAP
UNIQUE KEY (funnel_date, advertiser_id, campaign_id)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64;

BI看板实现

3.1 实时看板结构

ini 复制代码
┌─────────────────────────────────────────────────────┐
│            广告数仓实时监控看板                      │
├─────────────────────────────────────────────────────┤
│                                                     │
│ [实时数据概览卡]                                     │
│ ├─ 当日投放成本: ¥1,234,567 (↑15% vs 昨日)        │
│ ├─ 当日点击数: 12,345,678 (↑8%)                     │
│ ├─ 当日转化数: 123,456 (↑12%)                       │
│ └─ 实时ROI: 4.2x (目标: 4.0x)  ✓                   │
│                                                     │
│ [时间序列图表]                                      │
│ ├─ 小时成本趋势 (折线图)                            │
│ │  └─ 显示成本在一天内的波动                        │
│ ├─ 小时点击数 (柱状图)                              │
│ │  └─ 显示不同时段的流量差异                        │
│ └─ 实时ROI (仪表板)                                 │
│    └─ 实时显示当日平均ROI值                         │
│                                                     │
│ [渠道对比分析]                                      │
│ ├─ 各平台成本分布 (饼图)                            │
│ │  └─ 百度(25%) | 抖音(35%) | 快手(20%) | 其他(20%)│
│ ├─ 各平台CPA对比 (条形图)                           │
│ │  └─ 左排序: 最好的CPA在最左                       │
│ └─ 各平台转化率对比                                 │
│    └─ 抖音>快手>百度                               │
│                                                     │
│ [采购商排行榜]                                      │
│ ├─ 消费金额Top 10                                   │
│ ├─ ROI Top 10                                       │
│ ├─ CPA最低Top 10                                    │
│ └─ 转化数Top 10                                     │
│                                                     │
│ [告警与异常检测]                                    │
│ ├─ 红色告警区域:                                    │
│ │  ├─ 某采购商成本超额                              │
│ │  ├─ 某渠道CPA突增>30%                             │
│ │  └─ 数据延迟>15分钟                               │
│ └─ 黄色预警区域:                                    │
│    ├─ 某采购商成本接近限额                          │
│    └─ 某渠道流量异常波动                            │
│                                                     │
│ [查询过滤器]                                        │
│ ├─ 日期范围选择                                     │
│ ├─ 采购商过滤 (单选/多选)                           │
│ ├─ 渠道过滤                                         │
│ ├─ 活动过滤                                         │
│ └─ 自定义指标组合                                   │
│                                                     │
└─────────────────────────────────────────────────────┘

3.2 BI工具配置 (Metabase示例)

json 复制代码
{
  "dashboards": [
    {
      "name": "广告实时监控看板",
      "description": "实时广告投放效果监控(每5分钟刷新)",
      "refresh_rate": "300s",
      "cards": [
        {
          "name": "当日投放成本",
          "type": "scalar",
          "query": "SELECT SUM(total_cost) FROM daily_advertiser_report WHERE report_date = CURDATE()",
          "compare_with": "SELECT SUM(total_cost) FROM daily_advertiser_report WHERE report_date = DATE_SUB(CURDATE(), INTERVAL 1 DAY)",
          "threshold": {
            "good": 0,
            "bad": 10000000
          }
        },
        {
          "name": "小时成本趋势",
          "type": "line_chart",
          "query": "SELECT hour_time, SUM(total_cost) AS total_cost FROM hourly_campaign_stats WHERE hour_time >= CURDATE() GROUP BY hour_time ORDER BY hour_time",
          "x_axis": "hour_time",
          "y_axis": "total_cost"
        },
        {
          "name": "渠道CPA对比",
          "type": "bar_chart",
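          "query": "SELECT 'baidu' AS channel, AVG(baidu_cpa) AS cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR) UNION ALL SELECT 'douyin' AS channel, AVG(douyin_cpa) AS cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR) UNION ALL SELECT 'kuaishou' AS channel, AVG(kuaishou_cpa) AS cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR)",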
          "x_axis": "channel",
          "y_axis": "cpa"
        },
        {
          "name": "转化漏斗",
          "type": "funnel_chart",
          "query": "SELECT impression_count, click_count, landing_count, add_to_cart_count, purchase_count FROM user_conversion_funnel WHERE funnel_date = CURDATE()",
          "funnel_stages": ["展示", "点击", "访问", "加购", "支付"]
        }
      ]
    }
  ]
}

决策API开发

4.1 核心API设计

python 复制代码
# api_adtech.py - FastAPI应用

from fastapi import FastAPI, Query
from typing import List, Optional
from datetime import datetime, timedelta
import doris_client

# FastAPI application instance; the route handlers in this module are
# registered on it via the @app.get / @app.post decorators.
app = FastAPI(title="广告数仓决策API")
# Module-level Doris client shared by the handlers.
# NOTE(review): 8030 is the same port used as the Doris FE address in the Flink
# sink's 'fenodes' option; confirm this client expects that port rather than
# the SQL query port.
doris = doris_client.DorisClient(host="doris-fe", port=8030)

# API 1: real-time cost monitoring
@app.get("/v1/advertiser/{advertiser_id}/current_cost")
async def get_current_cost(
    advertiser_id: int,
    time_range: str = Query("today", enum=["today", "week", "month"])
):
    """
    Return an advertiser's spend over the requested time range.

    Args:
        advertiser_id: Advertiser ID.
        time_range: One of "today", "week" (last 7 days) or "month"
            (last 30 days).

    Returns:
        A dict with the echoed ``advertiser_id`` and ``time_range`` plus
        ``rows``, the raw result of the Doris query (columns:
        advertiser_id, current_cost, budget_used_pct).

        NOTE(review): the originally documented response shape was richer:
        {
          "advertiser_id": 1001,
          "advertiser_name": "品牌A",
          "current_cost": 125000.50,
          "budget_limit": 200000,
          "budget_used_pct": 62.5,
          "daily_avg_cost": 15625.06,
          "forecast_total_cost": 187500,
          "status": "normal"  // or "warning" or "critical"
        }
        daily_advertiser_report has no budget column, so budget_limit cannot
        be read from it; building the full payload needs a budget source and
        knowledge of the row format returned by ``doris.query``.

    Raises:
        HTTPException: 400 if ``time_range`` is not a supported value.
    """
    # Local import so this handler does not depend on edits to the file's
    # import block.
    from fastapi import HTTPException

    # time_range is validated explicitly and mapped to a fixed look-back, so
    # only whitelisted integers are ever interpolated into the SQL text.
    lookback_days = {"today": 0, "week": 7, "month": 30}
    if time_range not in lookback_days:
        raise HTTPException(
            status_code=400,
            detail=f"time_range must be one of {sorted(lookback_days)}",
        )
    days = lookback_days[time_range]

    # advertiser_id is coerced with int() before interpolation as a guard in
    # case the handler is called directly with a non-validated value.
    # GROUP BY is required because advertiser_id is selected next to
    # aggregates.
    query = f"""
    SELECT 
        advertiser_id,
        SUM(total_cost) as current_cost,
        MAX(budget_used_pct) as budget_used_pct
    FROM daily_advertiser_report
    WHERE advertiser_id = {int(advertiser_id)}
    AND report_date >= DATE_SUB(CURDATE(), INTERVAL {days} DAY)
    GROUP BY advertiser_id
    """
    # NOTE(review): if doris.query is a blocking call it will stall the event
    # loop inside this async handler; confirm the client's behaviour.
    result = doris.query(query)
    return {
        "advertiser_id": advertiser_id,
        "time_range": time_range,
        "rows": result,
    }

# API 2: channel comparison / optimization input
@app.get("/v1/advertiser/{advertiser_id}/channel_optimization")
async def get_channel_optimization(
    advertiser_id: int,
    optimization_goal: str = Query("roi", enum=["roi", "cpa", "volume"])
):
    """
    Return per-channel CPA statistics for the last 7 days, best CPA first.

    hourly_channel_compare stores one CPA column per channel (baidu_cpa,
    douyin_cpa, kuaishou_cpa, xiaohongshu_cpa) and has no platform_id, cpa or
    roi columns, so the query unpivots those columns into
    (channel, cpa) rows before aggregating.

    Args:
        advertiser_id: Advertiser ID.
        optimization_goal: Optimization target ("roi", "cpa" or "volume").
            NOTE(review): only CPA is available in hourly_channel_compare, so
            the ranking is always by average CPA; the goal is validated and
            echoed back. Ranking by ROI or volume needs another data source.

    Returns:
        A dict with the echoed ``advertiser_id`` and ``optimization_goal``
        plus ``channels``, the raw result of the Doris query (columns:
        channel, avg_cpa, data_points; ordered by avg_cpa ascending).

        NOTE(review): the originally documented response was a list of
        recommended actions, e.g.
        {
          "recommend_actions": [
            {
              "channel": "douyin",
              "action": "增加投放",
              "expected_improvement": "预计CPA降低15%",
              "current_cpa": 45.2,
              "benchmark_cpa": 38.5
            },
            {
              "channel": "baidu",
              "action": "减少投放",
              "reason": "CPA较高, 转化率低于平均"
            }
          ],
          "total_potential_roi_increase": "25%",
          "confidence_score": 0.92
        }
        Deriving those recommendations is not implemented here.

    Raises:
        HTTPException: 400 if ``optimization_goal`` is not a supported value.
    """
    # Local import so this handler does not depend on edits to the file's
    # import block.
    from fastapi import HTTPException

    allowed_goals = ("roi", "cpa", "volume")
    if optimization_goal not in allowed_goals:
        raise HTTPException(
            status_code=400,
            detail=f"optimization_goal must be one of {list(allowed_goals)}",
        )

    # Fixed channel -> column mapping taken from the hourly_channel_compare
    # DDL; only these constants and the int-coerced id reach the SQL text.
    channel_columns = {
        "baidu": "baidu_cpa",
        "douyin": "douyin_cpa",
        "kuaishou": "kuaishou_cpa",
        "xiaohongshu": "xiaohongshu_cpa",
    }
    advertiser = int(advertiser_id)
    per_channel_selects = [
        f"""
        SELECT '{channel}' AS channel, {column} AS cpa
        FROM hourly_channel_compare
        WHERE advertiser_id = {advertiser}
        AND compare_hour >= DATE_SUB(NOW(), INTERVAL 7 DAY)"""
        for channel, column in channel_columns.items()
    ]
    unpivoted = "\n        UNION ALL".join(per_channel_selects)

    # COUNT(cpa) counts only hours where the channel actually has a CPA value.
    query = f"""
    SELECT
        channel,
        AVG(cpa) as avg_cpa,
        COUNT(cpa) as data_points
    FROM ({unpivoted}
    ) t
    GROUP BY channel
    ORDER BY avg_cpa ASC
    """
    # NOTE(review): if doris.query is a blocking call it will stall the event
    # loop inside this async handler; confirm the client's behaviour.
    result = doris.query(query)
    return {
        "advertiser_id": advertiser_id,
        "optimization_goal": optimization_goal,
        "channels": result,
    }

# API 3: budget early-warning
@app.get("/v1/advertiser/{advertiser_id}/budget_forecast")
async def get_budget_forecast(advertiser_id: int):
    """
    Forecast whether the advertiser will exceed this month's budget.

    Not implemented yet: the handler currently returns None.

    Intended response shape:
        {
          "forecast_status": "warning",  // normal/warning/critical
          "monthly_budget": 1000000,
          "current_spent": 625000,
          "projected_end_month": 950000,
          "days_remaining": 8,
          "daily_burn_rate": 75000,
          "recommended_action": "保持当前投放"
        }
    """
    return None

# API 4: A/B test data
@app.get("/v1/advertiser/{advertiser_id}/ab_test_results/{test_id}")
async def get_ab_test_results(advertiser_id: int, test_id: str):
    """
    Return the live results of an A/B test.

    Not implemented yet: the handler currently returns None.

    Intended response shape:
        {
          "test_id": "test_creative_v1",
          "control_group": {
            "sample_size": 50000,
            "conversion_rate": 0.0245,
            "cpa": 48.5
          },
          "test_group": {
            "sample_size": 50000,
            "conversion_rate": 0.0312,
            "cpa": 38.2
          },
          "statistical_significance": 0.95,  // 95% confidence
          "recommendation": "test_group 胜出, 建议全量投放"
        }
    """
    return None

# API 5: automatic bidding suggestion
@app.post("/v1/advertiser/{advertiser_id}/auto_bidding_suggestion")
async def get_auto_bidding_suggestion(
    advertiser_id: int,
    target_cpa: Optional[float] = None,
    target_roi: Optional[float] = None
):
    """
    Return an automatic bidding suggestion for the advertiser.

    Not implemented yet: the handler currently returns None.

    Args:
        target_cpa: Target CPA (unit: yuan).
        target_roi: Target ROI multiple.

    Intended response shape:
        {
          "suggested_bid": 12.5,
          "current_bid": 10.0,
          "adjustment": "+25%",
          "expected_daily_volume": 15000,
          "expected_daily_cost": 187500,
          "expected_cpa": 42.3,
          "execution_recommendation": "立即执行" // or "谨慎调整" or "保持不变"
        }
    """
    return None

自动化优化

5.1 自动化规则引擎

sql 复制代码
-- 自动竞价调整规则

-- Rule 1: adjust the bid automatically when CPA drifts from its target.
-- Run frequency: hourly.
-- Bands (relative to target_cpa):
--   > 120%  -> bid -15%, action 'reduce'
--   > 110%  -> bid  -8%, action 'reduce'
--   <  80%  -> bid +15%, action 'increase'
--   else    -> bid unchanged, action 'hold'
-- The action CASE mirrors the bid CASE band for band; previously the
-- 110%-120% band lowered the bid but was still labelled 'hold'.
-- Rows with a NULL target_cpa fall through to ELSE (bid unchanged, 'hold').
-- NOTE(review): this reads campaign_daily_stats, while the daily rollup
-- elsewhere writes daily_campaign_stats; confirm the table name.
SELECT 
    advertiser_id,
    campaign_id,
    CASE 
        WHEN current_cpa > target_cpa * 1.2 THEN ROUND(current_bid * 0.85, 2)  -- lower by 15%
        WHEN current_cpa > target_cpa * 1.1 THEN ROUND(current_bid * 0.92, 2)  -- lower by 8%
        WHEN current_cpa < target_cpa * 0.8 THEN ROUND(current_bid * 1.15, 2)  -- raise by 15%
        ELSE current_bid
    END AS recommended_bid,
    CASE 
        WHEN current_cpa > target_cpa * 1.2 THEN 'reduce'
        WHEN current_cpa > target_cpa * 1.1 THEN 'reduce'
        WHEN current_cpa < target_cpa * 0.8 THEN 'increase'
        ELSE 'hold'
    END AS action
FROM campaign_daily_stats
WHERE stat_date = CURDATE()
AND current_cpa IS NOT NULL;

-- Rule 2: flag poorly performing creatives for pausing or budget reduction.
-- Run frequency: daily.
-- A creative is only judged once it has enough impressions:
--   > 100000 impressions and conversion rate < 0.1%  -> 'pause'
--   >  50000 impressions and conversion rate < 0.2%  -> 'reduce_budget'
--   otherwise                                        -> 'keep'
-- Every creative for today is returned, worst conversion rate first.
SELECT
    cp.advertiser_id,
    cp.campaign_id,
    cp.creative_id,
    cp.conversion_rate,
    CASE
        WHEN cp.impression_count > 100000 AND cp.conversion_rate < 0.001
            THEN 'pause'
        WHEN cp.impression_count > 50000 AND cp.conversion_rate < 0.002
            THEN 'reduce_budget'
        ELSE 'keep'
    END AS action
FROM creative_performance AS cp
WHERE cp.stat_date = CURDATE()
ORDER BY cp.conversion_rate ASC;

-- Rule 3: raise the daily budget of high-performing creatives.
-- Run frequency: daily.
-- Candidates are today's creatives with ROI above 3, limited to the 100
-- highest by ROI:
--   ROI > 5 and conversion rate > 0.5%  -> budget +30%
--   ROI > 4 and conversion rate > 0.3%  -> budget +15%
--   otherwise                           -> budget unchanged
SELECT
    cp.advertiser_id,
    cp.campaign_id,
    cp.creative_id,
    cp.current_daily_budget,
    CASE
        WHEN cp.conversion_rate > 0.005 AND cp.roi > 5
            THEN ROUND(cp.current_daily_budget * 1.3, 0)
        WHEN cp.conversion_rate > 0.003 AND cp.roi > 4
            THEN ROUND(cp.current_daily_budget * 1.15, 0)
        ELSE cp.current_daily_budget
    END AS recommended_budget
FROM creative_performance AS cp
WHERE cp.roi > 3
  AND cp.stat_date = CURDATE()
ORDER BY cp.roi DESC
LIMIT 100;

总结

该层实现了:

  • 实时指标: 5分钟级指标更新
  • 可视化: 完整的BI看板体系
  • API服务: 5个核心决策API
  • 自动化: 规则驱动的自动优化
相关推荐
语落心生3 小时前
解读广告数仓(一) - 广告业务模型与指标体系深化分析
数据库
老华带你飞3 小时前
旅游|基于Java旅游信息推荐系统(源码+数据库+文档)
java·开发语言·数据库·vue.js·spring boot·后端·旅游
冉冰学姐3 小时前
SSM石家庄铁道大学影视资料管理系统ql5pa(程序+源码+数据库+调试部署+开发环境)带论文文档1万字以上,文末可获取,系统界面在最后面
数据库·ssm框架·石家庄铁道大学
Sunhen_Qiletian3 小时前
《Python开发之语言基础》第七集:库--时间库
前端·数据库·python
程序边界4 小时前
金仓数据库助力Oracle迁移实战:破局四大挑战,解锁高效迁移新路径
数据库·oracle
白衣衬衫 两袖清风4 小时前
SQL索引优化
数据库·sql
老华带你飞4 小时前
医院挂号|基于Java医院挂号管理系统(源码+数据库+文档)
java·开发语言·数据库·vue.js·spring boot
豐儀麟阁贵4 小时前
9.6使用正则表达式
java·开发语言·数据库·mysql
亿坊电商4 小时前
如何检查CMS建站系统的数据库链接问题?
数据库·cms