目录
实时指标计算
1.1 Flink实时指标计算
sql
-- Flink SQL job: real-time ad metrics.
-- Kafka-backed source table for raw impression events (JSON-encoded).
CREATE TEMPORARY TABLE source_impression (
    impr_id       STRING,
    -- TIMESTAMP is a reserved keyword in Flink SQL, so the column name has to
    -- be quoted with backticks. The downstream metrics query divides this
    -- value by 1000 before FROM_UNIXTIME, i.e. it is treated as epoch millis.
    `timestamp`   BIGINT,
    advertiser_id INT,
    campaign_id   INT,
    adgroup_id    INT,
    creative_id   INT,
    platform_id   INT,
    region_id     INT,
    cost          DECIMAL(10, 4),
    -- Computed processing-time attribute.
    proctime AS PROCTIME()
) WITH (
    'connector' = 'kafka',
    'topic' = 'raw_impression',
    'properties.bootstrap.servers' = 'kafka:9092',
    'properties.group.id' = 'flink-impression',
    'format' = 'json'
);
-- Kafka-backed source table for raw click events (JSON-encoded).
CREATE TEMPORARY TABLE source_click (
    click_id      STRING,
    -- Impression this click belongs to; used as the join key against
    -- source_impression.
    impr_id       STRING,
    -- TIMESTAMP is a reserved keyword in Flink SQL, so the column name has to
    -- be quoted with backticks.
    `timestamp`   BIGINT,
    advertiser_id INT,
    campaign_id   INT,
    cost          DECIMAL(10, 4),
    -- Computed processing-time attribute.
    proctime AS PROCTIME()
) WITH (
    'connector' = 'kafka',
    'topic' = 'raw_click',
    'properties.bootstrap.servers' = 'kafka:9092',
    -- A consumer group is declared for parity with source_impression.
    -- NOTE(review): the group name is a suggestion; align it with the
    -- team's naming convention.
    'properties.group.id' = 'flink-click',
    'format' = 'json'
);
-- Real-time metrics per event minute, advertiser and campaign.
--
-- Declared as a temporary VIEW: Flink's CREATE TABLE ... AS SELECT does not
-- support temporary tables, and the only consumer is the INSERT into the sink.
--
-- Every column is qualified because `timestamp`, advertiser_id, campaign_id
-- and cost exist in both source tables and would otherwise be ambiguous.
-- Grouping dimensions come from the impression side so that impressions
-- without a click are still counted.
--
-- NOTE(review): the original heading said "updated every 5 minutes", but the
-- bucket is one minute and the result is a continuously updating aggregate;
-- confirm which granularity is wanted.
-- NOTE(review): this is a regular (unbounded-state) stream join; consider an
-- interval join or a state TTL for production use.
CREATE TEMPORARY VIEW real_time_metrics AS
SELECT
    -- DATE_TRUNC is not a Flink built-in; FLOOR(... TO MINUTE) is the
    -- equivalent. `timestamp` is epoch milliseconds, hence the / 1000.
    FLOOR(TO_TIMESTAMP(FROM_UNIXTIME(i.`timestamp` / 1000)) TO MINUTE) AS stat_time,
    i.advertiser_id,
    i.campaign_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    COUNT(DISTINCT c.click_id) AS click_cnt,
    -- Cast before dividing: BIGINT / BIGINT is integer division in Flink and
    -- would truncate the ratio to 0. Uses the same distinct click count as
    -- click_cnt so the two columns agree. NULLIF guards the empty group.
    CAST(
        CAST(COUNT(DISTINCT c.click_id) AS DECIMAL(20, 4))
        / NULLIF(COUNT(DISTINCT i.impr_id), 0)
        AS DECIMAL(5, 4)
    ) AS ctr,
    -- NOTE(review): impression cost is assumed here. An impression with
    -- several clicks appears once per click after the join, so this sum is
    -- inflated in that case; pre-aggregate clicks per impr_id if that matters.
    SUM(i.cost) AS total_cost,
    -- Click cost per distinct click; NULL (not an error) when there are no
    -- clicks in the bucket.
    CAST(
        SUM(c.cost) / NULLIF(COUNT(DISTINCT c.click_id), 0)
        AS DECIMAL(10, 4)
    ) AS cpc
FROM source_impression AS i
LEFT JOIN source_click AS c
    ON i.impr_id = c.impr_id
GROUP BY
    FLOOR(TO_TIMESTAMP(FROM_UNIXTIME(i.`timestamp` / 1000)) TO MINUTE),
    i.advertiser_id,
    i.campaign_id;
-- Doris sink table for the real-time metrics.
CREATE TEMPORARY TABLE sink_rt_metrics (
    stat_time     TIMESTAMP,
    advertiser_id INT,
    campaign_id   INT,
    pv            BIGINT,
    click_cnt     BIGINT,
    ctr           DECIMAL(5, 4),
    total_cost    DECIMAL(10, 4),
    cpc           DECIMAL(10, 4)
) WITH (
    'connector' = 'doris',
    'fenodes' = 'doris-fe:8030',
    -- The Doris Flink connector addresses the target as '<db>.<table>' via
    -- 'table.identifier'; it has no 'database.name' / 'table.name' options.
    'table.identifier' = 'adtech_dwh.rt_metrics',
    -- Credentials are required by the connector.
    -- TODO: inject real values from the job's secret configuration.
    'username' = '<doris_user>',
    'password' = '<doris_password>',
    -- Prefix for the stream-load labels generated by this job.
    'sink.label-prefix' = 'flink_rt_metrics',
    'sink.properties.format' = 'json',
    -- Rows are written as one JSON object per line.
    'sink.properties.read_json_by_line' = 'true'
);
-- Stream the aggregated metrics into the Doris sink. Columns are listed
-- explicitly (instead of SELECT *) so that a change in the upstream column
-- order cannot silently shift values into the wrong sink column.
INSERT INTO sink_rt_metrics
SELECT
    stat_time,
    advertiser_id,
    campaign_id,
    pv,
    click_cnt,
    ctr,
    total_cost,
    cpc
FROM real_time_metrics;
1.2 Doris汇总指标计算
sql
-- Roll-up job, scheduled once per hour.
-- 1. Hour-level campaign summary.
--
-- NOTE(review): columns are qualified on the assumption that event_time,
-- advertiser_id, campaign_id, adgroup_id and cost live on fact_impression,
-- click_id on fact_click, and conversion_id / conversion_value on
-- fact_conversion. The fact table DDL is not visible here; confirm.
-- NOTE(review): an impression with several clicks/conversions appears once
-- per match after the joins, so SUM(i.cost) is inflated in that case.
INSERT INTO hourly_campaign_stats
SELECT
    -- Doris documents DATE_TRUNC as (datetime, unit). DIV keeps the division
    -- integral so no fractional second is passed to FROM_UNIXTIME.
    DATE_TRUNC(FROM_UNIXTIME(i.event_time DIV 1000), 'hour') AS hour_time,
    i.advertiser_id,
    i.campaign_id,
    i.adgroup_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    -- Impressions that received at least one click.
    COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) AS click_cnt,
    ROUND(
        COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END)
        / NULLIF(COUNT(DISTINCT i.impr_id), 0),
        4
    ) AS ctr,
    SUM(i.cost) AS total_cost,
    -- Clicks that led to at least one conversion.
    COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) AS conv_cnt,
    ROUND(
        COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END)
        / NULLIF(COUNT(DISTINCT c.click_id), 0),
        4
    ) AS cvr,
    SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) AS revenue,
    -- COUNT(DISTINCT ...) already ignores NULLs; NULLIF yields NULL instead of
    -- a division by zero when there are no conversions.
    SUM(i.cost) / NULLIF(COUNT(DISTINCT v.conversion_id), 0) AS cpa
FROM fact_impression AS i
LEFT JOIN fact_click AS c
    ON i.impr_id = c.impr_id
LEFT JOIN fact_conversion AS v
    ON c.click_id = v.click_id
-- Look back two hours, aligned to the hour boundary. Without the alignment
-- the oldest bucket only covers part of its hour and a partial row would be
-- written for it.
WHERE i.event_time >= UNIX_TIMESTAMP(DATE_TRUNC(DATE_SUB(NOW(), INTERVAL 2 HOUR), 'hour')) * 1000
GROUP BY
    DATE_TRUNC(FROM_UNIXTIME(i.event_time DIV 1000), 'hour'),
    i.advertiser_id,
    i.campaign_id,
    i.adgroup_id;
-- 2. Day-level campaign summary for yesterday.
--
-- Doris requires the TABLE keyword after INSERT OVERWRITE. Without a
-- partition clause the statement replaces the WHOLE table with this one day
-- of data, wiping all history. PARTITION(*) limits the overwrite to the
-- partitions touched by the result set.
-- NOTE(review): PARTITION(*) needs a Doris version that supports automatic
-- partition detection and a table partitioned by stat_date; otherwise name
-- the target partition explicitly.
-- NOTE(review): columns are qualified on the assumption that event_time,
-- advertiser_id, campaign_id and cost live on fact_impression; confirm
-- against the fact table DDL. SUM(i.cost) is inflated for impressions that
-- match several clicks/conversions.
INSERT OVERWRITE TABLE daily_campaign_stats PARTITION(*)
SELECT
    DATE(FROM_UNIXTIME(i.event_time DIV 1000)) AS stat_date,
    i.advertiser_id,
    i.campaign_id,
    COUNT(DISTINCT i.impr_id) AS pv,
    -- Impressions that received at least one click.
    COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END) AS click_cnt,
    ROUND(
        COUNT(DISTINCT CASE WHEN c.click_id IS NOT NULL THEN i.impr_id END)
        / NULLIF(COUNT(DISTINCT i.impr_id), 0),
        4
    ) AS ctr,
    SUM(i.cost) AS total_cost,
    -- Clicks that led to at least one conversion.
    COUNT(DISTINCT CASE WHEN v.conversion_id IS NOT NULL THEN c.click_id END) AS conv_cnt,
    SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) AS revenue,
    SUM(i.cost) / NULLIF(COUNT(DISTINCT v.conversion_id), 0) AS cpa,
    -- (revenue - cost) / cost; NULL when there was no spend.
    (SUM(CASE WHEN v.conversion_id IS NOT NULL THEN v.conversion_value ELSE 0 END) - SUM(i.cost))
        / NULLIF(SUM(i.cost), 0) AS roi
FROM fact_impression AS i
LEFT JOIN fact_click AS c
    ON i.impr_id = c.impr_id
LEFT JOIN fact_conversion AS v
    ON c.click_id = v.click_id
-- Half-open range on the raw column (yesterday 00:00 inclusive to today 00:00
-- exclusive, in epoch milliseconds) instead of wrapping event_time in
-- DATE(FROM_UNIXTIME(...)), so the filter can be used for pruning.
WHERE i.event_time >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 1 DAY)) * 1000
  AND i.event_time <  UNIX_TIMESTAMP(CURDATE()) * 1000
GROUP BY
    DATE(FROM_UNIXTIME(i.event_time DIV 1000)),
    i.advertiser_id,
    i.campaign_id;
-- 3. Per-platform benchmark for yesterday, ranked by spend within each
--    advertiser.
--
-- The aggregation runs in an inner query and the rank is computed in the
-- outer one: a window's ORDER BY cannot refer to a select alias of the same
-- query level (total_cost), and mixing a window over raw expressions with
-- GROUP BY is not portable.
-- NOTE(review): columns are qualified on the assumption that event_time,
-- platform_id, advertiser_id and cost live on fact_impression; confirm
-- against the fact table DDL. SUM(i.cost) is inflated for impressions that
-- match several clicks/conversions.
INSERT INTO platform_comparison_stats
SELECT
    platform_daily.stat_date,
    platform_daily.platform_id,
    platform_daily.advertiser_id,
    platform_daily.pv,
    platform_daily.total_cost,
    platform_daily.conv_cnt,
    platform_daily.cpa,
    -- 1 = platform with the highest spend for that advertiser and day.
    ROW_NUMBER() OVER (
        PARTITION BY platform_daily.stat_date, platform_daily.advertiser_id
        ORDER BY platform_daily.total_cost DESC
    ) AS platform_rank
FROM (
    SELECT
        DATE(FROM_UNIXTIME(i.event_time DIV 1000)) AS stat_date,
        i.platform_id,
        i.advertiser_id,
        COUNT(DISTINCT i.impr_id) AS pv,
        SUM(i.cost) AS total_cost,
        -- COUNT(DISTINCT ...) already ignores NULL conversion ids.
        COUNT(DISTINCT v.conversion_id) AS conv_cnt,
        SUM(i.cost) / NULLIF(COUNT(DISTINCT v.conversion_id), 0) AS cpa
    FROM fact_impression AS i
    LEFT JOIN fact_click AS c
        ON i.impr_id = c.impr_id
    LEFT JOIN fact_conversion AS v
        ON c.click_id = v.click_id
    -- Yesterday as a half-open range in epoch milliseconds.
    WHERE i.event_time >= UNIX_TIMESTAMP(DATE_SUB(CURDATE(), INTERVAL 1 DAY)) * 1000
      AND i.event_time <  UNIX_TIMESTAMP(CURDATE()) * 1000
    GROUP BY
        DATE(FROM_UNIXTIME(i.event_time DIV 1000)),
        i.platform_id,
        i.advertiser_id
) AS platform_daily;
Doris应用表设计
2.1 核心应用表
sql
-- Real-time dashboard table (refresh cadence: 5 minutes).
CREATE TABLE rt_dashboard_metrics (
    stat_time     DATETIME,
    advertiser_id INT,
    campaign_id   INT,
    pv            BIGINT,
    click_cnt     BIGINT,
    ctr           DECIMAL(5, 4),
    cost          DECIMAL(12, 2),
    roi           DECIMAL(6, 2)
-- Doris native tables use the OLAP engine; "DORIS" is not a valid engine name.
) ENGINE=OLAP
DUPLICATE KEY (stat_time, advertiser_id, campaign_id)
-- Partitions are created by the dynamic-partition scheduler below. The
-- previous single partition (VALUES LESS THAN "2025-12-08") would reject
-- every row with a stat_time on or after that date.
PARTITION BY RANGE (stat_time) ()
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64
PROPERTIES (
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    -- Pre-create partitions three days ahead. No "start" is set, so old
    -- partitions are never dropped automatically.
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "64"
);
-- Daily advertiser report (refresh cadence: once per day).
CREATE TABLE daily_advertiser_report (
    report_date            DATE,
    advertiser_id          INT,
    advertiser_name        STRING,
    total_cost             DECIMAL(12, 2),
    total_revenue          DECIMAL(12, 2),
    roi                    DECIMAL(6, 2),
    cpa                    DECIMAL(8, 2),
    top_campaign           STRING,
    budget_used_pct        DECIMAL(5, 2),
    forecast_exceed_budget INT  -- 0: within budget, 1: may exceed budget
-- Doris native tables use the OLAP engine; "DORIS" is not a valid engine name.
) ENGINE=OLAP
-- One row per advertiser and day; reloading a day replaces the existing row.
UNIQUE KEY (report_date, advertiser_id)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64;
-- Channel benchmark table (refresh cadence: hourly).
CREATE TABLE hourly_channel_compare (
    compare_hour    DATETIME,
    advertiser_id   INT,
    baidu_cpa       DECIMAL(8, 2),
    douyin_cpa      DECIMAL(8, 2),
    kuaishou_cpa    DECIMAL(8, 2),
    xiaohongshu_cpa DECIMAL(8, 2),
    best_channel    STRING,
    worst_channel   STRING,
    avg_cpa         DECIMAL(8, 2),
    -- NOTE(review): the original comment described this column as the
    -- standard deviation although it is named "variance"; confirm which
    -- statistic is actually stored.
    cpa_variance    DECIMAL(8, 2)
-- Doris native tables use the OLAP engine; "DORIS" is not a valid engine name.
) ENGINE=OLAP
DUPLICATE KEY (compare_hour, advertiser_id)
-- PARTITION BY RANGE needs a partition list; the empty list is filled by the
-- dynamic-partition scheduler configured below. Without any partition the
-- table could not accept rows.
PARTITION BY RANGE (compare_hour) ()
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 32
PROPERTIES (
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    -- Pre-create partitions three days ahead. No "start" is set, so old
    -- partitions are never dropped automatically.
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "32"
);
-- User conversion funnel table: stage counts and stage-to-stage rates per
-- day, advertiser and campaign.
CREATE TABLE user_conversion_funnel (
    funnel_date              DATE,
    advertiser_id            INT,
    campaign_id              INT,
    impression_count         BIGINT,
    click_count              BIGINT,
    landing_count            BIGINT,
    add_to_cart_count        BIGINT,
    purchase_count           BIGINT,
    imp_to_click_rate        DECIMAL(5, 4),
    click_to_landing_rate    DECIMAL(5, 4),
    landing_to_purchase_rate DECIMAL(5, 4),
    overall_conversion_rate  DECIMAL(5, 4)
-- Doris native tables use the OLAP engine; "DORIS" is not a valid engine name.
) ENGINE=OLAP
-- One row per day, advertiser and campaign; reloading replaces the row.
UNIQUE KEY (funnel_date, advertiser_id, campaign_id)
DISTRIBUTED BY HASH (advertiser_id) BUCKETS 64;
BI看板实现
3.1 实时看板结构
ini
┌─────────────────────────────────────────────────────┐
│ 广告数仓实时监控看板 │
├─────────────────────────────────────────────────────┤
│ │
│ [实时数据概览卡] │
│ ├─ 当日投放成本: ¥1,234,567 (↑15% vs 昨日) │
│ ├─ 当日点击数: 12,345,678 (↑8%) │
│ ├─ 当日转化数: 123,456 (↑12%) │
│ └─ 实时ROI: 4.2x (目标: 4.0x) ✓ │
│ │
│ [时间序列图表] │
│ ├─ 小时成本趋势 (折线图) │
│ │ └─ 显示成本在一天内的波动 │
│ ├─ 小时点击数 (柱状图) │
│ │ └─ 显示不同时段的流量差异 │
│ └─ 实时ROI (仪表板) │
│ └─ 实时显示当日平均ROI值 │
│ │
│ [渠道对比分析] │
│ ├─ 各平台成本分布 (饼图) │
│ │ └─ 百度(25%) | 抖音(35%) | 快手(20%) | 其他(20%)│
│ ├─ 各平台CPA对比 (条形图) │
│ │ └─ 左排序: 最好的CPA在最左 │
│ └─ 各平台转化率对比 │
│ └─ 抖音>快手>百度 │
│ │
│ [采购商排行榜] │
│ ├─ 消费金额Top 10 │
│ ├─ ROI Top 10 │
│ ├─ CPA最低Top 10 │
│ └─ 转化数Top 10 │
│ │
│ [告警与异常检测] │
│ ├─ 红色告警区域: │
│ │ ├─ 某采购商成本超额 │
│ │ ├─ 某渠道CPA突增>30% │
│ │ └─ 数据延迟>15分钟 │
│ └─ 黄色预警区域: │
│ ├─ 某采购商成本接近限额 │
│ └─ 某渠道流量异常波动 │
│ │
│ [查询过滤器] │
│ ├─ 日期范围选择 │
│ ├─ 采购商过滤 (单选/多选) │
│ ├─ 渠道过滤 │
│ ├─ 活动过滤 │
│ └─ 自定义指标组合 │
│ │
└─────────────────────────────────────────────────────┘
3.2 BI工具配置 (Metabase示例)
json
{
  "dashboards": [
    {
      "name": "广告实时监控看板",
      "description": "实时广告投放效果监控",
      "refresh_rate": "300s",
      "cards": [
        {
          "name": "当日投放成本",
          "type": "scalar",
          "query": "SELECT SUM(total_cost) AS total_cost FROM daily_advertiser_report WHERE report_date = CURDATE()",
          "compare_with": "SELECT SUM(total_cost) AS total_cost FROM daily_advertiser_report WHERE report_date = DATE_SUB(CURDATE(), INTERVAL 1 DAY)",
          "threshold": {
            "good": 0,
            "bad": 10000000
          }
        },
        {
          "name": "小时成本趋势",
          "type": "line_chart",
          "query": "SELECT hour_time, SUM(total_cost) AS total_cost FROM hourly_campaign_stats WHERE hour_time >= CURDATE() GROUP BY hour_time ORDER BY hour_time",
          "x_axis": "hour_time",
          "y_axis": "total_cost"
        },
        {
          "name": "渠道CPA对比",
          "type": "bar_chart",
          "query": "SELECT 'baidu' AS channel, AVG(baidu_cpa) AS cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR) UNION ALL SELECT 'douyin' AS channel, AVG(douyin_cpa) AS cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR) UNION ALL SELECT 'kuaishou' AS channel, AVG(kuaishou_cpa) AS cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR) UNION ALL SELECT 'xiaohongshu' AS channel, AVG(xiaohongshu_cpa) AS cpa FROM hourly_channel_compare WHERE compare_hour >= DATE_SUB(NOW(), INTERVAL 24 HOUR)",
          "x_axis": "channel",
          "y_axis": "cpa"
        },
        {
          "name": "转化漏斗",
          "type": "funnel_chart",
          "query": "SELECT SUM(impression_count) AS impression_count, SUM(click_count) AS click_count, SUM(landing_count) AS landing_count, SUM(add_to_cart_count) AS add_to_cart_count, SUM(purchase_count) AS purchase_count FROM user_conversion_funnel WHERE funnel_date = CURDATE()",
          "funnel_stages": ["展示", "点击", "访问", "加购", "支付"]
        }
      ]
    }
  ]
}
决策API开发
4.1 核心API设计
python
# api_adtech.py - FastAPI应用
from fastapi import FastAPI, Query
from typing import List, Optional
from datetime import datetime, timedelta
import doris_client
# FastAPI application object; the route decorators in this file attach to it.
app = FastAPI(title="广告数仓决策API")
# Shared Doris client used by the handlers to run SQL.
# NOTE(review): 8030 is the same FE port the Flink sink uses as 'fenodes'
# (the HTTP endpoint). If doris_client speaks the MySQL protocol it would
# normally need the FE query port instead — confirm against doris_client.
doris = doris_client.DorisClient(host="doris-fe", port=8030)
# API 1: real-time cost monitoring
@app.get("/v1/advertiser/{advertiser_id}/current_cost")
async def get_current_cost(
    advertiser_id: int,
    time_range: str = Query("today", enum=["today", "week", "month"])
):
    """
    Return the advertiser's accumulated spend for the requested time range.

    Args:
        advertiser_id: Advertiser ID.
        time_range: Look-back window, one of "today", "week" or "month".

    Returns:
        Whatever ``doris.query`` returns for the aggregate query below
        (columns: advertiser_id, current_cost, budget_used_pct).

        Target response shape from the design (not all fields are produced
        yet):
        {
            "advertiser_id": 1001,
            "advertiser_name": "品牌A",
            "current_cost": 125000.50,
            "budget_limit": 200000,
            "budget_used_pct": 62.5,
            "daily_avg_cost": 15625.06,
            "forecast_total_cost": 187500,
            "status": "normal"  // or "warning" or "critical"
        }

    Raises:
        HTTPException: 400 when ``time_range`` is not a supported value.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    # Number of days to look back for each supported range. "month" keeps the
    # original 30-day window; previously time_range was ignored entirely.
    lookback_days = {"today": 0, "week": 7, "month": 30}
    if time_range not in lookback_days:
        raise HTTPException(
            status_code=400,
            detail=f"unsupported time_range: {time_range!r}",
        )
    days = lookback_days[time_range]

    # Both interpolated values are integers (int() is applied explicitly and
    # `days` comes from the fixed mapping above), so no untrusted text reaches
    # the SQL string.
    # NOTE(review): daily_advertiser_report has no `budget` column, so the
    # original MAX(budget) could not run; budget_used_pct is returned instead.
    # Confirm where the budget limit should come from.
    query = f"""
        SELECT
            advertiser_id,
            SUM(total_cost) AS current_cost,
            MAX(budget_used_pct) AS budget_used_pct
        FROM daily_advertiser_report
        WHERE advertiser_id = {int(advertiser_id)}
          AND report_date >= DATE_SUB(CURDATE(), INTERVAL {days} DAY)
        GROUP BY advertiser_id
    """
    result = doris.query(query)
    # Return the cost data; the handler previously fell through and returned
    # None.
    return result
# API 2: channel comparison / optimisation advice
@app.get("/v1/advertiser/{advertiser_id}/channel_optimization")
async def get_channel_optimization(
    advertiser_id: int,
    optimization_goal: str = Query("roi", enum=["roi", "cpa", "volume"])
):
    """
    Return per-channel average CPA for the advertiser over the last 7 days.

    Args:
        advertiser_id: Advertiser ID.
        optimization_goal: Optimisation target ("roi", "cpa" or "volume").
            NOTE(review): hourly_channel_compare only stores CPA figures, so
            the goal does not influence the query yet.

    Returns:
        Whatever ``doris.query`` returns for the aggregate query below
        (columns: baidu_cpa, douyin_cpa, kuaishou_cpa, xiaohongshu_cpa,
        data_points).

        Target response shape from the design (not produced yet):
        {
            "recommend_actions": [
                {
                    "channel": "douyin",
                    "action": "增加投放",
                    "expected_improvement": "预计CPA降低15%",
                    "current_cpa": 45.2,
                    "benchmark_cpa": 38.5
                },
                {
                    "channel": "baidu",
                    "action": "减少投放",
                    "reason": "CPA较高, 转化率低于平均"
                }
            ],
            "total_potential_roi_increase": "25%",
            "confidence_score": 0.92
        }
    """
    # hourly_channel_compare is a wide table with one CPA column per channel;
    # it has no platform_id / cpa / roi columns, so the original query could
    # not run. advertiser_id is cast with int() before interpolation.
    query = f"""
        SELECT
            AVG(baidu_cpa) AS baidu_cpa,
            AVG(douyin_cpa) AS douyin_cpa,
            AVG(kuaishou_cpa) AS kuaishou_cpa,
            AVG(xiaohongshu_cpa) AS xiaohongshu_cpa,
            COUNT(*) AS data_points
        FROM hourly_channel_compare
        WHERE advertiser_id = {int(advertiser_id)}
          AND compare_hour >= DATE_SUB(NOW(), INTERVAL 7 DAY)
    """
    # The query was previously built but never executed, and nothing was
    # returned.
    return doris.query(query)
# API 3: budget early warning
@app.get("/v1/advertiser/{advertiser_id}/budget_forecast")
async def get_budget_forecast(advertiser_id: int):
    """
    Forecast whether the advertiser will exceed this month's budget.

    Not implemented yet. The endpoint answers 501 so that clients do not
    mistake the former ``null`` / HTTP 200 response for real data.

    Target response shape from the design:
    {
        "forecast_status": "warning",  // normal/warning/critical
        "monthly_budget": 1000000,
        "current_spent": 625000,
        "projected_end_month": 950000,
        "days_remaining": 8,
        "daily_burn_rate": 75000,
        "recommended_action": "保持当前投放"
    }

    Raises:
        HTTPException: always, with status 501 (Not Implemented).
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    raise HTTPException(status_code=501, detail="budget_forecast is not implemented")
# API 4: A/B test data
@app.get("/v1/advertiser/{advertiser_id}/ab_test_results/{test_id}")
async def get_ab_test_results(advertiser_id: int, test_id: str):
    """
    Return the live results of an A/B test.

    Not implemented yet. The endpoint answers 501 so that clients do not
    mistake the former ``null`` / HTTP 200 response for real data.

    Target response shape from the design:
    {
        "test_id": "test_creative_v1",
        "control_group": {
            "sample_size": 50000,
            "conversion_rate": 0.0245,
            "cpa": 48.5
        },
        "test_group": {
            "sample_size": 50000,
            "conversion_rate": 0.0312,
            "cpa": 38.2
        },
        "statistical_significance": 0.95,  // 95% confidence
        "recommendation": "test_group 胜出, 建议全量投放"
    }

    Raises:
        HTTPException: always, with status 501 (Not Implemented).
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    raise HTTPException(status_code=501, detail="ab_test_results is not implemented")
# API 5: automatic bidding suggestion
@app.post("/v1/advertiser/{advertiser_id}/auto_bidding_suggestion")
async def get_auto_bidding_suggestion(
    advertiser_id: int,
    target_cpa: Optional[float] = None,
    target_roi: Optional[float] = None
):
    """
    Return an automatic bidding suggestion for the advertiser.

    Not implemented yet. The endpoint answers 501 so that clients do not
    mistake the former ``null`` / HTTP 200 response for real data.

    Args:
        advertiser_id: Advertiser ID.
        target_cpa: Target CPA (unit: yuan).
        target_roi: Target ROI multiple.

    Target response shape from the design:
    {
        "suggested_bid": 12.5,
        "current_bid": 10.0,
        "adjustment": "+25%",
        "expected_daily_volume": 15000,
        "expected_daily_cost": 187500,
        "expected_cpa": 42.3,
        "execution_recommendation": "立即执行"  // or "谨慎调整" or "保持不变"
    }

    Raises:
        HTTPException: always, with status 501 (Not Implemented).
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    raise HTTPException(status_code=501, detail="auto_bidding_suggestion is not implemented")
自动化优化
5.1 自动化规则引擎
sql
-- Automatic bid adjustment rules.
-- Rule 1: adjust the bid when CPA drifts away from the target.
-- Schedule: hourly.
SELECT
    advertiser_id,
    campaign_id,
    CASE
        WHEN current_cpa > target_cpa * 1.2 THEN ROUND(current_bid * 0.85, 2)  -- cut by 15%
        WHEN current_cpa > target_cpa * 1.1 THEN ROUND(current_bid * 0.92, 2)  -- cut by 8%
        WHEN current_cpa < target_cpa * 0.8 THEN ROUND(current_bid * 1.15, 2)  -- raise by 15%
        ELSE current_bid
    END AS recommended_bid,
    -- The label must cover the same bands as recommended_bid. The threshold
    -- is 1.1 (not 1.2) so that the 8% cut is reported as 'reduce' instead of
    -- 'hold'.
    CASE
        WHEN current_cpa > target_cpa * 1.1 THEN 'reduce'
        WHEN current_cpa < target_cpa * 0.8 THEN 'increase'
        ELSE 'hold'
    END AS action
FROM campaign_daily_stats
WHERE stat_date = CURDATE()
  AND current_cpa IS NOT NULL;
-- Rule 2: flag poorly performing creatives for pausing or a budget cut.
-- Schedule: daily.
-- Branch order matters: the stricter 'pause' condition is tested first.
SELECT
    advertiser_id,
    campaign_id,
    creative_id,
    conversion_rate,
    CASE
        WHEN impression_count > 100000 AND conversion_rate < 0.001
            THEN 'pause'
        WHEN impression_count > 50000 AND conversion_rate < 0.002
            THEN 'reduce_budget'
        ELSE 'keep'
    END AS action
FROM creative_performance
WHERE stat_date = CURDATE()
-- Worst converters first (ascending is the default sort direction).
ORDER BY conversion_rate;
-- Rule 3: scale up the daily budget of high-performing creatives.
-- Schedule: daily.
-- Only creatives with roi > 3 are considered; the 100 with the highest ROI
-- are returned.
SELECT
    advertiser_id,
    campaign_id,
    creative_id,
    current_daily_budget,
    CASE
        WHEN conversion_rate > 0.005 AND roi > 5
            THEN ROUND(current_daily_budget * 1.3, 0)   -- +30%
        WHEN conversion_rate > 0.003 AND roi > 4
            THEN ROUND(current_daily_budget * 1.15, 0)  -- +15%
        ELSE current_daily_budget
    END AS recommended_budget
FROM creative_performance
WHERE roi > 3
  AND stat_date = CURDATE()
ORDER BY roi DESC
LIMIT 100;
总结
该层实现了:
- 实时指标: 5分钟级指标更新
- 可视化: 完整的BI看板体系
- API服务: 5个核心决策API
- 自动化: 规则驱动的自动优化