主要是日期型数据函数的区别,做了如下改写:
- 将 TRUNC(TO_DATE(d), 'MM') 改写成 date_trunc('month', d::date)
- 将 FLOOR(MONTHS_BETWEEN(recharge_date, '1970-01-01')) 改写成 date_sub('month', DATE '1970-01-01', recharge_date)
- 将 ADD_MONTHS(MIN(recharge_date), SUM(recharge_months)) 改写成 date_trunc('month', date_add(MIN(recharge_date), INTERVAL (SUM(recharge_months)::int) month))
sql
-- =============================================================
-- Continuous VIP membership intervals ("catch-up indicator" method).
--
-- Input : vip_log(uid, d, m) -- d is the recharge date given as text,
--         m is the number of months bought by that recharge.
-- Output: one row per (uid, continuous interval) with start_date and
--         end_date, ordered by uid and start_date.
--
-- Techniques used: dates are snapped to the first day of their month,
-- the month total is cast to an integer before being used as an
-- interval, and identical rows are de-duplicated.
-- =============================================================
WITH vip_log(uid, d, m) AS (
    VALUES
        -- uid = 1: recharges separated by gaps
        (1, '2025-01-01', 2),
        (1, '2025-02-01', 1),
        (1, '2025-05-01', 1),
        (1, '2025-10-01', 3),
        (1, '2025-11-01', 1),
        -- uid = 2: overlapping recharges, some made mid-month (core test case)
        (2, '2025-01-01', 2),
        (2, '2025-02-01', 2),
        (2, '2025-04-15', 2),
        (2, '2025-06-20', 2),
        -- uid = 3: recharges listed out of chronological order
        (3, '2025-03-01', 3),
        (3, '2025-01-01', 2),
        (3, '2025-02-01', 2)
),

-- Step 1: normalise every recharge to the first day of its month so the
-- day-of-month cannot disturb the month arithmetic below, and drop rows
-- that are identical after normalisation.
-- NOTE(review): DISTINCT also merges two separate recharges of the same
-- uid that fall in the same month with the same month count -- confirm
-- this is the intended business rule.
monthly_recharge AS (
    SELECT DISTINCT
        uid,
        date_trunc('month', d::date) AS recharge_date,
        m                            AS recharge_months
    FROM vip_log
),

-- Step 2: per-row catch-up indicator.
--   gap_val = (whole months from 1970-01-01 to recharge_date)
--           - (months bought by all earlier rows of the same uid)
-- The frame ends at "1 PRECEDING", so the current row's own months are
-- excluded; the first row of a uid has an empty frame, hence COALESCE to 0.
-- A row continues the current interval exactly when its recharge month is
-- not later than the interval's first month plus the months bought so far
-- in that interval, i.e. when its gap_val does not exceed the gap_val of
-- the row that opened the interval.
catch_up AS (
    SELECT
        uid,
        recharge_date,
        recharge_months,
        date_sub('month', DATE '1970-01-01', recharge_date)
            - COALESCE(
                SUM(recharge_months) OVER (
                    PARTITION BY uid
                    ORDER BY recharge_date
                    ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                ),
                0
            ) AS gap_val
    FROM monthly_recharge
),

-- Step 3: the running maximum of gap_val serves as the interval id.
-- It only increases on a row whose gap_val exceeds every earlier one for
-- that uid (a new interval head) and stays constant for rows that
-- continue the interval.
interval_tagged AS (
    SELECT
        uid,
        recharge_date,
        recharge_months,
        MAX(gap_val) OVER (
            PARTITION BY uid
            ORDER BY recharge_date
        ) AS group_id
    FROM catch_up
)

-- Step 4: collapse each interval to a single row.
SELECT
    uid,
    -- Interval start: earliest (month-normalised) recharge date in the group.
    MIN(recharge_date) AS start_date,
    -- Interval end: start plus the total months bought within the group,
    -- truncated to the first day of the month.
    date_trunc(
        'month',
        date_add(MIN(recharge_date), INTERVAL (SUM(recharge_months)::int) month)
    ) AS end_date
FROM interval_tagged
GROUP BY uid, group_id
ORDER BY uid, start_date;
/*
┌───────┬────────────┬────────────┐
│ uid │ start_date │ end_date │
│ int32 │ date │ date │
├───────┼────────────┼────────────┤
│ 1 │ 2025-01-01 │ 2025-04-01 │
│ 1 │ 2025-05-01 │ 2025-06-01 │
│ 1 │ 2025-10-01 │ 2026-02-01 │
│ 2 │ 2025-01-01 │ 2025-09-01 │
│ 3 │ 2025-01-01 │ 2025-08-01 │
└───────┴────────────┴────────────┘
*/