SQL查询连续登录用户方法详解

1. 基本数据表结构

假设有一个登录记录表(下文示例均采用 MySQL 8.0+ 语法):

sql 复制代码
-- Login history stored as (user_id, login_date) pairs.
-- No PRIMARY KEY or UNIQUE constraint is declared, so the same user/date pair
-- can appear more than once; queries that count days must de-duplicate first.
-- NOTE(review): both columns are nullable as declared; confirm whether NULLs
-- are actually expected here.
CREATE TABLE login_log (
    -- Identifier of the user who logged in.
    user_id INT,
    -- Calendar date of the login (DATE, so no time-of-day component).
    login_date DATE
);

示例数据:

sql 复制代码
-- Sample data for the queries below.
--   user 1: two separate 3-day runs (01-01..01-03 and 01-05..01-07)
--   user 2: longest run is 2 days (01-01..01-02)
-- The column list is spelled out so the statement keeps working if columns
-- are later added to or reordered in login_log.
INSERT INTO login_log (user_id, login_date) VALUES
(1, '2024-01-01'),
(1, '2024-01-02'),
(1, '2024-01-03'),
(1, '2024-01-05'),  -- gap: user 1 has no login on 2024-01-04
(1, '2024-01-06'),
(1, '2024-01-07'),
(2, '2024-01-01'),
(2, '2024-01-02'),
(2, '2024-01-04');  -- gap: user 2 has no login on 2024-01-03

2. 方法一:使用窗口函数(推荐)

2.1 查询连续登录3天及以上的用户

sql 复制代码
-- Users with a run of 3 or more consecutive login days (gaps-and-islands).
-- Idea: within one user, consecutive dates minus their 1-based position in
-- date order all land on the same anchor date, so that anchor identifies a run.
WITH daily_logins AS (
    -- Collapse multiple logins on the same day into one row per user per day.
    SELECT DISTINCT
        user_id,
        login_date
    FROM login_log
),
numbered_days AS (
    SELECT
        user_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS day_seq
    FROM daily_logins
),
islands AS (
    SELECT
        user_id,
        login_date,
        login_date - INTERVAL day_seq DAY AS island_key
    FROM numbered_days
)
SELECT
    user_id,
    MIN(login_date) AS start_date,
    MAX(login_date) AS end_date,
    COUNT(*) AS consecutive_days
FROM islands
GROUP BY user_id, island_key
HAVING COUNT(*) >= 3  -- minimum run length in days
ORDER BY user_id, start_date;

2.2 使用LEAD/LAG函数的简化版

sql 复制代码
-- Lists every login date that is part of a run of at least 3 consecutive
-- calendar days for the same user.
-- For each date it looks one and two rows back and ahead (per user, in date
-- order) and keeps the date if it is the last, middle, or first day of some
-- 3-day window. The neighbour columns are NULL near the edges of a user's
-- history; comparisons with NULL are not true, so those branches drop out.
WITH consecutive_groups AS (
    SELECT
        user_id,
        login_date,
        LAG(login_date, 1) OVER w AS prev_date,
        LAG(login_date, 2) OVER w AS prev2_date,
        LEAD(login_date, 1) OVER w AS next_date,
        LEAD(login_date, 2) OVER w AS next2_date
    FROM (
        -- One row per user per day, so neighbouring rows are distinct dates.
        SELECT DISTINCT user_id, login_date
        FROM login_log
    ) AS t
    WINDOW w AS (PARTITION BY user_id ORDER BY login_date)
)
SELECT
    user_id,
    login_date AS consecutive_date
FROM consecutive_groups
WHERE
    -- Last day of a 3-day window: the two previous days both exist.
    (prev_date = login_date - INTERVAL 1 DAY
     AND prev2_date = login_date - INTERVAL 2 DAY)
    OR
    -- Middle day: the day before and the day after both exist.
    (prev_date = login_date - INTERVAL 1 DAY
     AND next_date = login_date + INTERVAL 1 DAY)
    OR
    -- First day of a 3-day window: the two following days both exist.
    (next_date = login_date + INTERVAL 1 DAY
     AND next2_date = login_date + INTERVAL 2 DAY)
ORDER BY user_id, consecutive_date;

3. 方法二:使用自连接

sql 复制代码
-- Users who logged in on three consecutive days, found with self-joins:
-- l1 is day N, l2 must be day N+1 and l3 must be day N+2 for the same user.
-- The inner joins already require both following days to exist, so no
-- additional EXISTS checks are needed.
-- DISTINCT collapses the one-row-per-qualifying-start-day output (and any
-- duplicate login rows) down to one row per user.
SELECT DISTINCT
    l1.user_id
FROM login_log AS l1
INNER JOIN login_log AS l2
    ON l2.user_id = l1.user_id
    AND l2.login_date = l1.login_date + INTERVAL 1 DAY
INNER JOIN login_log AS l3
    ON l3.user_id = l1.user_id
    AND l3.login_date = l1.login_date + INTERVAL 2 DAY;

4. 方法三:使用递归CTE(复杂但功能强大)

sql 复制代码
-- Users whose longest run of consecutive login days is at least 3, computed
-- by walking each user's login days in date order with a recursive CTE.
--
-- daily_logins de-duplicates login_log and numbers each user's days, so the
-- recursive step advances exactly one row per user per iteration. Without
-- the de-duplication, repeated (user_id, login_date) rows would multiply the
-- rows produced at every step.
--
-- NOTE(review): recursion depth equals a user's number of distinct login
-- days; MySQL's cte_max_recursion_depth (default 1000) may need raising for
-- long histories.
WITH RECURSIVE daily_logins AS (
    SELECT
        user_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
    FROM (
        SELECT DISTINCT user_id, login_date
        FROM login_log
    ) AS d
),
consecutive_login AS (
    -- Anchor: each user's earliest login day starts a run of length 1.
    SELECT
        user_id,
        login_date,
        rn,
        login_date AS start_date,
        1 AS consecutive_days
    FROM daily_logins
    WHERE rn = 1

    UNION ALL

    -- Step: move to the user's next login day. If it is exactly one day
    -- after the current one the run continues; otherwise a new run starts.
    SELECT
        dl.user_id,
        dl.login_date,
        dl.rn,
        CASE
            WHEN dl.login_date = cl.login_date + INTERVAL 1 DAY
            THEN cl.start_date
            ELSE dl.login_date
        END AS start_date,
        CASE
            WHEN dl.login_date = cl.login_date + INTERVAL 1 DAY
            THEN cl.consecutive_days + 1
            ELSE 1
        END AS consecutive_days
    FROM consecutive_login AS cl
    INNER JOIN daily_logins AS dl
        ON dl.user_id = cl.user_id
        AND dl.rn = cl.rn + 1
)
SELECT
    user_id,
    MAX(consecutive_days) AS max_consecutive_days
FROM consecutive_login
GROUP BY user_id
HAVING MAX(consecutive_days) >= 3;

5. 实用查询示例

5.1 查询每个用户的最大连续登录天数

sql 复制代码
-- Longest run of consecutive login days for every user, longest first.
WITH daily_logins AS (
    -- One row per user per calendar day.
    SELECT DISTINCT
        user_id,
        login_date
    FROM login_log
),
numbered_days AS (
    SELECT
        user_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS day_seq
    FROM daily_logins
),
islands AS (
    -- Consecutive dates share the same (date - position) anchor.
    SELECT
        user_id,
        login_date - INTERVAL day_seq DAY AS island_key
    FROM numbered_days
),
island_sizes AS (
    SELECT
        user_id,
        island_key,
        COUNT(*) AS consecutive_days
    FROM islands
    GROUP BY user_id, island_key
)
SELECT
    user_id,
    MAX(consecutive_days) AS max_consecutive_days
FROM island_sizes
GROUP BY user_id
ORDER BY max_consecutive_days DESC;

5.2 查询指定时间段内的连续登录

sql 复制代码
-- Runs of 7 or more consecutive login days inside January 2024.
-- The date filter is applied before runs are built, so a run that extends
-- beyond the window is measured only by its days inside the window.
WITH daily_logins AS (
    -- One row per user per calendar day within the inclusive date window.
    SELECT DISTINCT
        user_id,
        login_date
    FROM login_log
    WHERE login_date >= '2024-01-01'
      AND login_date <= '2024-01-31'
),
numbered_days AS (
    SELECT
        user_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS day_seq
    FROM daily_logins
),
islands AS (
    -- Consecutive dates share the same (date - position) anchor.
    SELECT
        user_id,
        login_date,
        login_date - INTERVAL day_seq DAY AS island_key
    FROM numbered_days
)
SELECT
    user_id,
    COUNT(*) AS consecutive_days,
    MIN(login_date) AS start_date,
    MAX(login_date) AS end_date
FROM islands
GROUP BY user_id, island_key
HAVING COUNT(*) >= 7  -- minimum run length in days
ORDER BY consecutive_days DESC;

5.3 查询连续登录中断的情况

sql 复制代码
-- Breaks in each user's login history: for every pair of successive login
-- days more than one day apart, report both days and how many days were
-- skipped in between.
WITH daily_logins AS (
    -- One row per user per calendar day.
    SELECT DISTINCT
        user_id,
        login_date
    FROM login_log
),
next_logins AS (
    -- Pair each login day with the same user's following login day
    -- (NULL for the user's most recent day).
    SELECT
        user_id,
        login_date,
        LEAD(login_date) OVER (PARTITION BY user_id ORDER BY login_date) AS next_date
    FROM daily_logins
)
SELECT
    user_id,
    login_date AS last_login_before_gap,
    next_date AS next_login_after_gap,
    DATEDIFF(next_date, login_date) - 1 AS missed_days
FROM next_logins
WHERE DATEDIFF(next_date, login_date) > 1
ORDER BY user_id, login_date;

6. 性能优化建议

  1. 创建索引
sql 复制代码
-- Composite index on (user_id, login_date): the consecutive-login queries
-- partition or join on user_id and then order or compare by login_date.
CREATE INDEX idx_user_login ON login_log(user_id, login_date);
  2. 分区表:如果数据量很大,按月份或用户ID范围分区

  3. 物化视图:对于频繁查询的结果可以创建物化视图

  4. 定期清理:删除历史数据,只保留最近N天的数据

7. 不同数据库的语法差异

| 函数/特性 | MySQL | PostgreSQL | SQL Server | Oracle |
| --- | --- | --- | --- | --- |
| 日期加减 | DATE_ADD() | + INTERVAL | DATEADD() | + INTERVAL |
| 日期差 | DATEDIFF() | - | DATEDIFF() | - |
| 行号 | ROW_NUMBER() | ROW_NUMBER() | ROW_NUMBER() | ROW_NUMBER() |
| 递归CTE | 支持(8.0+) | 支持 | 支持 | 支持 |

选择哪种方法取决于:

  • 数据量:大数据量建议使用窗口函数
  • 查询频率:频繁查询建议建立物化视图
  • 数据库版本:确保支持相关函数
  • 业务需求:需要实时结果,还是可以接受一定延迟
相关推荐
ChineHe7 小时前
Redis数据类型篇002_详解Strings核心命令与存储结构
数据库·redis·缓存
_UMR_7 小时前
springboot集成Jasypt实现配置文件启动时自动解密-ENC
java·spring boot·后端
程序员小假7 小时前
我们来说说 Cookie、Session、Token、JWT
java·后端
清水白石0087 小时前
《从零到进阶:Pydantic v1 与 v2 的核心差异与零成本校验实现原理》
数据库·python
电商API&Tina7 小时前
京东 API 数据采集接口接入与行业分析
运维·服务器·网络·数据库·django·php
短剑重铸之日7 小时前
《SpringBoot4.0初识》第一篇:前瞻与思想
java·开发语言·后端·spring·springboot4.0
蓝色王者8 小时前
springboot 2.6.13 整合flowable6.8.1
java·spring boot·后端
柠檬叶子C8 小时前
PostgreSQL 忘记 postgres 密码怎么办?(已解决)
数据库·postgresql
Tao____8 小时前
基于Ruoyi开发的IOT物联网平台
java·网络·物联网·mqtt·网络协议