以下是适用于 BigQuery 标准 SQL 的查询:
#standardSQL
-- For every (user, eventdate) pair, report the number of sessions on that day,
-- the number of sessions accumulated on all earlier days, and the gap in days
-- since the user's previous active day. Only rows with at least 3 earlier
-- sessions and a gap of at most 3 days are returned.
WITH daily_sessions AS (
  -- One row per user per calendar day with that day's session count.
  SELECT
    user,
    eventdate,
    COUNT(*) AS sessions_in_a_day
  FROM t
  GROUP BY user, eventdate
),
session_history AS (
  SELECT
    user,
    eventdate,
    sessions_in_a_day,
    -- Running total over strictly earlier days: the frame ends at 1 PRECEDING,
    -- so the current day's own sessions are not included.
    SUM(sessions_in_a_day) OVER (
      PARTITION BY user
      ORDER BY eventdate
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS total_sessions_before,
    -- Days elapsed since this user's previous day with sessions.
    DATE_DIFF(
      eventdate,
      LAG(eventdate) OVER (PARTITION BY user ORDER BY eventdate),
      DAY
    ) AS delay
  FROM daily_sessions
)
SELECT
  user,
  eventdate,
  sessions_in_a_day,
  total_sessions_before,
  delay
FROM session_history
-- A user's first day has no earlier rows, so both computed columns are NULL
-- there and the comparisons below drop it.
WHERE delay <= 3
  AND total_sessions_before >= 3
-- Optional: append "ORDER BY user, eventdate" for a deterministic row order.
您可以使用下面的虚拟数据来测试/试用上面的查询:
#standardSQL
-- Self-contained demo: the same sessions-before-next-visit query, run against
-- an inline fixture instead of a real table.
WITH t AS (
  -- Dummy data: one row per session for a single user 'A'. The query below
  -- only counts rows; it never reads the session column.
  SELECT 'A' AS user, DATE '2018-02-05' AS eventdate, 1 AS session
  UNION ALL SELECT 'A', DATE '2018-02-05', 2
  UNION ALL SELECT 'A', DATE '2018-02-06', 3
  UNION ALL SELECT 'A', DATE '2018-02-06', 4
  UNION ALL SELECT 'A', DATE '2018-02-09', 5
  UNION ALL SELECT 'A', DATE '2018-02-09', 6
  UNION ALL SELECT 'A', DATE '2018-02-10', 7
  UNION ALL SELECT 'A', DATE '2018-02-13', 8
),
daily_sessions AS (
  -- One row per user per calendar day with that day's session count.
  SELECT
    user,
    eventdate,
    COUNT(*) AS sessions_in_a_day
  FROM t
  GROUP BY user, eventdate
),
session_history AS (
  SELECT
    user,
    eventdate,
    sessions_in_a_day,
    -- Running total over strictly earlier days: the frame ends at 1 PRECEDING,
    -- so the current day's own sessions are not included.
    SUM(sessions_in_a_day) OVER (
      PARTITION BY user
      ORDER BY eventdate
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS total_sessions_before,
    -- Days elapsed since this user's previous day with sessions.
    DATE_DIFF(
      eventdate,
      LAG(eventdate) OVER (PARTITION BY user ORDER BY eventdate),
      DAY
    ) AS delay
  FROM daily_sessions
)
SELECT
  user,
  eventdate,
  sessions_in_a_day,
  total_sessions_before,
  delay
FROM session_history
-- A user's first day has no earlier rows, so both computed columns are NULL
-- there and the comparisons below drop it.
WHERE delay <= 3
  AND total_sessions_before >= 3
ORDER BY user, eventdate
结果如下:
Row user eventdate sessions_in_a_day total_sessions_before delay
1 A 2018-02-09 2 4 3
2 A 2018-02-10 1 6 1
3 A 2018-02-13 1 7 3
通过调整 WHERE 子句中的条件,您可以让查询适应您需要的任何场景。
在上面的示例中,查询只显示这样的用户:此前已经至少有 3 个会话,并且在 3 天之内进行了下一次会话。如果您只对那些此前恰好有 3 个会话、随后进行了第 4 次会话的用户感兴趣,可以添加相应的过滤条件。