1
-- Event log table. Each row is identified by `id`; every column is mandatory.
-- The 36-character id columns are presumably textual UUIDs (TODO confirm).
CREATE TABLE events (
    id              CHAR(36)                  PRIMARY KEY,
    game_id         VARCHAR(24)               NOT NULL,
    user_device_id  CHAR(36)                  NOT NULL,
    event_name      VARCHAR(100)              NOT NULL,
    generated_at    TIMESTAMP WITH TIME ZONE  NOT NULL
);

-- Day-1 retention per calendar day.
--   time_stamp     : calendar date (session time zone) of the cohort's events
--   new_users      : distinct devices that emitted 'new_user' on that date
--   returned_users : devices from that cohort that emitted any event other than
--                    'new_user' in the same game on the following calendar date
--   retention      : returned_users / new_users, NULL when the day has no new users
--
-- For day-N retention (N = 1..7) replace the `+ 1` day offset in the join with `+ N`.
SELECT
    events.generated_at::DATE AS time_stamp,
    COUNT(DISTINCT
        CASE
            WHEN events.event_name = 'new_user' THEN events.user_device_id
        END
    ) AS new_users,
    -- Unmatched rows carry a NULL future_events.user_device_id, which
    -- COUNT(DISTINCT ...) ignores, so only genuine returners are counted.
    COUNT(DISTINCT future_events.user_device_id) AS returned_users,
    COUNT(DISTINCT future_events.user_device_id)::FLOAT
        -- NULLIF guards against division by zero on days without any 'new_user' event.
        / NULLIF(
            COUNT(DISTINCT
                CASE
                    WHEN events.event_name = 'new_user' THEN events.user_device_id
                END
            ),
            0
        ) AS retention
FROM events
LEFT JOIN events AS future_events
    -- Only the 'new_user' cohort may be matched to later activity; otherwise
    -- already-existing users would inflate returned_users and retention.
    ON  events.event_name = 'new_user'
    AND future_events.event_name <> 'new_user'
    AND future_events.user_device_id = events.user_device_id
    AND future_events.game_id = events.game_id
    -- Compare calendar dates, not raw timestamps: an exact-instant match
    -- (to the microsecond, 24 hours later) would almost never succeed.
    AND future_events.generated_at::DATE = events.generated_at::DATE + 1
GROUP BY
    events.generated_at::DATE
ORDER BY
    time_stamp;

我试图通过上面的 SQL 查询计算第 N 天(N 为 1 到 7 之间的任意整数)的用户留存率。由于我是 HPE Vertica 的新手,我想不出最合适的聚合投影(aggregate projection)创建语句,而投影可以显著提高查询的性能。

4

1 回答 1

1

聚合投影对连接查询没有帮助。

您可以创建一个常规投影,按连接列进行分段和排序,以实现性能提升:

-- Projection tuned for the retention self-join on (user_device_id, game_id).
--
-- * Segmentation uses only the columns compared for equality on both sides of
--   the join. Including generated_at in the hash would scatter a device's
--   day-D and day-D+1 rows across nodes and force a resegment at query time.
-- * Sort order goes from the lowest-cardinality column to the highest so the
--   RLE-encoded columns form long runs. generated_at is nearly unique per
--   row, so it is left on the default encoding instead of RLE.
CREATE PROJECTION events_p1 (
    id,
    game_id ENCODING RLE,
    user_device_id ENCODING RLE,
    event_name,
    generated_at
) AS
SELECT
    id,
    game_id,
    user_device_id,
    event_name,
    generated_at
FROM events
ORDER BY
    game_id,
    user_device_id,
    generated_at
SEGMENTED BY HASH(user_device_id, game_id) ALL NODES KSAFE 1;
于 2017-08-25T16:39:23.883 回答