-- -------------------------------------------
-- create table and populate it with 10M rows
-- -------------------------------------------
-- Start from an empty scratch schema. CASCADE drops every object inside tmp,
-- so do not point this script at a schema that holds data you want to keep.
-- IF EXISTS lets the script run on a database where tmp was never created;
-- without it the DROP raises an error on the first run.
DROP SCHEMA IF EXISTS tmp CASCADE;
CREATE SCHEMA tmp;
-- Resolve unqualified table names (old_echo) inside the scratch schema.
SET search_path = tmp;
-- Test table: one row per timestamp plus a numeric payload.
-- The primary key on the_time also creates the index used to scan by time.
CREATE TABLE old_echo (
    the_time timestamp        NOT NULL PRIMARY KEY,
    payload  double precision NOT NULL
);
-- Fill the table with 10,000,000 rows spaced 1 millisecond apart,
-- counting backwards from now(); each row gets a random payload.
INSERT INTO old_echo (the_time, payload)
SELECT
    now() - (gs.n * interval '1 msec'),
    random()
FROM generate_series(1, 10000000) AS gs(n);

-- Optional: uncomment to delete roughly 80% of the rows at random.
-- DELETE FROM old_echo WHERE random() < 0.8;

-- Clean up after the bulk load and refresh the planner statistics.
VACUUM ANALYZE old_echo;
-- Report the earliest and latest timestamp in the table and the span between them.
SELECT
    MIN(the_time) AS first,
    MAX(the_time) AS last,
    CAST(MAX(the_time) - MIN(the_time) AS interval) AS width
FROM old_echo;
-- Time the retrieval of a 1-second slice that starts at the midpoint of the
-- table's time range. The aggregate subquery yields a single row holding the
-- range bounds; cross-joining it makes those bounds available to the filter
-- on oe.the_time. The range is half-open: start inclusive, end exclusive.
EXPLAIN ANALYZE
SELECT *
FROM old_echo AS oe
CROSS JOIN (
    SELECT
        MIN(the_time) AS first,
        MAX(the_time) AS last,
        CAST(MAX(the_time) - MIN(the_time) AS interval) AS width,
        CAST((MAX(the_time) - MIN(the_time)) / 2 AS interval) AS half
    FROM old_echo
) AS mima
WHERE oe.the_time >= mima.first + mima.half
  AND oe.the_time <  mima.first + mima.half + CAST('1 sec' AS interval);
结果:
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.06..59433.67 rows=1111124 width=64) (actual time=0.101..1.307 rows=1000 loops=1)
-> Result (cost=0.06..0.07 rows=1 width=0) (actual time=0.049..0.050 rows=1 loops=1)
InitPlan 1 (returns $0)
-> Limit (cost=0.00..0.03 rows=1 width=8) (actual time=0.022..0.022 rows=1 loops=1)
-> Index Scan using old_echo_pkey on old_echo (cost=0.00..284873.62 rows=10000115 width=8) (actual time=0.021..0.021 rows=1 loops=1)
Index Cond: (the_time IS NOT NULL)
InitPlan 2 (returns $1)
-> Limit (cost=0.00..0.03 rows=1 width=8) (actual time=0.009..0.010 rows=1 loops=1)
-> Index Scan Backward using old_echo_pkey on old_echo (cost=0.00..284873.62 rows=10000115 width=8) (actual time=0.009..0.009 rows=1 loops=1)
Index Cond: (the_time IS NOT NULL)
-> Index Scan using old_echo_pkey on old_echo oe (cost=0.01..34433.30 rows=1111124 width=16) (actual time=0.042..0.764 rows=1000 loops=1)
Index Cond: ((the_time >= (($0) + ((($1 - $0) / 2::double precision)))) AND (the_time < ((($0) + ((($1 - $0) / 2::double precision))) + '00:00:01'::interval)))
Total runtime: 1.504 ms
(13 rows)
更新:由于时间戳似乎并不唯一(顺便问一句:在这种情况下,重复的时间戳意味着什么?),我添加了一个额外的键列。这是个丑陋的权宜之计(hack),但在这里行得通。在 10M 行中删除 80% 之后,查询时间为 11 ms(行数:210/222067):
-- Variant of old_echo that tolerates repeated timestamps: the_seq is an
-- auto-generated tie-breaker, so the pair (the_time, the_seq) stays unique.
CREATE TABLE old_echo (
    the_time timestamp        NOT NULL,
    the_seq  serial           NOT NULL,  -- to catch the duplicate keys
    payload  double precision NOT NULL,
    PRIMARY KEY (the_time, the_seq)
);
-- Populate with 10,000,000 rows: the regular 1 ms spacing plus a random
-- whole number of milliseconds (0..1000) of jitter per row.
-- Adding the random will cause some timestamps to be non-unique
-- (and others to be non-existent).
-- The parentheses around random() * 1000 matter: `::` binds tighter than `*`,
-- so writing `random()*1000::integer` casts only the literal 1000 and leaves
-- the jitter fractional.
INSERT INTO old_echo (the_time, payload)
SELECT
    now() - ((gs.n + (random() * 1000)::integer) * interval '1 msec'),
    random()
FROM generate_series(1, 10000000) AS gs(n);

-- Thin the table: delete roughly 80% of the rows, chosen at random.
DELETE FROM old_echo WHERE random() < 0.8;