1

我正在使用 PostgreSQL 10.6。我有几张按天分区的表,每个分区只存放对应那一天的数据。我想从这些表中查询某一天范围内的行。

-- Start from a clean slate so the script can be re-run; dropping a
-- partitioned parent also drops its partitions.
DROP TABLE IF EXISTS request, request_identity;

-- Parent table for requests: list-partitioned, one partition per calendar day.
CREATE TABLE IF NOT EXISTS request (
    id          BIGINT NOT NULL,
    record_date DATE   NOT NULL,  -- partition key
    payload     TEXT   NOT NULL
)
PARTITION BY LIST (record_date);

-- Partition for 2001-01-01, with its own index on id.
CREATE TABLE IF NOT EXISTS request_p1
    PARTITION OF request
    FOR VALUES IN ('2001-01-01');
CREATE INDEX IF NOT EXISTS i_request_p1_id
    ON request_p1 (id);

-- Partition for 2001-01-02, with its own index on id.
CREATE TABLE IF NOT EXISTS request_p2
    PARTITION OF request
    FOR VALUES IN ('2001-01-02');
CREATE INDEX IF NOT EXISTS i_request_p2_id
    ON request_p2 (id);

-- Populate request with 200,000 rows: ids 1..100000 are dated 2001-01-01
-- and ids 100001..200000 are dated 2001-01-02, so each daily partition
-- receives 100,000 rows.
-- A single set-based INSERT replaces the row-by-row PL/pgSQL loop: it
-- produces the same rows in the same order, stays atomic (one statement),
-- and avoids 200,000 separate single-row INSERT executions.
INSERT INTO request (id, record_date, payload)
SELECT
    gs.id,
    CASE
        WHEN gs.id <= 100000 THEN DATE '2001-01-01'
        ELSE DATE '2001-01-02'
    END AS record_date,
    'abc' AS payload
FROM generate_series(1, 200000) AS gs (id);

-- Parent table for name/value identities attached to a request via
-- parent_id: list-partitioned by day, using the same partition key
-- column as request.
CREATE TABLE IF NOT EXISTS request_identity (
    record_date    DATE   NOT NULL,  -- partition key
    parent_id      BIGINT NOT NULL,  -- compared against request.id by the queries below
    identity_name  VARCHAR(32),
    identity_value VARCHAR(32)
)
PARTITION BY LIST (record_date);

-- Partition for 2001-01-01, indexed for lookups by (identity_name, identity_value).
CREATE TABLE IF NOT EXISTS request_identity_p1
    PARTITION OF request_identity
    FOR VALUES IN ('2001-01-01');
CREATE INDEX IF NOT EXISTS i_request_identity_p1_payload
    ON request_identity_p1 (identity_name, identity_value);

-- Partition for 2001-01-02, indexed for lookups by (identity_name, identity_value).
CREATE TABLE IF NOT EXISTS request_identity_p2
    PARTITION OF request_identity
    FOR VALUES IN ('2001-01-02');
CREATE INDEX IF NOT EXISTS i_request_identity_p2_payload
    ON request_identity_p2 (identity_name, identity_value);

-- Populate request_identity with one 'NAME' identity per parent_id:
-- parent_ids 1..100000 are dated 2001-01-01 and 100001..200000 are dated
-- 2001-01-02; identity_value is 'somename' followed by the parent_id.
-- A single set-based INSERT replaces the row-by-row PL/pgSQL loop: it
-- produces the same rows in the same order, stays atomic (one statement),
-- and avoids 200,000 separate single-row INSERT executions.
INSERT INTO request_identity (parent_id, record_date, identity_name, identity_value)
SELECT
    gs.parent_id,
    CASE
        WHEN gs.parent_id <= 100000 THEN DATE '2001-01-01'
        ELSE DATE '2001-01-02'
    END AS record_date,
    'NAME' AS identity_name,
    'somename' || gs.parent_id AS identity_value
FROM generate_series(1, 200000) AS gs (parent_id);

-- Refresh planner statistics after the bulk load so the EXPLAIN estimates
-- below reflect the loaded data.
ANALYZE request;
ANALYZE request_identity;

当我只查询 1 天的数据时,得到了一个不错的查询计划:

-- Single-day lookup: both date ranges cover only 2001-01-01.
-- Columns are qualified with aliases so the correlation between the outer
-- request row (r.id) and the subquery (ri.parent_id) is explicit.
EXPLAIN ANALYZE
SELECT
    r.id,
    r.record_date,
    r.payload
FROM request AS r
WHERE r.record_date BETWEEN '2001-01-01' AND '2001-01-01'
    AND EXISTS (
        SELECT 1
        FROM request_identity AS ri
        WHERE ri.parent_id = r.id
            AND ri.identity_name = 'NAME'
            AND ri.identity_value = 'somename555'
            AND ri.record_date BETWEEN '2001-01-01' AND '2001-01-01'
    )
LIMIT 100;

Limit  (cost=8.74..16.78 rows=1 width=16)
  ->  Nested Loop  (cost=8.74..16.78 rows=1 width=16)
        ->  HashAggregate  (cost=8.45..8.46 rows=1 width=8)
              Group Key: request_identity_p1.parent_id
              ->  Append  (cost=0.42..8.44 rows=1 width=8)
                    ->  Index Scan using i_request_identity_p1_payload on request_identity_p1  (cost=0.42..8.44 rows=1 width=8)
                          Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename555'::text))
                          Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-01'::date))
        ->  Append  (cost=0.29..8.32 rows=1 width=16)
              ->  Index Scan using i_request_p1_id on request_p1  (cost=0.29..8.32 rows=1 width=16)
                    Index Cond: (id = request_identity_p1.parent_id)
                    Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-01'::date))

但是如果我查询 2 天或更长的时间范围,PostgreSQL 会先分别把 request_identity 的所有分区和 request 的所有分区的行追加(Append)在一起,然后再对它们做连接。下面就是没有按我期望的方式执行的 SQL:

-- Two-day lookup: both date ranges cover 2001-01-01 through 2001-01-02.
-- Columns are qualified with aliases so the correlation between the outer
-- request row (r.id) and the subquery (ri.parent_id) is explicit.
-- NOTE(review): the subquery is correlated on id only, not on record_date.
-- PostgreSQL 11+ documents enable_partitionwise_join, which applies only
-- when the join condition includes the partition key; it is not available
-- on 10.6 -- confirm against the target server version.
EXPLAIN ANALYZE
SELECT
    r.id,
    r.record_date,
    r.payload
FROM request AS r
WHERE r.record_date BETWEEN '2001-01-01' AND '2001-01-02'
    AND EXISTS (
        SELECT 1
        FROM request_identity AS ri
        WHERE ri.parent_id = r.id
            AND ri.identity_name = 'NAME'
            AND ri.identity_value = 'somename1777'
            AND ri.record_date BETWEEN '2001-01-01' AND '2001-01-02'
    )
LIMIT 100;

Limit  (cost=17.19..50.21 rows=2 width=16)
  ->  Nested Loop  (cost=17.19..50.21 rows=2 width=16)
        ->  Unique  (cost=16.90..16.91 rows=2 width=8)
              ->  Sort  (cost=16.90..16.90 rows=2 width=8)
                    Sort Key: request_identity_p1.parent_id
                    ->  Append  (cost=0.42..16.89 rows=2 width=8)
                          ->  Index Scan using i_request_identity_p1_payload on request_identity_p1  (cost=0.42..8.44 rows=1 width=8)
                                Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename1777'::text))
                                Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
                          ->  Index Scan using i_request_identity_p2_payload on request_identity_p2  (cost=0.42..8.44 rows=1 width=8)
                                Index Cond: (((identity_name)::text = 'NAME'::text) AND ((identity_value)::text = 'somename1777'::text))
                                Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
        ->  Append  (cost=0.29..16.63 rows=2 width=16)
              ->  Index Scan using i_request_p1_id on request_p1  (cost=0.29..8.32 rows=1 width=16)
                    Index Cond: (id = request_identity_p1.parent_id)
                    Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))
              ->  Index Scan using i_request_p2_id on request_p2  (cost=0.29..8.32 rows=1 width=16)
                    Index Cond: (id = request_identity_p1.parent_id)
                    Filter: ((record_date >= '2001-01-01'::date) AND (record_date <= '2001-01-02'::date))

在我的场景中,对这些 Append 结果做连接(使用嵌套循环)是没有意义的,因为相互匹配的行只会出现在同一天对应的那组分区内。

我期望的结果是:PostgreSQL 先分别将 request_p1 与 request_identity_p1、request_p2 与 request_identity_p2 进行连接,然后再把各自的连接结果追加(Append)起来。

问题是:

有没有办法让连接在每一天对应的分区组内分别执行?

谢谢。

4

0 回答 0