在Convert recursive function to view的基础上,我想通过创建节点父节点的快照来加快从图中任意节点到其根节点的路径检索。这个想法是递归树遍历受中间快照的限制,这些快照避免了任何进一步的递归,从而加快了执行时间。我还没有进行负载测试,所以除了这个简单的示例之外,我不知道它的性能如何,但早期的试验已经表明存在一些瓶颈。我很乐意就如何加快/简化查询发表评论。我正在使用 Postgres 9.2.2.0 (20)。
DROP TABLE IF EXISTS revision CASCADE;
CREATE TABLE revision (
id serial primary key,
parent_revision_id int references revision(id),
username varchar(128),
ts timestamp without time zone
);
DROP TABLE IF EXISTS revision_snapshot CASCADE;
CREATE TABLE revision_snapshot (
id serial primary key,
revision_id int,
parent_revision_id int,
depth int
);
CREATE OR REPLACE FUNCTION create_revision_snapshot(_rev int)
RETURNS void AS
$func$
DELETE FROM revision_snapshot WHERE revision_id=$1;
INSERT INTO revision_snapshot (revision_id, parent_revision_id, depth)
(SELECT $1, id, depth FROM revision_tree($1));
$func$ LANGUAGE sql;
-- Recursively return path from '_rev' to root
CREATE OR REPLACE FUNCTION revision_tree(_rev int)
RETURNS TABLE(id int, parent_revision_id int, depth int) AS
$func$
WITH RECURSIVE rev_list(id, parent_revision_id, depth) AS (
SELECT t.id, t.parent_revision_id, 1
FROM revision t
WHERE t.id = $1
UNION ALL
SELECT t.id, t.parent_revision_id, r.depth + 1
FROM rev_list r
JOIN revision t ON t.id = r.parent_revision_id
)
SELECT t.id, t.parent_revision_id, t.depth
FROM rev_list t
ORDER BY t.id;
$func$ LANGUAGE sql;
-- Fast version of 'revision_tree' (to be). This version will return the
-- revision tree making use of snapshots (recursively returning the path from
-- specified revision id to last snapshot of the path to the root + the snapshot)
CREATE OR REPLACE FUNCTION revision_tree_perf(_rev int)
RETURNS TABLE(parent_revision_id int) AS
$func$
BEGIN
CREATE TEMP TABLE graph_result ON COMMIT DROP AS
WITH RECURSIVE rev_list(id, parent_revision_id, depth) AS (
SELECT t.id, t.parent_revision_id, 1
FROM revision t
WHERE t.id = $1
UNION ALL
SELECT t.id, t.parent_revision_id, r.depth + 1
FROM rev_list r
JOIN revision t ON t.id = r.parent_revision_id
WHERE not(t.id in (select revision_id from revision_snapshot))
)
SELECT t.id, t.parent_revision_id, t.depth
FROM rev_list t
ORDER BY t.id;
RETURN QUERY
SELECT g.parent_revision_id FROM graph_result AS g WHERE g.parent_revision_id IS NOT NULL
UNION
SELECT s.parent_revision_id FROM revision_snapshot AS s WHERE
s.revision_id = (SELECT min(q.parent_revision_id) FROM graph_result as q) ORDER BY parent_revision_id;
END;
$func$
LANGUAGE 'plpgsql';
-- Example tree
--
-- +-- <10>
-- /
-- +-- <4> -- <8> --- <9> -+- <11> --- <15> --- <16> --- <17>
-- /
-- <1> --- <2> --- <3> -+
-- \
-- +-- <5> --- <6> --- <7> --- <12> -+- <14> --- <18>
-- \
-- \
-- \
-- \
-- +-- <13> --- <19> --- <20> --- <21>
--
INSERT INTO revision (username, ts, parent_revision_id) VALUES
('someone', now(), null) -- 1
,('someone', now(), 1) -- 2
,('someone', now(), 2) -- 3
,('someone', now(), 3) -- 4
,('someone', now(), 3) -- 5
,('someone', now(), 5) -- 6
,('someone', now(), 6) -- 7
,('someone', now(), 4) -- 8
,('someone', now(), 8) -- 9
,('someone', now(), 9) -- 10
,('someone', now(), 9) -- 11
,('someone', now(), 7) -- 12
,('someone', now(), 12) -- 13
,('someone', now(), 12) -- 14
,('someone', now(), 11) -- 15
,('someone', now(), 15) -- 16
,('someone', now(), 16) -- 17
,('someone', now(), 14) -- 18
,('someone', now(), 13) -- 19
,('someone', now(), 19) -- 20
,('someone', now(), 20); -- 21
-- Create a revision snapsnot
select create_revision_snapshot(13);
-- This query is meant to be faster ...
select * from revision_tree_perf(21);
-- ... than this one
select * from revision_tree(21);
上面的例子
select create_revision_snapshot(13);
select * from revision_tree_perf(21);
旨在产生一个记录集,该记录集表示从 21 到根的路径,即(21, 20, 19, 13, 12, 7, 6, 5, 3, 2, 1)
. 部分解决方案是通过遍历树(21 到 13,因为有 13 的快照,因此无需再遍历树)并使用从 13 到根的已经“缓存”路径(取自修订快照)。希望这会让它更容易理解......
更新:
我想出了一个潜在的改进。这只是在黑暗中刺伤,但我可以想象exists
条款相当昂贵。我现在在修订表中标记了快照的存在:
CREATE TABLE revision (
id serial primary key,
parent_revision_id int references revision(id),
username varchar(128),
has_snapshot boolean default false,
ts timestamp without time zone
);
CREATE OR REPLACE FUNCTION create_revision_snapshot(_rev int) RETURNS void AS $func$
DELETE FROM revision_snapshot WHERE revision_id=$1;
INSERT INTO revision_snapshot (revision_id, parent_revision_id, depth)
(SELECT $1, id, depth FROM revision_tree($1));
-- Mark revision table to avoid costly exists/in clause
UPDATE revision SET has_snapshot = true WHERE id=$1;
$func$ LANGUAGE sql;
revision_tree_perf
这将SP的 CTE 部分更改为
WITH RECURSIVE rev_list(id, parent_revision_id, depth) AS (
SELECT t.id, t.parent_revision_id, 1 -- AS depth
FROM revision t
WHERE t.id = $1
UNION ALL
SELECT t.id, t.parent_revision_id, r.depth + 1
FROM rev_list r
JOIN revision t ON t.id = r.parent_revision_id
WHERE t.has_snapshot = false
)
SELECT t.id, t.parent_revision_id, t.depth FROM rev_list t ORDER BY t.id;
这应该很快执行。难题的另一部分是从修订 id where 返回 revision_snapshot 的内容,has_snapshot=true
并将这两个结果连接起来。问题是如何从 CTE 获取此修订 ID。我可以将 CTE 的查询结果存储在临时表中并查询修订 ID,或者建议不要编写 CTE 并将其编写为循环。这样,人们可以跟踪循环退出的修订 id(何时has_snapshot = true
)。但我不确定这将如何与 CTE 进行比较。
人们如何看待这种方法?