sql - 从树子路径列表中查找完整路径

Question

对于具有如下数据结构的所有主题，有没有一种简单的方法可以找到其完整路径并将其更新到表中？

                 TOPIC_0000070 
                     | 
                 TOPIC_0000054
             /       |          \
    TOPIC_0000000 TOPIC_0000001 TOPIC_0000002

初始数据：

-- Sample table: one row per topic together with its (possibly partial) path.
CREATE TABLE topics (
    id   varchar,
    path ltree
);

-- Seed data: three leaves stored relative to TOPIC_0000054, that node stored
-- relative to TOPIC_0000070, and TOPIC_0000070 itself as a single-label path.
INSERT INTO topics (id, path)
VALUES
    ('TOPIC_0000000', 'TOPIC_0000054.TOPIC_0000000'),
    ('TOPIC_0000001', 'TOPIC_0000054.TOPIC_0000001'),
    ('TOPIC_0000002', 'TOPIC_0000054.TOPIC_0000002'),
    ('TOPIC_0000054', 'TOPIC_0000070.TOPIC_0000054'),
    ('TOPIC_0000070', 'TOPIC_0000070');

id           | path                      |
-------------+---------------------------+
TOPIC_0000000|TOPIC_0000054.TOPIC_0000000|
TOPIC_0000001|TOPIC_0000054.TOPIC_0000001|
TOPIC_0000002|TOPIC_0000054.TOPIC_0000002|
...          |...                        |
TOPIC_0000054|TOPIC_0000070.TOPIC_0000054|
TOPIC_0000070|TOPIC_0000070              |

预期结果：

id           | path                                    |
-------------+-----------------------------------------+
TOPIC_0000000|TOPIC_0000070.TOPIC_0000054.TOPIC_0000000|
TOPIC_0000001|TOPIC_0000070.TOPIC_0000054.TOPIC_0000001|
TOPIC_0000002|TOPIC_0000070.TOPIC_0000054.TOPIC_0000002|
...          |...                                      |
TOPIC_0000054|TOPIC_0000070.TOPIC_0000054              |
TOPIC_0000070|TOPIC_0000070                            |

score 1 · Accepted Answer

您可以通过以下一系列步骤来实现：

我将您的表重新创建为

-- Same two columns as the question's table, with path kept as plain text.
CREATE TABLE topics (
    id   varchar,
    path varchar
);

并根据您的示例插入相同的行

1.递归查询

-- Collect every candidate path per topic: start from the stored paths and
-- keep extending each one to the left while its first label is the id of
-- another (non-root) row.
WITH RECURSIVE path_finder AS (
    SELECT
        t.id,
        t.path
    FROM topics AS t
    UNION
    SELECT
        pf.id,
        -- Append everything after the first label of the current path (the
        -- substring starts at the first dot), so intermediate levels are
        -- kept when more than one hop is required.
        p.path || substring(pf.path FROM position('.' IN pf.path))
    FROM topics AS p
    INNER JOIN path_finder AS pf
        -- substring(s, 0, n) yields the n - 1 characters before the first dot
        ON substring(pf.path, 0, position('.' IN pf.path)) = p.id
    -- Rows whose path equals their id are roots: nothing to prepend.
    WHERE p.id <> p.path
)
SELECT
    id,
    path
FROM path_finder
ORDER BY id, path;

此处的连接条件检查 pf.path 字符串的前缀（第一个点号之前的部分）是否等于 topics 表中某一行的 id：

substring(pf.path,0,position('.' in pf.path)) = p.id

以上输出将正确显示所有组合

defaultdb-> select * from path_finder order by 1,2;
      id       |                   path                    
---------------+-------------------------------------------
 TOPIC_0000000 | TOPIC_0000054.TOPIC_0000000
 TOPIC_0000000 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000000
 TOPIC_0000001 | TOPIC_0000054.TOPIC_0000001
 TOPIC_0000001 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000001
 TOPIC_0000002 | TOPIC_0000054.TOPIC_0000002
 TOPIC_0000002 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000002
 TOPIC_0000054 | TOPIC_0000070.TOPIC_0000054
 TOPIC_0000070 | TOPIC_0000070
(8 rows)

2.排名

现在对第 1 步的输出进行排名：我们根据字符串中点号（.）的数量进行排名，可以使用

-- Rank the candidate paths of each topic by depth: the path containing the
-- most dots is the longest one and receives rnk = 1.
SELECT
    id,
    path,
    rank() OVER (
        PARTITION BY id
        -- number of dots = length with dots minus length without them
        ORDER BY char_length(path) - char_length(replace(path, '.', '')) DESC
    ) AS rnk
FROM path_finder
WHERE path <> ''
-- Sort by rank within each id (deepest path first).
ORDER BY id, rnk

结果

      id       |                   path                    | rnk 
---------------+-------------------------------------------+-----
 TOPIC_0000000 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000000 |   1
 TOPIC_0000000 | TOPIC_0000054.TOPIC_0000000               |   2
 TOPIC_0000001 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000001 |   1
 TOPIC_0000001 | TOPIC_0000054.TOPIC_0000001               |   2
 TOPIC_0000002 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000002 |   1
 TOPIC_0000002 | TOPIC_0000054.TOPIC_0000002               |   2
 TOPIC_0000054 | TOPIC_0000070.TOPIC_0000054               |   1
 TOPIC_0000070 | TOPIC_0000070                             |   1
(8 rows)

3.最终查询：只保留 rnk = 1 的行

完整代码

-- Step 1: collect every candidate path per topic by repeatedly prepending
-- the path of the row whose id matches the first label of the current path.
WITH RECURSIVE path_finder AS (
    SELECT
        t.id,
        t.path
    FROM topics AS t
    UNION
    SELECT
        pf.id,
        -- Append everything after the first label of the current path (the
        -- substring starts at the first dot), so intermediate levels are
        -- kept when more than one hop is required.
        p.path || substring(pf.path FROM position('.' IN pf.path))
    FROM topics AS p
    INNER JOIN path_finder AS pf
        -- substring(s, 0, n) yields the n - 1 characters before the first dot
        ON substring(pf.path, 0, position('.' IN pf.path)) = p.id
    -- Rows whose path equals their id are roots: nothing to prepend.
    WHERE p.id <> p.path
),
-- Step 2: rank the candidates of each topic, deepest path (most dots) first.
ranking_rows AS (
    SELECT
        id,
        path,
        rank() OVER (
            PARTITION BY id
            ORDER BY char_length(path) - char_length(replace(path, '.', '')) DESC
        ) AS rnk
    FROM path_finder
    WHERE path <> ''
)
-- Step 3: keep only the deepest path per topic. The ORDER BY lives on the
-- outer query because ordering inside a CTE does not fix the final row order.
SELECT
    id,
    path,
    rnk
FROM ranking_rows
WHERE rnk = 1
ORDER BY id, path;

结果

      id       |                   path                    | rnk 
---------------+-------------------------------------------+-----
 TOPIC_0000000 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000000 |   1
 TOPIC_0000001 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000001 |   1
 TOPIC_0000002 | TOPIC_0000070.TOPIC_0000054.TOPIC_0000002 |   1
 TOPIC_0000054 | TOPIC_0000070.TOPIC_0000054               |   1
 TOPIC_0000070 | TOPIC_0000070                             |   1
(5 rows)

score 1 · Accepted Answer

我会首先根据每一行的路径是否以根节点的 id 开头，将您的行拆分为 complete 和 incomplete 两类。

with top_paths as (   -- Find the roots: rows whose path has exactly one label
  select path as top
    from topics
   where nlevel(path) = 1
), complete as (      -- Find the complete paths: their first label is a root
  select t.id, t.path
    from topics t
         join top_paths tp
           on tp.top = subpath(t.path, 0, 1)
), incomplete as (    -- Get the incomplete paths: first label is not a root
  select t.id, t.path
    from topics t
   -- Anti-join on the root test instead of "id not in (...)": NOT IN yields
   -- no rows at all as soon as the subquery returns a NULL id.
   where not exists (select 1
                       from top_paths tp
                      where tp.top = subpath(t.path, 0, 1))
)

然后我会将 incomplete 记录与 complete 记录连接（join）起来，并借助一些 ltree 操作来重写 incomplete 的路径。

-- Rows that are already complete pass through unchanged.
select id, path
  from complete
union all
-- Graft each incomplete path onto its parent's complete path: drop the first
-- label (the parent's id, matched by the join) and append the remainder.
-- Two-argument subpath() runs to the end of the path, so no length cap is needed.
select i.id, c.path || subpath(i.path, 1) as path
  from incomplete i
       join complete c  -- make this a left join if you are worried about breaking the tree
         on c.id::ltree = subpath(i.path, 0, 1)
 order by path;

┌───────────────┬───────────────────────────────────────────┐
│      id       │                   path                    │
├───────────────┼───────────────────────────────────────────┤
│ TOPIC_0000070 │ TOPIC_0000070                             │
│ TOPIC_0000054 │ TOPIC_0000070.TOPIC_0000054               │
│ TOPIC_0000000 │ TOPIC_0000070.TOPIC_0000054.TOPIC_0000000 │
│ TOPIC_0000001 │ TOPIC_0000070.TOPIC_0000054.TOPIC_0000001 │
│ TOPIC_0000002 │ TOPIC_0000070.TOPIC_0000054.TOPIC_0000002 │
└───────────────┴───────────────────────────────────────────┘
(5 rows)

把这个查询改写成 update 语句时，就不再需要输出那些 complete 行了。

-- Rewrite every incomplete path in place by prefixing it with the complete
-- path of its parent row.
-- NOTE: a single run only repairs rows whose parent row is already complete;
-- if a parent is itself incomplete, rerun the statement until it updates 0 rows.
with top_paths as (   -- roots: rows whose path has exactly one label
  select path as top
    from topics
   where nlevel(path) = 1
), complete as (      -- paths whose first label is a root
  select t.id, t.path
    from topics t
         join top_paths tp
           on tp.top = subpath(t.path, 0, 1)
), incomplete as (    -- paths whose first label is not a root
  select t.id, t.path
    from topics t
   -- Anti-join on the root test instead of "id not in (...)": NOT IN yields
   -- no rows at all as soon as the subquery returns a NULL id.
   where not exists (select 1
                       from top_paths tp
                      where tp.top = subpath(t.path, 0, 1))
), fixes as (
  -- Drop the first label (the parent's id, matched by the join) and append
  -- the remainder to the parent's path; two-argument subpath() runs to the
  -- end of the path, so no length cap is needed.
  select i.id, c.path || subpath(i.path, 1) as path
    from incomplete i
         join complete c
           on c.id::ltree = subpath(i.path, 0, 1)
)
update topics
   set path = fixes.path
  from fixes
 where fixes.id = topics.id;

结果：

-- Show the table after the update, sorted by path.
select id, path
  from topics
 order by path;

┌───────────────┬───────────────────────────────────────────┐
│      id       │                   path                    │
├───────────────┼───────────────────────────────────────────┤
│ TOPIC_0000070 │ TOPIC_0000070                             │
│ TOPIC_0000054 │ TOPIC_0000070.TOPIC_0000054               │
│ TOPIC_0000000 │ TOPIC_0000070.TOPIC_0000054.TOPIC_0000000 │
│ TOPIC_0000001 │ TOPIC_0000070.TOPIC_0000054.TOPIC_0000001 │
│ TOPIC_0000002 │ TOPIC_0000070.TOPIC_0000054.TOPIC_0000002 │
└───────────────┴───────────────────────────────────────────┘
(5 rows)

sql - 从树子路径列表中查找完整路径

2 回答 2

1.递归查询

2.排名

3.最终选择只过滤ranking = 1的行

Related

Reference