1

我的 Postgres 数据库中有一个大表(约 800 万行),我想用同一种方法,按若干分组列(grp)对多个数值列(超过 30 个 val 列,每列都带过滤条件)进行统计。我希望看到每一种可能组合的结果,也包括那些没有任何数据的组合,这对更好地理解数据是必要的。我尝试了几种实现方法,例如下面这种(另一种是使用多重循环加一条插入语句)。两种方法都能得到正确结果,但性能不太好,而且查询变得很难阅读。如果您能给我一些改进建议,我将非常感激!

-- Rebuild the fixture table from scratch so the script can be re-run.
-- Two grouping columns (grp1, grp2) and five integer value columns (val1..val5).
DROP TABLE IF EXISTS testdata;
CREATE TABLE testdata (
    grp1 text,
    grp2 text,
    val1 int,
    val2 int,
    val3 int,
    val4 int,
    val5 int
);

-- Sample rows, loaded in a single statement with an explicit column list so
-- the insert does not depend on the physical column order of the table.
-- Note there is no ('B', 'Y') row, so that group combination has no data.
INSERT INTO testdata (grp1, grp2, val1, val2, val3, val4, val5)
VALUES
    ('A', 'X', 1, 2, 3, 4, 5),
    ('A', 'X', 3, 3, 3, 4, 5),
    ('A', 'X', 4, 2, 3, 4, 5),
    ('A', 'Y', 4, 2, 3, 4, 5),
    ('A', 'Y', 3, 2, 3, 4, 5),
    ('B', 'X', 5, 2, 3, 4, 5);
--SELECT * FROM testdata;

-- For every (grp1, grp2, value column) combination, report how many rows of
-- testdata have a value below 3 (small), equal to 3 (medium) or above 3 (large).
-- Combinations that have no rows in testdata are still listed, with NULL counts.
WITH bucket_counts AS (
    -- Unpivot the value columns into (val_name, val) pairs and aggregate once,
    -- so this aggregation reads testdata a single time instead of once per
    -- value column. Supporting another column means adding one VALUES row.
    SELECT
        t.grp1,
        t.grp2,
        u.val_name,
        COUNT(*) FILTER (WHERE u.val < 3) AS small,
        COUNT(*) FILTER (WHERE u.val = 3) AS medium,
        COUNT(*) FILTER (WHERE u.val > 3) AS large
    FROM testdata AS t
    CROSS JOIN LATERAL (
        VALUES
            ('val1'::text, t.val1),
            ('val2', t.val2),
            ('val3', t.val3),
            ('val4', t.val4),
            ('val5', t.val5)
            -- more values
    ) AS u (val_name, val)
    GROUP BY t.grp1, t.grp2, u.val_name
)
SELECT
    g1.grp1,
    g2.grp2,
    val.column_name,
    v.small,
    v.medium,
    v.large
FROM (
    SELECT DISTINCT grp1 FROM testdata
) AS g1
CROSS JOIN (
    SELECT DISTINCT grp2 FROM testdata
) AS g2
CROSS JOIN (
    -- The list of value columns is taken from the catalog by name prefix.
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = 'public'
      AND table_name = 'testdata'
      AND column_name LIKE 'val%'
) AS val
-- LEFT JOIN keeps group/column combinations that have no aggregated row.
LEFT JOIN bucket_counts AS v
    ON g1.grp1 = v.grp1
    AND g2.grp2 = v.grp2
    AND val.column_name = v.val_name
-- column_name is included as a tie-breaker so the row order is deterministic.
ORDER BY g1.grp1, g2.grp2, val.column_name;
4

0 回答 0