0

我有一个小型查找表,我想用从该查找表中取出的几个 UUID 值来过滤一个较大的表。我一直在努力避免扫描整个大表。到目前为止,只有在 where 子句中硬编码过滤值时,才能只扫描较大表的部分分区。这似乎不对,我遗漏了什么?

下面的示例设置了示例数据,并提供了三个过滤较大标签值表的示例。如 partitionsAssigned 和 bytesAssigned 数字所示,仅示例 3 正在扫描部分分区。

数据设置

// Small reference table that maps each PATHUUID to a PATH and a PATHTYPE (20 rows).
CREATE OR REPLACE TEMPORARY TABLE TAGMAP (PATHUUID TEXT, PATH TEXT, PATHTYPE TEXT);

// All rows are loaded by one statement with an explicit column list, so the
// load does not depend on the table's column order.
INSERT INTO TAGMAP (PATHUUID, PATH, PATHTYPE)
VALUES
    ('28da4f4a-4ede-5482-a6b4-614f7ae32589', 'PATH1', 'TYPE1'),
    ('612a7e20-7f73-57ae-a427-1bd6a06790b1', 'PATH2', 'TYPE1'),
    ('e0a2ba9c-f833-5d8c-94c7-e9e5504ae59b', 'PATH3', 'TYPE1'),
    ('b8e5a1eb-555b-50bc-b68a-c91201c66979', 'PATH4', 'TYPE2'),
    ('1ae3a8b7-02e1-53a8-a88e-cb72fce8d6a4', 'PATH5', 'TYPE2'),
    ('2e7105f3-3d71-5601-9366-00fa5efb43e3', 'PATH6', 'TYPE2'),
    ('5cdb2715-cf76-5f6b-a3c8-ce1b34d2f34c', 'PATH7', 'TYPE3'),
    ('521d5cd6-d324-500c-8d1b-8132fc08a781', 'PATH8', 'TYPE4'),
    ('2c56928f-6832-5028-b760-b6480da38463', 'PATH9', 'TYPE5'),
    ('6ba4b93e-1280-5dcd-afb7-656283cc74e9', 'PATH10', 'TYPE6'),
    ('c5378f1f-8d5b-5e05-9caf-127d80b3ceb8', 'PATH11', 'TYPE7'),
    ('389a885f-61c6-55d0-9cd3-874ae1c242a5', 'PATH12', 'TYPE8'),
    ('71715578-fa01-5497-841a-b874ba758ffb', 'PATH13', 'TYPE9'),
    ('321296da-6c4a-51f0-8c2b-db5a6de400c2', 'PATH14', 'TYPE10'),
    ('28824be8-bace-5be5-8ff0-80135a27ffd4', 'PATH15', 'TYPE10'),
    ('1e37be24-3753-5418-8163-9eead501d343', 'PATH16', 'TYPE10'),
    ('f4de2248-690d-574e-b1f6-412180ae5d23', 'PATH17', 'TYPE13'),
    ('40c04ed3-0301-5cd8-91ae-530976e426b2', 'PATH18', 'TYPE14'),
    ('57cbc90e-a441-5118-9d2c-aff510c770b5', 'PATH19', 'TYPE15'),
    ('81c01629-55f7-5266-abf0-f4ecafc4b0be', 'PATH20', 'TYPE16');

// Large data table whose PATHUUID maps back to TAGMAP:
// 20 inserts x 10,000,000 generated rows = 200,000,000 rows.
CREATE OR REPLACE TEMPORARY TABLE TAGVALUES (PATHUUID TEXT, VALUE INT);

// One INSERT per PATHUUID. Each batch carries a single PATHUUID and is sorted
// by the generated VALUE; the sort column is named rather than positional.
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '28da4f4a-4ede-5482-a6b4-614f7ae32589' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '612a7e20-7f73-57ae-a427-1bd6a06790b1' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT 'e0a2ba9c-f833-5d8c-94c7-e9e5504ae59b' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT 'b8e5a1eb-555b-50bc-b68a-c91201c66979' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '1ae3a8b7-02e1-53a8-a88e-cb72fce8d6a4' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '2e7105f3-3d71-5601-9366-00fa5efb43e3' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '5cdb2715-cf76-5f6b-a3c8-ce1b34d2f34c' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '521d5cd6-d324-500c-8d1b-8132fc08a781' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '2c56928f-6832-5028-b760-b6480da38463' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '6ba4b93e-1280-5dcd-afb7-656283cc74e9' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT 'c5378f1f-8d5b-5e05-9caf-127d80b3ceb8' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '389a885f-61c6-55d0-9cd3-874ae1c242a5' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '71715578-fa01-5497-841a-b874ba758ffb' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '321296da-6c4a-51f0-8c2b-db5a6de400c2' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '28824be8-bace-5be5-8ff0-80135a27ffd4' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '1e37be24-3753-5418-8163-9eead501d343' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT 'f4de2248-690d-574e-b1f6-412180ae5d23' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '40c04ed3-0301-5cd8-91ae-530976e426b2' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '57cbc90e-a441-5118-9d2c-aff510c770b5' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT '81c01629-55f7-5266-abf0-f4ecafc4b0be' AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;

// The examples below target the PATHUUIDs of the three TAGMAP rows whose PATHTYPE is 'TYPE10'.
SELECT t.PATHUUID
FROM TAGMAP AS t
WHERE t.PATHTYPE = 'TYPE10';

示例 1,全表扫描

// Example 1: count TAGVALUES rows by joining to TAGMAP on PATHUUID.
// Reported EXPLAIN figures (every partition is assigned):
//   partitionsTotal:    41
//   partitionsAssigned: 41
//   bytesAssigned:      295827968
// NOTE(review): the WHERE filter on t.PATHTYPE rejects the NULL-extended rows,
// so this LEFT JOIN yields the same count as an INNER JOIN would.
EXPLAIN
SELECT COUNT(*)
FROM TAGVALUES AS v
LEFT JOIN TAGMAP AS t
    ON t.PATHUUID = v.PATHUUID
WHERE t.PATHTYPE = 'TYPE10';

示例 2,全表扫描

// Example 2: select the TYPE10 PATHUUIDs in a CTE first, then left-join
// TAGVALUES to that filter and count the result.
// Reported EXPLAIN figures (every partition is assigned):
//   partitionsTotal:    41
//   partitionsAssigned: 41
//   bytesAssigned:      295827968
EXPLAIN
WITH PATHUUIDFilter AS (
    SELECT t.PATHUUID
    FROM TAGMAP AS t
    WHERE t.PATHTYPE = 'TYPE10'
)
SELECT COUNT(*)
FROM PATHUUIDFilter AS f
LEFT JOIN TAGVALUES AS v
    ON v.PATHUUID = f.PATHUUID;

示例 3,扫描了 15% 的分区

// Example 3: hard-code the three TYPE10 PATHUUIDs in an IN list.
// Reported EXPLAIN figures (6 of 40 TAGVALUES partitions are assigned):
//   partitionsTotal:    40
//   partitionsAssigned: 6
//   bytesAssigned:      44362752
EXPLAIN
SELECT COUNT(*)
FROM TAGVALUES AS v
WHERE v.PATHUUID IN (
    '321296da-6c4a-51f0-8c2b-db5a6de400c2',
    '28824be8-bace-5be5-8ff0-80135a27ffd4',
    '1e37be24-3753-5418-8163-9eead501d343'
);

谢谢

4

2 回答 2

0

发布上述内容时,我发现了另一个问题,其中指出连接列必须是 NUMBER(38,0) 类型才能在其示例中生效。我已将示例改为使用 md5_number_upper64 而不是 uuid_string,现在三个示例都按预期工作。我还需要对此做进一步研究,但这似乎解决了我示例中的问题。

更新后可正常工作的示例数据设置

// Reference table rebuilt with a numeric key: PATHUUID is NUMBER(38,0) and is
// derived from PATH with md5_number_upper64 (20 rows).
CREATE OR REPLACE TEMPORARY TABLE TAGMAP (PATHUUID NUMBER(38,0), PATH TEXT, PATHTYPE TEXT);

// No TRUNCATE is needed here: CREATE OR REPLACE has just produced an empty table.
// Each PATH is written once and hashed in the SELECT, so the key always
// matches the PATH stored in the same row.
INSERT INTO TAGMAP (PATHUUID, PATH, PATHTYPE)
SELECT
    md5_number_upper64(m.PATH) AS PATHUUID,
    m.PATH,
    m.PATHTYPE
FROM (
    VALUES
        ('PATH1', 'TYPE1'),
        ('PATH2', 'TYPE1'),
        ('PATH3', 'TYPE1'),
        ('PATH4', 'TYPE2'),
        ('PATH5', 'TYPE2'),
        ('PATH6', 'TYPE2'),
        ('PATH7', 'TYPE3'),
        ('PATH8', 'TYPE4'),
        ('PATH9', 'TYPE5'),
        ('PATH10', 'TYPE6'),
        ('PATH11', 'TYPE7'),
        ('PATH12', 'TYPE8'),
        ('PATH13', 'TYPE9'),
        ('PATH14', 'TYPE10'),
        ('PATH15', 'TYPE10'),
        ('PATH16', 'TYPE10'),
        ('PATH17', 'TYPE13'),
        ('PATH18', 'TYPE14'),
        ('PATH19', 'TYPE15'),
        ('PATH20', 'TYPE16')
) AS m (PATH, PATHTYPE);


// Large data table rebuilt with a numeric key: PATHUUID is NUMBER(38,0),
// computed with md5_number_upper64 from the same PATH strings used for TAGMAP.
// 20 inserts x 10,000,000 generated rows = 200,000,000 rows.
CREATE OR REPLACE TEMPORARY TABLE TAGVALUES (PATHUUID NUMBER(38,0), VALUE INT);

// No TRUNCATE is needed here: CREATE OR REPLACE has just produced an empty table.
// One INSERT per PATH. Each batch carries a single PATHUUID and is sorted by
// the generated VALUE; the sort column is named rather than positional.
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH1') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH2') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH3') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH4') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH5') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH6') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH7') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH8') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH9') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH10') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH11') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH12') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH13') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH14') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH15') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH16') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH17') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH18') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH19') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;
INSERT INTO TAGVALUES (PATHUUID, VALUE) SELECT md5_number_upper64('PATH20') AS PATHUUID, uniform(1, 9999, random(12)) AS VALUE FROM table(generator(rowcount => 10000000)) ORDER BY VALUE;

// Returns three rows: the numeric PATHUUIDs of the TAGMAP rows whose PATHTYPE is 'TYPE10'.
SELECT t.PATHUUID
FROM TAGMAP AS t
WHERE t.PATHTYPE = 'TYPE10';

示例 1

// Example 1 (numeric key): count TAGVALUES rows by joining to TAGMAP on PATHUUID.
// Reported EXPLAIN figures (only 1 of 41 partitions is assigned):
//   partitionsTotal:    41
//   partitionsAssigned: 1
//   bytesAssigned:      2048
// NOTE(review): the WHERE filter on t.PATHTYPE rejects the NULL-extended rows,
// so this LEFT JOIN yields the same count as an INNER JOIN would.
EXPLAIN
SELECT COUNT(*)
FROM TAGVALUES AS v
LEFT JOIN TAGMAP AS t
    ON t.PATHUUID = v.PATHUUID
WHERE t.PATHTYPE = 'TYPE10';

示例 2

// Example 2 (numeric key): select the TYPE10 PATHUUIDs in a CTE first, then
// left-join TAGVALUES to that filter and count the result.
// Reported EXPLAIN figures (only 1 of 41 partitions is assigned):
//   partitionsTotal:    41
//   partitionsAssigned: 1
//   bytesAssigned:      2048
EXPLAIN
WITH PATHUUIDFilter AS (
    SELECT t.PATHUUID
    FROM TAGMAP AS t
    WHERE t.PATHTYPE = 'TYPE10'
)
SELECT COUNT(*)
FROM PATHUUIDFilter AS f
LEFT JOIN TAGVALUES AS v
    ON v.PATHUUID = f.PATHUUID;

示例 3

// Example 3 (numeric key): hard-code three numeric PATHUUIDs in an IN list.
// Reported EXPLAIN figures (0 of 40 TAGVALUES partitions are assigned):
//   partitionsTotal:    40
//   partitionsAssigned: 0
//   bytesAssigned:      0
// NOTE(review): the literals are presumably the md5_number_upper64 keys of the
// three TYPE10 paths -- confirm against TAGMAP.
EXPLAIN
SELECT COUNT(*)
FROM TAGVALUES AS v
WHERE v.PATHUUID IN (
    5944718881441917065,
    3379648765010274384,
    4532955584134004551
);
于 2021-10-05T17:00:21.917 回答
0

您的左连接告诉查询引擎把 TAGVALUES 的所有记录与 TAGMAP 连接,而且 TAGVALUES 中的记录不必有匹配项,因此查询会扫描整个表。

由于没有看到查询概况(Query Profile),我猜测 where 子句可能是在这一步之后才被求值的。

请尝试改用普通的 JOIN(内连接),并可以考虑把 where 条件移到 join 子句中。把条件移到 join 子句中未必会有影响,但值得一试。

于 2021-10-05T16:43:30.290 回答