我有一个小型查找表,想用其中的几个 UUID 值来过滤一个较大的表。我一直在想办法避免扫描整个大表。到目前为止,只有在 WHERE 子句中硬编码过滤值时,才能只扫描大表的部分分区。这似乎不对,我遗漏了什么?
下面的代码先构造示例数据,然后给出三个过滤较大的标签值表(TAGVALUES)的示例。从 partitionsAssigned 和 bytesAssigned 的数值可以看出,只有示例 3 只扫描了部分分区。
数据设置
-- Dummy reference table mapping each PATHUUID to a PATH and a PATHTYPE (20 rows).
CREATE OR REPLACE TEMPORARY TABLE TAGMAP (
    PATHUUID TEXT,
    PATH     TEXT,
    PATHTYPE TEXT
);

-- Single multi-row insert with an explicit column list, so the load does not
-- depend on the physical column order of TAGMAP.
-- Three rows (PATH14..PATH16) carry PATHTYPE 'TYPE10'; the examples filter on it.
INSERT INTO TAGMAP (PATHUUID, PATH, PATHTYPE)
VALUES
    ('28da4f4a-4ede-5482-a6b4-614f7ae32589', 'PATH1',  'TYPE1'),
    ('612a7e20-7f73-57ae-a427-1bd6a06790b1', 'PATH2',  'TYPE1'),
    ('e0a2ba9c-f833-5d8c-94c7-e9e5504ae59b', 'PATH3',  'TYPE1'),
    ('b8e5a1eb-555b-50bc-b68a-c91201c66979', 'PATH4',  'TYPE2'),
    ('1ae3a8b7-02e1-53a8-a88e-cb72fce8d6a4', 'PATH5',  'TYPE2'),
    ('2e7105f3-3d71-5601-9366-00fa5efb43e3', 'PATH6',  'TYPE2'),
    ('5cdb2715-cf76-5f6b-a3c8-ce1b34d2f34c', 'PATH7',  'TYPE3'),
    ('521d5cd6-d324-500c-8d1b-8132fc08a781', 'PATH8',  'TYPE4'),
    ('2c56928f-6832-5028-b760-b6480da38463', 'PATH9',  'TYPE5'),
    ('6ba4b93e-1280-5dcd-afb7-656283cc74e9', 'PATH10', 'TYPE6'),
    ('c5378f1f-8d5b-5e05-9caf-127d80b3ceb8', 'PATH11', 'TYPE7'),
    ('389a885f-61c6-55d0-9cd3-874ae1c242a5', 'PATH12', 'TYPE8'),
    ('71715578-fa01-5497-841a-b874ba758ffb', 'PATH13', 'TYPE9'),
    ('321296da-6c4a-51f0-8c2b-db5a6de400c2', 'PATH14', 'TYPE10'),
    ('28824be8-bace-5be5-8ff0-80135a27ffd4', 'PATH15', 'TYPE10'),
    ('1e37be24-3753-5418-8163-9eead501d343', 'PATH16', 'TYPE10'),
    ('f4de2248-690d-574e-b1f6-412180ae5d23', 'PATH17', 'TYPE13'),
    ('40c04ed3-0301-5cd8-91ae-530976e426b2', 'PATH18', 'TYPE14'),
    ('57cbc90e-a441-5118-9d2c-aff510c770b5', 'PATH19', 'TYPE15'),
    ('81c01629-55f7-5266-abf0-f4ecafc4b0be', 'PATH20', 'TYPE16');
-- Dummy data table whose PATHUUID maps back to TAGMAP.
-- 20 loads x 10,000,000 generated rows = 200,000,000 rows in total.
CREATE OR REPLACE TEMPORARY TABLE TAGVALUES (
    PATHUUID TEXT,
    VALUE    INT
);

-- Each statement loads the rows for exactly one PATHUUID, with VALUE produced by
-- UNIFORM(1, 9999, RANDOM(12)) and the rows sorted by VALUE before insertion.
-- The column list and the named sort key replace the positional "ORDER BY 2".
-- NOTE(review): loading one PATHUUID per statement presumably keeps each
-- PATHUUID's rows in their own micro-partitions, which is what would allow
-- pruning on PATHUUID - confirm before merging these into a single load.
INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '28da4f4a-4ede-5482-a6b4-614f7ae32589' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '612a7e20-7f73-57ae-a427-1bd6a06790b1' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT 'e0a2ba9c-f833-5d8c-94c7-e9e5504ae59b' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT 'b8e5a1eb-555b-50bc-b68a-c91201c66979' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '1ae3a8b7-02e1-53a8-a88e-cb72fce8d6a4' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '2e7105f3-3d71-5601-9366-00fa5efb43e3' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '5cdb2715-cf76-5f6b-a3c8-ce1b34d2f34c' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '521d5cd6-d324-500c-8d1b-8132fc08a781' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '2c56928f-6832-5028-b760-b6480da38463' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '6ba4b93e-1280-5dcd-afb7-656283cc74e9' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT 'c5378f1f-8d5b-5e05-9caf-127d80b3ceb8' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '389a885f-61c6-55d0-9cd3-874ae1c242a5' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '71715578-fa01-5497-841a-b874ba758ffb' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '321296da-6c4a-51f0-8c2b-db5a6de400c2' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '28824be8-bace-5be5-8ff0-80135a27ffd4' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '1e37be24-3753-5418-8163-9eead501d343' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT 'f4de2248-690d-574e-b1f6-412180ae5d23' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '40c04ed3-0301-5cd8-91ae-530976e426b2' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '57cbc90e-a441-5118-9d2c-aff510c770b5' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;

INSERT INTO TAGVALUES (PATHUUID, VALUE)
SELECT '81c01629-55f7-5266-abf0-f4ecafc4b0be' AS PATHUUID, UNIFORM(1, 9999, RANDOM(12)) AS VALUE
FROM TABLE(GENERATOR(ROWCOUNT => 10000000))
ORDER BY VALUE;
-- The examples below target the PATHUUIDs whose PATHTYPE is 'TYPE10'
-- (3 rows in the TAGMAP sample data).
SELECT
    t.PATHUUID
FROM TAGMAP AS t
WHERE t.PATHTYPE = 'TYPE10';
示例 1:全表扫描
-- Example 1: count the TAGVALUES rows whose PATHUUID maps, through a join to
-- TAGMAP, to PATHTYPE 'TYPE10'.
-- Figures reported for this query (entire TAGVALUES table scanned):
--   partitionsTotal:    41
--   partitionsAssigned: 41
--   bytesAssigned:      295827968
-- Written as INNER JOIN: the WHERE filter on t.PATHTYPE discards the
-- NULL-extended rows a LEFT JOIN would produce, so the count is the same and
-- the join type now states what the query actually does.
-- NOTE(review): EXPLAIN compiles the statement without executing it, so these
-- figures presumably reflect compile-time pruning only; any pruning driven by
-- the join at run time would have to be checked in the query profile - confirm.
EXPLAIN
SELECT COUNT(*)
FROM TAGVALUES AS v
INNER JOIN TAGMAP AS t
    ON v.PATHUUID = t.PATHUUID
WHERE t.PATHTYPE = 'TYPE10';
示例 2:全表扫描
-- Example 2: select the 'TYPE10' PATHUUIDs in a CTE first, then left-join
-- TAGVALUES onto that small set and count the resulting rows.
-- Because the CTE is the preserved side of the LEFT JOIN, a PATHUUID with no
-- TAGVALUES rows still contributes one row to COUNT(*).
-- Figures reported for this query (entire TAGVALUES table scanned):
--   partitionsTotal:    41
--   partitionsAssigned: 41
--   bytesAssigned:      295827968
EXPLAIN
WITH PATHUUIDFilter AS (
    SELECT
        t.PATHUUID
    FROM TAGMAP AS t
    WHERE t.PATHTYPE = 'TYPE10'
)
SELECT COUNT(*)
FROM PATHUUIDFilter AS f
LEFT JOIN TAGVALUES AS v
    ON f.PATHUUID = v.PATHUUID;
示例 3:扫描了 15% 的分区
-- Example 3: count the TAGVALUES rows for three PATHUUIDs given as literals.
-- Figures reported for this query (6 of 40 TAGVALUES partitions scanned):
--   partitionsTotal:    40
--   partitionsAssigned: 6
--   bytesAssigned:      44362752
EXPLAIN
SELECT COUNT(*)
FROM TAGVALUES AS v
WHERE v.PATHUUID IN (
    '321296da-6c4a-51f0-8c2b-db5a6de400c2',
    '28824be8-bace-5be5-8ff0-80135a27ffd4',
    '1e37be24-3753-5418-8163-9eead501d343'
);
谢谢