我们有一个 PL/SQL 过程,其中包含从典型事实表返回结果的基本查询。查询中 WHERE 子句所基于的维度值作为参数传入。我的问题是:使用这些参数构造查询的最佳方法是什么?
下面是一些测试代码:
-- Show DBMS_OUTPUT text written by the timing block further down.
-- The buffer size has to be introduced by the SIZE keyword.
SET SERVEROUTPUT ON SIZE 100000;
-- build table
-- Fact table used by the test: a cube id, a date, five numeric dimension
-- columns and one measure. It is list-partitioned on CUBE_ID with one
-- partition per cube value 1..5 plus a catch-all DEFAULT partition.
-- The DROP makes the script re-runnable; it reports an error on the very
-- first run, when the table does not exist yet.
DROP TABLE T_FACT;
-- CUBE_ID is a NUMBER, so the partition bounds below are numeric literals
-- rather than strings (no implicit conversion needed).
CREATE TABLE T_FACT
(CUBE_ID NUMBER
,THE_DATE DATE
,DIM1 NUMBER
,DIM2 NUMBER
,DIM3 NUMBER
,DIM4 NUMBER
,DIM5 NUMBER
,VALUE NUMBER)
PARTITION BY LIST (CUBE_ID)
(
PARTITION P1 VALUES (1)
,PARTITION P2 VALUES (2)
,PARTITION P3 VALUES (3)
,PARTITION P4 VALUES (4)
,PARTITION P5 VALUES (5)
,PARTITION PDEFAULT VALUES (DEFAULT)
);
-- Local (per-partition) unique index over the cube, the date and all five
-- dimension columns.
CREATE UNIQUE INDEX T_FACT_UK1 ON T_FACT
(CUBE_ID, THE_DATE, DIM1, DIM2, DIM3, DIM4, DIM5)
LOCAL (
PARTITION P1
,PARTITION P2
,PARTITION P3
,PARTITION P4
,PARTITION P5
,PARTITION PDEFAULT
);
-- Unique constraint with the same name and column list as the index above.
-- NOTE(review): presumably the constraint is enforced through the index that
-- was just created rather than a second one - confirm on the target release.
ALTER TABLE T_FACT ADD (
CONSTRAINT T_FACT_UK1
UNIQUE (CUBE_ID, THE_DATE, DIM1, DIM2, DIM3, DIM4, DIM5)
USING INDEX LOCAL);
-- add test data
-- Loads 1,000,000 generated rows into T_FACT.
--   CUBE_ID  cycles through 1..5.
--   THE_DATE cycles through 48 consecutive months, 2000-12-01 .. 2004-11-01
--            (MOD(ROWNUM,48) - 1 runs from -1 to 46).
--   DIM5..DIM1 are successive base-30 digits (shifted to 1..30) of
--            TRUNC((ROWNUM-1)/5), so (CUBE_ID, DIM1..DIM5) is distinct per row.
--   VALUE    is a random integer in 1..9999.
-- The target columns are listed explicitly so the load does not depend on the
-- column order of the table.
TRUNCATE TABLE T_FACT;
INSERT INTO T_FACT
  (CUBE_ID, THE_DATE, DIM1, DIM2, DIM3, DIM4, DIM5, VALUE)
SELECT MOD(ROWNUM-1,5)+1 AS CUBE_ID
,ADD_MONTHS( TO_DATE('20010101','YYYYMMDD') , MOD(ROWNUM,48) - 1 ) AS THE_DATE
-- ROWNUM-1 is used directly: every divisor is at least 5, so for the first
-- row TRUNC(0 / d) and TRUNC(1 / d) are both 0 and no special case is needed.
,MOD(TRUNC((ROWNUM-1) / (5*POWER(30,4))),30)+1 AS DIM1
,MOD(TRUNC((ROWNUM-1) / (5*POWER(30,3))),30)+1 AS DIM2
,MOD(TRUNC((ROWNUM-1) / (5*POWER(30,2))),30)+1 AS DIM3
,MOD(TRUNC((ROWNUM-1) / (5*30)),30)+1 AS DIM4
,MOD(TRUNC((ROWNUM-1) / 5),30)+1 AS DIM5
,TRUNC(dbms_random.value(1, 10000)) AS VALUE
FROM DUAL
CONNECT BY ROWNUM <= 1000000;
COMMIT;
-- Schema-level nested table of NUMBER. The procedures below use it both for
-- the dimension value lists passed in and for the result values passed out.
CREATE OR REPLACE TYPE DIM_TYPE
  IS TABLE OF NUMBER;
/
-- slow procedure
CREATE OR REPLACE PROCEDURE P_SLOW
(
  CubeId_in    IN  NUMBER,
  DateStart_in IN  DATE,
  DateEnd_in   IN  DATE,
  Dim1_in      IN  DIM_TYPE,
  Dim2_in      IN  DIM_TYPE,
  Dim3_in      IN  DIM_TYPE,
  Dim4_in      IN  DIM_TYPE,
  Dim5_in      IN  DIM_TYPE,
  Data_out     OUT DIM_TYPE
)
IS
  -- Collects T_FACT.VALUE into Data_out for one cube and an inclusive date
  -- range, optionally restricted on each of the five dimensions.
  --
  --   CubeId_in                 cube to read (matched against CUBE_ID)
  --   DateStart_in, DateEnd_in  inclusive bounds for THE_DATE
  --   Dim1_in .. Dim5_in        allowed values per dimension; a list that is
  --                             NULL or has no elements means "any value"
  --   Data_out                  the matching VALUEs
  --
  -- The element counts are taken once, up front. Calling .COUNT on a NULL
  -- collection raises COLLECTION_IS_NULL, so a NULL list is mapped to 0 here
  -- and then handled exactly like an empty list.
  Count1 NUMBER := CASE WHEN Dim1_in IS NULL THEN 0 ELSE Dim1_in.COUNT END;
  Count2 NUMBER := CASE WHEN Dim2_in IS NULL THEN 0 ELSE Dim2_in.COUNT END;
  Count3 NUMBER := CASE WHEN Dim3_in IS NULL THEN 0 ELSE Dim3_in.COUNT END;
  Count4 NUMBER := CASE WHEN Dim4_in IS NULL THEN 0 ELSE Dim4_in.COUNT END;
  Count5 NUMBER := CASE WHEN Dim5_in IS NULL THEN 0 ELSE Dim5_in.COUNT END;
BEGIN
  -- Each dimension predicate is "value is in the list OR the list is empty";
  -- the "OR CountN = 0" half is what makes a dimension optional.
  SELECT VALUE
  BULK COLLECT INTO Data_out
  FROM T_FACT
  WHERE CUBE_ID = CubeId_in
  AND (THE_DATE BETWEEN DateStart_in AND DateEnd_in)
  AND (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( Dim1_in )) OR Count1 = 0)
  AND (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( Dim2_in )) OR Count2 = 0)
  AND (DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( Dim3_in )) OR Count3 = 0)
  AND (DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( Dim4_in )) OR Count4 = 0)
  AND (DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( Dim5_in )) OR Count5 = 0);
END P_SLOW;
/
-- fast procedure
CREATE OR REPLACE PROCEDURE P_FAST
(
  CubeId_in    IN  NUMBER,
  DateStart_in IN  DATE,
  DateEnd_in   IN  DATE,
  Dim1_in      IN  DIM_TYPE,
  Dim2_in      IN  DIM_TYPE,
  Dim3_in      IN  DIM_TYPE,
  Dim4_in      IN  DIM_TYPE,
  Dim5_in      IN  DIM_TYPE,
  Data_out     OUT DIM_TYPE
)
IS
  /*
   * Fills Data_out with T_FACT.VALUE for the given cube and inclusive date
   * range, keeping only rows whose DIM1..DIM5 each appear in the matching
   * DimN_in list. Every dimension is mandatory here: there is no
   * "empty list means any value" escape as in P_SLOW.
   *
   * The sizes below are not referenced by the query.
   * NOTE(review): .COUNT on an uninitialised (NULL) collection raises
   * COLLECTION_IS_NULL, so these declarations also act as an implicit
   * NULL check on the inputs - confirm before removing them.
   */
  l_dim1_size NUMBER := Dim1_in.COUNT;
  l_dim2_size NUMBER := Dim2_in.COUNT;
  l_dim3_size NUMBER := Dim3_in.COUNT;
  l_dim4_size NUMBER := Dim4_in.COUNT;
  l_dim5_size NUMBER := Dim5_in.COUNT;
BEGIN
  SELECT f.VALUE
    BULK COLLECT INTO Data_out
    FROM T_FACT f
   WHERE f.CUBE_ID = CubeId_in
     -- inclusive on both ends, same as BETWEEN
     AND f.THE_DATE >= DateStart_in
     AND f.THE_DATE <= DateEnd_in
     AND f.DIM1 IN (SELECT d1.COLUMN_VALUE FROM TABLE(Dim1_in) d1)
     AND f.DIM2 IN (SELECT d2.COLUMN_VALUE FROM TABLE(Dim2_in) d2)
     AND f.DIM3 IN (SELECT d3.COLUMN_VALUE FROM TABLE(Dim3_in) d3)
     AND f.DIM4 IN (SELECT d4.COLUMN_VALUE FROM TABLE(Dim4_in) d4)
     AND f.DIM5 IN (SELECT d5.COLUMN_VALUE FROM TABLE(Dim5_in) d5);
END P_FAST;
/
DECLARE
  -- Test inputs: one cube, a two-year date window and a value list for each
  -- of the five dimensions.
  l_cube_id   NUMBER   := 2;
  l_date_from DATE     := DATE '2001-01-01';
  l_date_to   DATE     := DATE '2003-01-01';
  l_dim1      DIM_TYPE := DIM_TYPE(1,2,3,6,15,21,25);
  l_dim2      DIM_TYPE := DIM_TYPE(1,3,4,6,7,8,9,10);
  l_dim3      DIM_TYPE := DIM_TYPE(2,3,4,5,6,7,8,13,14,15);
  l_dim4      DIM_TYPE := DIM_TYPE(1,4,21,22,23,24,29);
  l_dim5      DIM_TYPE := DIM_TYPE(2,15,21);
  l_values    DIM_TYPE;   -- result of the most recent call
  l_started   NUMBER;     -- DBMS_UTILITY.GET_TIME reading taken before the call

  -- Prints the size of the last result, then the given label followed by the
  -- GET_TIME difference since l_started.
  PROCEDURE report (label_in IN VARCHAR2) IS
  BEGIN
    DBMS_OUTPUT.PUT_LINE('Number of data values:'||l_values.COUNT);
    DBMS_OUTPUT.PUT_LINE(label_in || TO_CHAR(DBMS_UTILITY.GET_TIME()-l_started));
  END report;
BEGIN
  -- Same arguments for both procedures; P_FAST runs first.
  l_started := DBMS_UTILITY.GET_TIME();
  P_FAST(CubeId_in    => l_cube_id,
         DateStart_in => l_date_from,
         DateEnd_in   => l_date_to,
         Dim1_in      => l_dim1,
         Dim2_in      => l_dim2,
         Dim3_in      => l_dim3,
         Dim4_in      => l_dim4,
         Dim5_in      => l_dim5,
         Data_out     => l_values);
  report('Fast proc:');

  l_started := DBMS_UTILITY.GET_TIME();
  P_SLOW(CubeId_in    => l_cube_id,
         DateStart_in => l_date_from,
         DateEnd_in   => l_date_to,
         Dim1_in      => l_dim1,
         Dim2_in      => l_dim2,
         Dim3_in      => l_dim3,
         Dim4_in      => l_dim4,
         Dim5_in      => l_dim5,
         Data_out     => l_values);
  report('Slow proc:');
END;
/
anonymous block completed
Elapsed: 00:00:00.567
Number of data values:642
Fast proc:22
Number of data values:642
Slow proc:32
我之所以在过程 P_SLOW 中加入谓词“Countx = 0”,是因为该过程不要求调用者指定所有维度值。例如,调用者可以只在 Dim1_in 中传入值,而将其他维度参数留空,这表示要获取 DIM1 IN (x,y) 且其他维度可取任意值的所有行。
然而,P_SLOW 中查询的问题在于它很慢——从执行时间就可以看出。相比之下,P_FAST 很快,而唯一的区别是它的查询中没有“OR Countx = 0”谓词。事实证明,只要加上任何 OR 条件,查询就会变慢。
请参阅下面的执行计划:
> EXPLAIN PLAN FOR
SELECT VALUE
FROM T_FACT
WHERE CUBE_ID = 3
AND (THE_DATE BETWEEN TO_DATE('20010101','YYYYMMDD') AND TO_DATE('20030101','YYYYMMDD'))
AND (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT1 = 0)
AND (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT2 = 0)
AND (DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT3 = 0)
AND (DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT4 = 0)
AND (DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1) )) OR :COUNT5 = 0)
plan FOR succeeded.
> SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY)
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------
Plan hash value: 1947951911
---------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
---------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 100 | 291 (0)| 00:00:04 | | |
|* 1 | FILTER | | | | | | | |
| 2 | PARTITION LIST SINGLE | | 5934 | 579K| 291 (0)| 00:00:04 | KEY | KEY |
| 3 | TABLE ACCESS BY LOCAL INDEX ROWID | T_FACT | 5934 | 579K| 291 (0)| 00:00:04 | 3 | 3 |
|* 4 | INDEX RANGE SCAN | T_FACT_UK1 | 5934 | | 290 (0)| 00:00:04 | 3 | 3 |
|* 5 | COLLECTION ITERATOR CONSTRUCTOR FETCH| | | | | | | |
|* 6 | COLLECTION ITERATOR CONSTRUCTOR FETCH| | | | | | | |
|* 7 | COLLECTION ITERATOR CONSTRUCTOR FETCH| | | | | | | |
|* 8 | COLLECTION ITERATOR CONSTRUCTOR FETCH| | | | | | | |
|* 9 | COLLECTION ITERATOR CONSTRUCTOR FETCH| | | | | | | |
---------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter((TO_NUMBER(:COUNT1)=0 OR EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B1)) AND
(TO_NUMBER(:COUNT2)=0 OR EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B2)) AND
(TO_NUMBER(:COUNT3)=0 OR EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B3)) AND
(TO_NUMBER(:COUNT4)=0 OR EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B4)) AND
(TO_NUMBER(:COUNT5)=0 OR EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B5)))
4 - access("CUBE_ID"=3 AND "THE_DATE">=TO_DATE(' 2001-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss') AND
"THE_DATE"<=TO_DATE(' 2003-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
5 - filter(VALUE(KOKBF$)=:B1)
6 - filter(VALUE(KOKBF$)=:B1)
7 - filter(VALUE(KOKBF$)=:B1)
8 - filter(VALUE(KOKBF$)=:B1)
9 - filter(VALUE(KOKBF$)=:B1)
Note
-----
- dynamic sampling used for this statement
36 rows selected
> EXPLAIN PLAN FOR
SELECT VALUE
FROM T_FACT
WHERE CUBE_ID = 3
AND (THE_DATE BETWEEN TO_DATE('20010101','YYYYMMDD') AND TO_DATE('20030101','YYYYMMDD'))
AND (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
AND (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
AND (DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
AND (DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
AND (DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1) )))
plan FOR succeeded.
> SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY)
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------
Plan hash value: 3872369897
-------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
-------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 110 | 440 (1)| 00:00:06 | | |
|* 1 | HASH JOIN SEMI | | 1 | 110 | 440 (1)| 00:00:06 | | |
|* 2 | HASH JOIN SEMI | | 1 | 108 | 410 (1)| 00:00:05 | | |
|* 3 | HASH JOIN SEMI | | 1 | 106 | 381 (1)| 00:00:05 | | |
|* 4 | HASH JOIN SEMI | | 1 | 104 | 351 (1)| 00:00:05 | | |
|* 5 | HASH JOIN RIGHT SEMI | | 59 | 6018 | 321 (1)| 00:00:04 | | |
| 6 | COLLECTION ITERATOR CONSTRUCTOR FETCH| | | | | | | |
| 7 | PARTITION LIST SINGLE | | 5934 | 579K| 291 (0)| 00:00:04 | KEY | KEY |
| 8 | TABLE ACCESS BY LOCAL INDEX ROWID | T_FACT | 5934 | 579K| 291 (0)| 00:00:04 | 3 | 3 |
|* 9 | INDEX RANGE SCAN | T_FACT_UK1 | 5934 | | 290 (0)| 00:00:04 | 3 | 3 |
| 10 | COLLECTION ITERATOR CONSTRUCTOR FETCH | | | | | | | |
| 11 | COLLECTION ITERATOR CONSTRUCTOR FETCH | | | | | | | |
| 12 | COLLECTION ITERATOR CONSTRUCTOR FETCH | | | | | | | |
| 13 | COLLECTION ITERATOR CONSTRUCTOR FETCH | | | | | | | |
-------------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("DIM1"=VALUE(KOKBF$))
2 - access("DIM2"=VALUE(KOKBF$))
3 - access("DIM3"=VALUE(KOKBF$))
4 - access("DIM4"=VALUE(KOKBF$))
5 - access("DIM5"=VALUE(KOKBF$))
9 - access("CUBE_ID"=3 AND "THE_DATE">=TO_DATE(' 2001-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss') AND
"THE_DATE"<=TO_DATE(' 2003-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
Note
-----
- dynamic sampling used for this statement
35 rows selected
现在,我不能使用 IN 列表——也就是说,我们不能简单地把参数 Dim1_in 等转换为 IN 列表再使用动态 SQL,因为维度值可能有数千个。我可以使用临时表并把数组复制进去,但在实际情况中我们有 15 个维度,性能可能会受到影响。我们知道的另一个选项是构建动态 SQL,例如:
-- Text of the dynamic statement (the string held in mysql).
-- Every dimension keeps its bind placeholder even when it is not filtered on:
-- prefixing the predicate with "1=1 OR" neutralises it, while the USING list
-- below stays fixed at five binds regardless of which dimensions are supplied.
-- The collections are referenced as :placeholders because USING can only
-- supply values to placeholders, not to names written into the text.
SELECT VALUE
FROM T_FACT
WHERE CUBE_ID = 3
AND (THE_DATE BETWEEN TO_DATE('20010101','YYYYMMDD') AND TO_DATE('20030101','YYYYMMDD'))
AND (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( :Dim1_in )))
AND (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( :Dim2_in )))
AND (1=1 OR DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( :Dim3_in )))
AND (1=1 OR DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( :Dim4_in )))
AND (1=1 OR DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( :Dim5_in )))
-- Binds are matched to the placeholders by position, in order of appearance.
-- NOTE(review): some releases may need TABLE(CAST(:Dim1_in AS DIM_TYPE)) for a
-- bound collection - confirm on the target version.
OPEN mycursor
FOR mysql
USING Dim1_in, Dim2_in, Dim3_in, Dim4_in, Dim5_in;
(上面的语句适用于我已知 Dim3_in、Dim4_in、Dim5_in 为 NULL 的情况。)
您可以提出的任何其他选择将不胜感激。