2

我们有一个 PL/SQL 过程,其中包含从典型事实表返回结果的基本查询。查询中 WHERE 子句所基于的维度值作为参数传入。我的问题是:使用这些参数构造查询的最佳方法是什么?

下面是一些测试代码:

-- Show DBMS_OUTPUT text produced by the timing block further down.
-- The buffer size must be introduced by the SIZE keyword.
SET SERVEROUTPUT ON SIZE 100000;

-- build table
-- Drop any previous copy of the test table. ORA-00942 (table or view does not
-- exist) is ignored so the script also runs cleanly on a fresh schema, and
-- every other error is re-raised.
BEGIN
    EXECUTE IMMEDIATE 'DROP TABLE T_FACT';
EXCEPTION
    WHEN OTHERS THEN
        IF SQLCODE != -942 THEN
            RAISE;
        END IF;
END;
/

-- Test fact table: a cube id, a date, five numeric dimension keys and one
-- numeric measure. It is list-partitioned on CUBE_ID with one partition per
-- cube 1..5 plus a catch-all DEFAULT partition. Partition values are numeric
-- literals so they match the NUMBER partition key without implicit conversion.
CREATE TABLE T_FACT
(CUBE_ID NUMBER
,THE_DATE DATE
,DIM1 NUMBER
,DIM2 NUMBER
,DIM3 NUMBER
,DIM4 NUMBER
,DIM5 NUMBER
,VALUE NUMBER)
PARTITION BY LIST (CUBE_ID)
(
  PARTITION P1 VALUES (1)
 ,PARTITION P2 VALUES (2)
 ,PARTITION P3 VALUES (3)
 ,PARTITION P4 VALUES (4)
 ,PARTITION P5 VALUES (5)
 ,PARTITION PDEFAULT VALUES (DEFAULT)
);

-- Unique local index over the full dimensional key of T_FACT, with one index
-- partition per table partition (same partition names as the table).
CREATE UNIQUE INDEX T_FACT_UK1
    ON T_FACT
    (
        CUBE_ID,
        THE_DATE,
        DIM1,
        DIM2,
        DIM3,
        DIM4,
        DIM5
    )
    LOCAL
    (
        PARTITION P1,
        PARTITION P2,
        PARTITION P3,
        PARTITION P4,
        PARTITION P5,
        PARTITION PDEFAULT
    );

-- Declare the dimensional key as a named UNIQUE constraint backed by a local index.
-- NOTE(review): presumably Oracle reuses the existing index on the same column
-- list instead of building a second one - confirm on the target version.
ALTER TABLE T_FACT
    ADD CONSTRAINT T_FACT_UK1
        UNIQUE (CUBE_ID, THE_DATE, DIM1, DIM2, DIM3, DIM4, DIM5)
        USING INDEX LOCAL;

-- add test data
-- Loads 1,000,000 generated rows (the CONNECT BY row generator stops at that ROWNUM).
--   CUBE_ID  : cycles through 1..5
--   THE_DATE : first day of a month, cycling over the 48 months from
--              2000-12-01 (offset -1) to 2004-11-01 (offset +46)
--   DIM1..5  : the base-30 digits of TRUNC(n/5), each shifted to 1..30, where
--              n = ROWNUM-1 (the DECODE replaces n = 0 by 1). With only
--              1,000,000 rows n stays below 5*30^4, so DIM1 is always 1.
--   VALUE    : random integer, TRUNC of a random number in [1, 10000)
TRUNCATE TABLE T_FACT;
-- The explicit column list keeps the load correct if T_FACT's column order changes.
INSERT INTO T_FACT
      (CUBE_ID
      ,THE_DATE
      ,DIM1
      ,DIM2
      ,DIM3
      ,DIM4
      ,DIM5
      ,VALUE)
SELECT MOD(ROWNUM-1,5)+1 AS CUBE_ID
      ,ADD_MONTHS( TO_DATE('20010101','YYYYMMDD') , MOD(ROWNUM,48) - 1 ) AS THE_DATE
      ,MOD(TRUNC((DECODE(ROWNUM-1,0,1,ROWNUM-1)) / (5*POWER(30,4))),30)+1 AS DIM1
      ,MOD(TRUNC((DECODE(ROWNUM-1,0,1,ROWNUM-1)) / (5*POWER(30,3))),30)+1 AS DIM2
      ,MOD(TRUNC((DECODE(ROWNUM-1,0,1,ROWNUM-1)) / (5*POWER(30,2))),30)+1 AS DIM3
      ,MOD(TRUNC((DECODE(ROWNUM-1,0,1,ROWNUM-1)) / (5*30)),30)+1 AS DIM4
      ,MOD(TRUNC((DECODE(ROWNUM-1,0,1,ROWNUM-1)) / 5),30)+1 AS DIM5
      ,TRUNC(dbms_random.value(1, 10000)) AS VALUE
FROM DUAL
CONNECT BY ROWNUM <= 1000000;

COMMIT;

-- Schema-level nested table of numbers. Used for the dimension-value
-- parameters and for the result list of P_SLOW / P_FAST.
CREATE OR REPLACE TYPE DIM_TYPE
    IS TABLE OF NUMBER;
/

-- slow procedure
CREATE OR REPLACE PROCEDURE P_SLOW
(
    CubeId_in                   IN  NUMBER,
    DateStart_in                IN  DATE,
    DateEnd_in                  IN  DATE,
    Dim1_in                     IN  DIM_TYPE,
    Dim2_in                     IN  DIM_TYPE,
    Dim3_in                     IN  DIM_TYPE,
    Dim4_in                     IN  DIM_TYPE,
    Dim5_in                     IN  DIM_TYPE,
    Data_out                    OUT DIM_TYPE
)
-- Collects into Data_out the VALUE of every T_FACT row that
--   * belongs to cube CubeId_in,
--   * has THE_DATE between DateStart_in and DateEnd_in (both ends inclusive),
--   * matches every dimension list that was actually supplied.
-- A dimension list that is empty or NULL means "do not filter on that dimension".
-- When no row qualifies, Data_out is an empty collection (BULK COLLECT does not
-- raise NO_DATA_FOUND).
IS
    -- Element counts of the five lists. COUNT raises COLLECTION_IS_NULL on an
    -- uninitialized (NULL) collection, so a NULL list is mapped to 0 explicitly.
    Count1     NUMBER := CASE WHEN Dim1_in IS NULL THEN 0 ELSE Dim1_in.COUNT END;
    Count2     NUMBER := CASE WHEN Dim2_in IS NULL THEN 0 ELSE Dim2_in.COUNT END;
    Count3     NUMBER := CASE WHEN Dim3_in IS NULL THEN 0 ELSE Dim3_in.COUNT END;
    Count4     NUMBER := CASE WHEN Dim4_in IS NULL THEN 0 ELSE Dim4_in.COUNT END;
    Count5     NUMBER := CASE WHEN Dim5_in IS NULL THEN 0 ELSE Dim5_in.COUNT END;

BEGIN

    -- Each "OR CountN = 0" switches the corresponding dimension filter off when
    -- its list is empty.
    -- NOTE(review): for a NULL list this relies on TABLE() over a NULL collection
    -- yielding no rows - confirm on the target Oracle version.
    SELECT  VALUE
    BULK COLLECT INTO Data_out
    FROM    T_FACT
    WHERE   CUBE_ID = CubeId_in
    AND     (THE_DATE BETWEEN DateStart_in AND DateEnd_in)
    AND     (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( Dim1_in )) OR Count1 = 0)
    AND     (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( Dim2_in )) OR Count2 = 0)
    AND     (DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( Dim3_in )) OR Count3 = 0)
    AND     (DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( Dim4_in )) OR Count4 = 0)
    AND     (DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( Dim5_in )) OR Count5 = 0);

END P_SLOW;
/

-- fast procedure
CREATE OR REPLACE PROCEDURE P_FAST
(
    CubeId_in                   IN  NUMBER,
    DateStart_in                IN  DATE,
    DateEnd_in                  IN  DATE,
    Dim1_in                     IN  DIM_TYPE,
    Dim2_in                     IN  DIM_TYPE,
    Dim3_in                     IN  DIM_TYPE,
    Dim4_in                     IN  DIM_TYPE,
    Dim5_in                     IN  DIM_TYPE,
    Data_out                    OUT DIM_TYPE
)
-- Collects into Data_out the VALUE of every T_FACT row that belongs to cube
-- CubeId_in, has THE_DATE between DateStart_in and DateEnd_in (both ends
-- inclusive) and whose DIM1..DIM5 each appear in the matching input list.
-- Every dimension is always filtered: a list with no elements matches no row,
-- so the result is then empty.
-- When no row qualifies, Data_out is an empty collection (BULK COLLECT does not
-- raise NO_DATA_FOUND).
-- The query never read the per-list element counts, so those locals were
-- removed. Their initialisers also raised COLLECTION_IS_NULL for a NULL list.
IS
BEGIN

    SELECT  VALUE
    BULK COLLECT INTO Data_out
    FROM    T_FACT
    WHERE   CUBE_ID = CubeId_in
    AND     (THE_DATE BETWEEN DateStart_in AND DateEnd_in)
    AND     (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( Dim1_in )))
    AND     (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( Dim2_in )))
    AND     (DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( Dim3_in )))
    AND     (DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( Dim4_in )))
    AND     (DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( Dim5_in )));

END P_FAST;
/

-- Timing harness: runs P_FAST and then P_SLOW with the same filter values and
-- prints, for each, the number of values returned and the elapsed
-- DBMS_UTILITY.GET_TIME ticks (hundredths of a second per the Oracle documentation).
DECLARE
    -- Filter values shared by both calls.
    l_cube_id     NUMBER   := 2;
    l_date_from   DATE     := TO_DATE('20010101','YYYYMMDD');
    l_date_to     DATE     := TO_DATE('20030101','YYYYMMDD');
    l_dim1        DIM_TYPE := DIM_TYPE(1,2,3,6,15,21,25);
    l_dim2        DIM_TYPE := DIM_TYPE(1,3,4,6,7,8,9,10);
    l_dim3        DIM_TYPE := DIM_TYPE(2,3,4,5,6,7,8,13,14,15);
    l_dim4        DIM_TYPE := DIM_TYPE(1,4,21,22,23,24,29);
    l_dim5        DIM_TYPE := DIM_TYPE(2,15,21);

    -- Result list, overwritten by each call.
    l_values      DIM_TYPE;

    -- GET_TIME reading taken just before each call.
    l_started_at  NUMBER;
BEGIN
    -- Run 1: P_FAST
    l_started_at := DBMS_UTILITY.GET_TIME();
    P_FAST(
        CubeId_in    => l_cube_id,
        DateStart_in => l_date_from,
        DateEnd_in   => l_date_to,
        Dim1_in      => l_dim1,
        Dim2_in      => l_dim2,
        Dim3_in      => l_dim3,
        Dim4_in      => l_dim4,
        Dim5_in      => l_dim5,
        Data_out     => l_values
    );
    DBMS_OUTPUT.PUT_LINE('Number of data values:'||l_values.COUNT);
    DBMS_OUTPUT.PUT_LINE('Fast proc:' || TO_CHAR(DBMS_UTILITY.GET_TIME()-l_started_at));

    -- Run 2: P_SLOW, same inputs
    l_started_at := DBMS_UTILITY.GET_TIME();
    P_SLOW(
        CubeId_in    => l_cube_id,
        DateStart_in => l_date_from,
        DateEnd_in   => l_date_to,
        Dim1_in      => l_dim1,
        Dim2_in      => l_dim2,
        Dim3_in      => l_dim3,
        Dim4_in      => l_dim4,
        Dim5_in      => l_dim5,
        Data_out     => l_values
    );
    DBMS_OUTPUT.PUT_LINE('Number of data values:'||l_values.COUNT);
    DBMS_OUTPUT.PUT_LINE('Slow proc:' || TO_CHAR(DBMS_UTILITY.GET_TIME()-l_started_at));
END;
/

anonymous block completed
Elapsed: 00:00:00.567
Number of data values:642
Fast proc:22
Number of data values:642
Slow proc:32

我之所以在过程 P_SLOW 中加入谓词“Countx = 0”,是因为该过程要求调用者不必指定所有维度的值。例如,调用者可以只在 Dim1_in 中传入值,而将其他维度参数留空,这表示返回所有满足 DIM1 IN (x,y) 的行,其他维度取任意值均可。

然而,P_SLOW 中查询的问题在于它很慢——从上面的执行时间就可以看出。相比之下,P_FAST 很快,而两者唯一的区别是 P_FAST 的查询中没有“OR Countx = 0”谓词。事实证明,只要加入任何 OR 条件,查询都会变慢。

请参阅下面的执行计划:

> EXPLAIN PLAN FOR
    SELECT  VALUE
    FROM    T_FACT
    WHERE   CUBE_ID = 3
    AND     (THE_DATE BETWEEN TO_DATE('20010101','YYYYMMDD') AND TO_DATE('20030101','YYYYMMDD'))
    AND     (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT1 = 0)
    AND     (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT2 = 0)
    AND     (DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT3 = 0)
    AND     (DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )) OR :COUNT4 = 0)
    AND     (DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1) )) OR :COUNT5 = 0)
plan FOR succeeded.
> SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY)
PLAN_TABLE_OUTPUT              
--------------------------------------------------------------------------------------------------------------------------
Plan hash value: 1947951911    

---------------------------------------------------------------------------------------------------------------------    
| Id  | Operation                              | Name       | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |    
---------------------------------------------------------------------------------------------------------------------    
|   0 | SELECT STATEMENT                       |            |     1 |   100 |   291   (0)| 00:00:04 |       |       |    
|*  1 |  FILTER                                |            |       |       |            |          |       |       |    
|   2 |   PARTITION LIST SINGLE                |            |  5934 |   579K|   291   (0)| 00:00:04 |   KEY |   KEY |    
|   3 |    TABLE ACCESS BY LOCAL INDEX ROWID   | T_FACT     |  5934 |   579K|   291   (0)| 00:00:04 |     3 |     3 |    
|*  4 |     INDEX RANGE SCAN                   | T_FACT_UK1 |  5934 |       |   290   (0)| 00:00:04 |     3 |     3 |    
|*  5 |   COLLECTION ITERATOR CONSTRUCTOR FETCH|            |       |       |            |          |       |       |    
|*  6 |   COLLECTION ITERATOR CONSTRUCTOR FETCH|            |       |       |            |          |       |       |    
|*  7 |   COLLECTION ITERATOR CONSTRUCTOR FETCH|            |       |       |            |          |       |       |    
|*  8 |   COLLECTION ITERATOR CONSTRUCTOR FETCH|            |       |       |            |          |       |       |    
|*  9 |   COLLECTION ITERATOR CONSTRUCTOR FETCH|            |       |       |            |          |       |       |    
---------------------------------------------------------------------------------------------------------------------    

Predicate Information (identified by operation id):   
---------------------------------------------------   

   1 - filter((TO_NUMBER(:COUNT1)=0 OR  EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B1)) AND             
              (TO_NUMBER(:COUNT2)=0 OR  EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B2)) AND             
              (TO_NUMBER(:COUNT3)=0 OR  EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B3)) AND             
              (TO_NUMBER(:COUNT4)=0 OR  EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B4)) AND             
              (TO_NUMBER(:COUNT5)=0 OR  EXISTS (SELECT 0 FROM TABLE() "KOKBF$" WHERE VALUE(KOKBF$)=:B5)))                
   4 - access("CUBE_ID"=3 AND "THE_DATE">=TO_DATE(' 2001-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss') AND                  
              "THE_DATE"<=TO_DATE(' 2003-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))      
   5 - filter(VALUE(KOKBF$)=:B1)      
   6 - filter(VALUE(KOKBF$)=:B1)      
   7 - filter(VALUE(KOKBF$)=:B1)      
   8 - filter(VALUE(KOKBF$)=:B1)      
   9 - filter(VALUE(KOKBF$)=:B1)      

Note     
-----    
   - dynamic sampling used for this statement

 36 rows selected 

> EXPLAIN PLAN FOR
    SELECT  VALUE
    FROM    T_FACT
    WHERE   CUBE_ID = 3
    AND     (THE_DATE BETWEEN TO_DATE('20010101','YYYYMMDD') AND TO_DATE('20030101','YYYYMMDD'))
    AND     (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
    AND     (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
    AND     (DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
    AND     (DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1,2,3) )))
    AND     (DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( DIM_TYPE(1) )))
plan FOR succeeded.
> SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY)
PLAN_TABLE_OUTPUT              
--------------------------------------------------------------------------------------------------------------------------

Plan hash value: 3872369897    

-------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                  | Name       | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |
-------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                           |            |     1 |   110 |   440   (1)| 00:00:06 |       |       |
|*  1 |  HASH JOIN SEMI                            |            |     1 |   110 |   440   (1)| 00:00:06 |       |       |
|*  2 |   HASH JOIN SEMI                           |            |     1 |   108 |   410   (1)| 00:00:05 |       |       |
|*  3 |    HASH JOIN SEMI                          |            |     1 |   106 |   381   (1)| 00:00:05 |       |       |
|*  4 |     HASH JOIN SEMI                         |            |     1 |   104 |   351   (1)| 00:00:05 |       |       |
|*  5 |      HASH JOIN RIGHT SEMI                  |            |    59 |  6018 |   321   (1)| 00:00:04 |       |       |
|   6 |       COLLECTION ITERATOR CONSTRUCTOR FETCH|            |       |       |            |          |       |       |
|   7 |       PARTITION LIST SINGLE                |            |  5934 |   579K|   291   (0)| 00:00:04 |   KEY |   KEY |
|   8 |        TABLE ACCESS BY LOCAL INDEX ROWID   | T_FACT     |  5934 |   579K|   291   (0)| 00:00:04 |     3 |     3 |
|*  9 |         INDEX RANGE SCAN                   | T_FACT_UK1 |  5934 |       |   290   (0)| 00:00:04 |     3 |     3 |
|  10 |      COLLECTION ITERATOR CONSTRUCTOR FETCH |            |       |       |            |          |       |       |
|  11 |     COLLECTION ITERATOR CONSTRUCTOR FETCH  |            |       |       |            |          |       |       |
|  12 |    COLLECTION ITERATOR CONSTRUCTOR FETCH   |            |       |       |            |          |       |       |
|  13 |   COLLECTION ITERATOR CONSTRUCTOR FETCH    |            |       |       |            |          |       |       |
-------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):   
---------------------------------------------------   

   1 - access("DIM1"=VALUE(KOKBF$))   
   2 - access("DIM2"=VALUE(KOKBF$))   
   3 - access("DIM3"=VALUE(KOKBF$))   
   4 - access("DIM4"=VALUE(KOKBF$))   
   5 - access("DIM5"=VALUE(KOKBF$))   
   9 - access("CUBE_ID"=3 AND "THE_DATE">=TO_DATE(' 2001-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss') AND                  
              "THE_DATE"<=TO_DATE(' 2003-01-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))      

Note     
-----    
   - dynamic sampling used for this statement

 35 rows selected 

不过,我不能使用 IN 列表——也就是说,我们不能把参数 Dim1_in 等直接转换成 IN 列表再使用动态 SQL,因为维度值可能多达数千个。我可以使用临时表,把数组复制进去,但在实际场景中我们有 15 个维度,这样做可能会影响性能。我们知道的另一种方案是构建动态 SQL,例如:

-- Sketch of the dynamically generated statement. The predicates for DIM3..DIM5
-- are prefixed with "1=1 OR", so they are always true and those collections no
-- longer restrict the result, while all five collection references stay in the text.
-- NOTE(review): illustrative only - in a real dynamic string the collections
-- would presumably appear as bind placeholders; confirm before use.
SELECT VALUE
FROM    T_FACT
WHERE   CUBE_ID = 3
AND     (THE_DATE BETWEEN TO_DATE('20010101','YYYYMMDD') AND TO_DATE('20030101','YYYYMMDD'))
AND     (DIM1 IN (SELECT COLUMN_VALUE FROM TABLE( Dim1_in )))
AND     (DIM2 IN (SELECT COLUMN_VALUE FROM TABLE( Dim2_in )))
AND     (1=1 OR DIM3 IN (SELECT COLUMN_VALUE FROM TABLE( Dim3_in )))
AND     (1=1 OR DIM4 IN (SELECT COLUMN_VALUE FROM TABLE( Dim4_in )))
AND     (1=1 OR DIM5 IN (SELECT COLUMN_VALUE FROM TABLE( Dim5_in )))

-- The cursor is opened on the generated text (presumably held in mysql) with
-- all five collections passed in the USING list.
OPEN mycursor  
FOR mysql  
USING Dim1_in, Dim2_in, Dim3_in, Dim4_in, Dim5_in;  

(上面是在已知 Dim3_in、Dim4_in、Dim5_in 为 NULL 的情况下生成的语句。)

如果您能提出其他可行的方案,我将不胜感激。

4

2 回答 2

0

问题在于你根本不应该这样做。事实表与不同维度/维度属性的组合,对应的是不同的报告。

1) 你不应该用一条“主” SELECT 语句来包办对事实表的所有查询。

2)收集一天的数据很容易,一个月可能没问题。一年或整个历史呢?

最好的方法是每个需求都有一条 SQL 语句。即使看起来相似,也可以有许多 SQL 语句。将结果写入适当的聚合表并从那里开始工作。

即:可以有一个包含聚合数据的表,然后是基于它的其他聚合。

于 2013-08-15T16:09:57.280 回答
0

到目前为止,我找到的最佳答案(或者说至少是最有价值的讨论)是:

http://asktom.oracle.com/pls/asktom/f?p=100:11:0::::P11_QUESTION_ID:1669972300346534908

于 2013-08-19T11:33:14.997 回答