-2
-- Returns cash-flow rows for source system 'WSS' joined to their trade and
-- trade leg, plus a Surrogate_key built by concatenating the identifying columns.
-- Joins are written as explicit INNER JOINs so that join predicates (ON) are
-- kept apart from row filters (WHERE).
-- NOTE(review): cf.cflw_date is concatenated without an explicit format. If it is
--   a DATE, the key text depends on the session NLS_DATE_FORMAT and may drop the
--   time part, which can make distinct rows share a key - confirm the column type
--   and consider TO_CHAR with a fixed format mask.
-- NOTE(review): CF.CFLW_STATUS_CODE is filtered to 'FINAL' below, so it is the
--   same for every row and adds no uniqueness to the key.
-- NOTE(review): TRD_CONT_NBR is not table-qualified; left as written because the
--   owning table cannot be determined from this query alone.
SELECT TRD.PKG_ID
       || '_' || TRD_CONT_NBR
       || '_' || LEG.TRD_LEG_NBR
       || '_' || TRD.TRD_ID
       || '_' || CF.CURR_CODE
       || '_' || CF.CFLW_DATE
       || '_' || CF.CFLW_TYPE_CODE
       || '_' || CF.CFLW_STATUS_CODE AS Surrogate_key
     , CF.EFF_DATE
     , TRD.PKG_ID
     , TRD_CONT_NBR
     , TRD.SRCE_TRD_ID
     , LEG.TRD_LEG_NBR
     , TRD.TRD_ID
     , LEG.TRD_LEG_ID
     , CF.CURR_CODE
     , CF.CFLW_DATE
     , CF.TRD_CURR_CASH_FLOW_AMT
     , CF.INT_RATE
     -- NOTE(review): INT_RATE is selected twice; kept so the result shape is
     -- unchanged, but a different column was probably intended here.
     , CF.INT_RATE
     , CF.CFLW_TYPE_CODE
     , CF.CFLW_TYPE_GRP_CODE
     , CF.CFLW_STATUS_CODE
FROM edw.extv_t_dim_trd TRD
INNER JOIN edw.extv_fact_cash_flow CF
        ON CF.SRCE_TRD_ID = TRD.SRCE_TRD_ID
       AND CF.TRD_ID      = TRD.TRD_ID
INNER JOIN edw.extv_t_trade_leg LEG
        ON LEG.SRCE_TRD_ID = CF.SRCE_TRD_ID
       AND LEG.TRD_LEG_ID  = CF.TRD_LEG_ID
WHERE TRD.SRCE_SYS_CODE = 'WSS'
  AND CF.SRCE_SYS_CODE  = 'WSS'
  AND LEG.SRCE_SYS_CODE = 'WSS'
  AND TRD.TRD_STATUS_CODE <> 'CANCELED'
  AND LEG.INSTM_TYPE_CODE NOT IN ('FX', 'FX-OPTION')
  -- Same instant as to_date('04/01/2013','mm/dd/yyyy'): 1 April 2013, midnight.
  AND TRD.TRD_ACTV_TO_DATE >= DATE '2013-04-01'
  AND TRD.TECH_TRD_FLAG = 'N'
  AND CF.CFLW_STATUS_CODE = 'FINAL'
  AND TRD.ACTV_FLAG = 'Y'
  AND LEG.ACTV_FLAG = 'Y'
  AND CF.ACTV_FLAG  = 'Y'

使用上面的查询,如果对 Surrogate_key 加上 DISTINCT,我可以得到唯一值。但我的问题是:查询返回的总记录数是 300 万,去重后是 250 万,而我想找出的是那些不唯一(重复)的值,也就是其余约 50 万条。那么我该如何实现呢?

另外,在某些情况下表中没有主键,所以我用这些列拼接出 Surrogate_key,但即便如此,它仍然包含一些重复值。今后我应该采用什么方法来避免这类问题?

谢谢,斯里尼

4

1 回答 1

0

我没有完全理解你的问题,但假设你想查找 surrogate_key 的重复项,那么下面这个脚本可能会有用:

-- Demo fixture: a one-column table holding sample surrogate keys.
-- The six rows below contain 'AAAA' three times, 'ACAA' twice and 'AAAB' once.
CREATE TABLE TEST (
    Surrogate_key VARCHAR2(100)
);

-- The target column is named explicitly so the inserts keep working if
-- columns are later added to TEST.
INSERT INTO TEST (Surrogate_key) VALUES ('AAAA');
INSERT INTO TEST (Surrogate_key) VALUES ('ACAA');
INSERT INTO TEST (Surrogate_key) VALUES ('AAAA');
INSERT INTO TEST (Surrogate_key) VALUES ('AAAB');
INSERT INTO TEST (Surrogate_key) VALUES ('AAAA');
INSERT INTO TEST (Surrogate_key) VALUES ('ACAA');

/* Duplicate finder: returns each Surrogate_key that occurs more than once
   in TEST, together with its number of occurrences (MATCHES). */
SELECT
    dup.Surrogate_key,
    COUNT(*) AS MATCHES
FROM TEST dup
GROUP BY
    dup.Surrogate_key
HAVING
    COUNT(*) > 1

你可以在这里试试这个。

(2013-07-15 编辑) 假设您的查询有效,然后试试这个(这样你可以找到重复的行):

-- Lists every Surrogate_key that the cash-flow join produces more than once,
-- with the number of rows sharing it (MATCHES).
-- The inner query builds the key; the outer query groups and keeps only keys
-- with a count above 1.
-- NOTE(review): cf.cflw_date is concatenated without an explicit format. If it is
--   a DATE, the key text depends on the session NLS_DATE_FORMAT - confirm the
--   column type and consider TO_CHAR with a fixed format mask.
SELECT T.Surrogate_key
     , COUNT(*) AS MATCHES
FROM (
    SELECT TRD.PKG_ID
           || '_' || TRD_CONT_NBR
           || '_' || LEG.TRD_LEG_NBR
           || '_' || TRD.TRD_ID
           || '_' || CF.CURR_CODE
           || '_' || CF.CFLW_DATE
           || '_' || CF.CFLW_TYPE_CODE
           || '_' || CF.CFLW_STATUS_CODE AS Surrogate_key
    FROM edw.extv_t_dim_trd TRD
    -- Explicit joins keep the join predicates apart from the row filters.
    INNER JOIN edw.extv_fact_cash_flow CF
            ON CF.SRCE_TRD_ID = TRD.SRCE_TRD_ID
           AND CF.TRD_ID      = TRD.TRD_ID
    INNER JOIN edw.extv_t_trade_leg LEG
            ON LEG.SRCE_TRD_ID = CF.SRCE_TRD_ID
           AND LEG.TRD_LEG_ID  = CF.TRD_LEG_ID
    WHERE TRD.SRCE_SYS_CODE = 'WSS'
      AND CF.SRCE_SYS_CODE  = 'WSS'
      AND LEG.SRCE_SYS_CODE = 'WSS'
      AND TRD.TRD_STATUS_CODE <> 'CANCELED'
      AND LEG.INSTM_TYPE_CODE NOT IN ('FX', 'FX-OPTION')
      -- Same instant as to_date('04/01/2013','mm/dd/yyyy'): 1 April 2013, midnight.
      AND TRD.TRD_ACTV_TO_DATE >= DATE '2013-04-01'
      AND TRD.TECH_TRD_FLAG = 'N'
      AND CF.CFLW_STATUS_CODE = 'FINAL'
      AND TRD.ACTV_FLAG = 'Y'
      AND LEG.ACTV_FLAG = 'Y'
      AND CF.ACTV_FLAG  = 'Y'
) T
GROUP BY T.Surrogate_key
HAVING COUNT(*) > 1
于 2013-07-14T19:33:05.260 回答