我正在尝试为我在 T-SQL 中的工作实现类型 2 SCD。我正在遵循此链接中给出的绝妙方法:https ://www.mssqltips.com/sqlservertip/2883/using-the-sql-server-merge-statement-to-process-type-2-slowly-changing-方面/
但是,在我的情况下,表中没有内置 BinaryCheckSum 列。我需要即时计算相同的值。我将在下面发布我的代码(创建了一些示例数据)以实现此实现。
有人可以让我知道这种方法是否有效吗?还是我需要在列本身中有 DimensionCheckSum?另外,就我而言,我不需要代理 ID。(请原谅任何语法错误)
/*
--CREATING A SAMPLE SCD TABLE WITH NO DATA
CREATE TABLE SCD_ACCOUNT
(
SCD_ACCOUNT_ID INT IDENTITY(1,1) NOT NULL,
SCD_SRC_ID INT NULL,
SCD_SF_SRC_ID INT NULL,
SCD_SF_ATTR1 VARCHAR(100) NULL,
SCD_SF_ATTR2 VARCHAR(100) NULL,
SCD_SF_ATTR3 VARCHAR(100) NULL,
SCD_SF_ATTR4 VARCHAR(100) NULL,
SCD_SF_ATTR5 VARCHAR(100) NULL,
SCD_EFF_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_EFF_DATE DEFAULT GETDATE(),
SCD_END_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_END_DATE DEFAULT '12/31/9999',
SCD_IS_ACTIVE VARCHAR(10) NULL CONSTRAINT DF_SCD_IS_ACTIVE DEFAULT 'Y',
SCD_LAST_UPD_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_LAST_UPD_DATE DEFAULT GETDATE(),
SCD_LAST_UPD_BY VARCHAR(100) NOT NULL CONSTRAINT DF_SCD_LAST_UPD_BY DEFAULT SUSER_SNAME());
SELECT * FROM SCD_ACCOUNT;
--CREATING A SOURCE TABLE WHICH WILL INSERT/UPDATE DATA INTO SCD TABLE
--THIS WILL HAVE THE SAME COLUMNS AS ABOVE TABLE EXCEPT SCD_ACCOUNT_ID, SCD_SRC_ID, SCD_IS_ACTIVE, SCD_EFF_DATE, SCD_END_DATE
CREATE TABLE SF_SRC
(
SF_SRC_ID INT NULL,
SF_ATTR1 VARCHAR(100) NULL,
SF_ATTR2 VARCHAR(100) NULL,
SF_ATTR3 VARCHAR(100) NULL,
SF_ATTR4 VARCHAR(100) NULL,
SF_ATTR5 VARCHAR(100) NULL);
--INSERTING DATA INTO SF_SRC TABLE
INSERT INTO SF_SRC VALUES(100,'Row1Attr1','Row1Attr2','Row1Attr3','Row1Attr4','Row1Attr5');
INSERT INTO SF_SRC VALUES(200,'Row2Attr1','Row2Attr2','Row2Attr3','Row2Attr4','Row2Attr5');
INSERT INTO SF_SRC VALUES(300,'Row3Attr1','Row3Attr2','Row3Attr3','Row3Attr4','Row3Attr5');
SELECT * FROM SF_SRC;
--DROP TABLE SCD_ACCOUNT;
--DROP TABLE SF_SRC;
*/
-- BEGIN OF INSERT USING MERGE
INSERT INTO dbo.SCD_ACCOUNT
( --TABLE AND COLUMNS IN WHICH TO INSERT THE DATA
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE,
BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
)
-- SELECT THE ROWS/COLUMNS TO INSERT THAT ARE OUTPUT FROM THIS MERGE STATEMENT
-- IN THIS EXAMPLE, THE ROWS TO BE INSERTED ARE THE ROWS THAT HAVE CHANGED (UPDATE).
SELECT
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE,
BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
FROM
(
-- THIS IS THE BEGINNING OF THE MERGE STATEMENT.
-- THE TARGET MUST BE DEFINED, IN THIS EXAMPLE IT IS OUR SLOWLY CHANGING
-- DIMENSION TABLE
MERGE INTO dbo.SCD_ACCOUNT AS target
-- THE SOURCE MUST BE DEFINED WITH THE USING CLAUSE
USING
(
-- THE SOURCE IS MADE UP OF THE ATTRIBUTE COLUMNS FROM THE STAGING TABLE.
SELECT
SF_SRC_ID,
SF_ATTR1,
SF_ATTR2,
SF_ATTR3,
SF_ATTR4
SF_ATTR5,
BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
FROM dbo.SF_SRC
) AS SOURCE
(
SF_SRC_ID,
SF_ATTR1,
SF_ATTR2,
SF_ATTR3,
SF_ATTR4,
SF_ATTR5,
BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
) ON --WE ARE MATCHING ON THE SOURCESYSTEMID IN THE TARGET TABLE AND THE SOURCE TABLE.
(
TARGET.SCD_SF_SRC_ID = SOURCE.SF_SRC_ID
)
-- IF THE ID'S MATCH BUT THE CHECKSUMS ARE DIFFERENT, THEN THE RECORD HAS CHANGED;
-- THEREFORE, UPDATE THE EXISTING RECORD IN THE TARGET, END DATING THE RECORD
-- AND SET THE CURRENTRECORD FLAG TO N
WHEN MATCHED AND TARGET.BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SF_ATTR5)
<> SOURCE.BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5)
AND TARGET.SCD_IS_ACTIVE = 'Y'
THEN
UPDATE SET
SCD_END_DATE = GETDATE()-1,
SCD_IS_ACTIVE = 'N',
SCD_LAST_UPD_DATE = GETDATE(),
SCD_LAST_UPD_BY = SUSER_SNAME()
-- IF THE ID'S DO NOT MATCH, THEN THE RECORD IS NEW;
-- THEREFORE, INSERT THE NEW RECORD INTO THE TARGET USING THE VALUES FROM THE SOURCE.
WHEN NOT MATCHED THEN
INSERT
(
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5
)
VALUES
(
SOURCE.SF_SRC_ID,
SOURCE.SF_ATTR1,
SOURCE.SF_ATTR2,
SOURCE.SF_ATTR3,
SOURCE.SF_ATTR4,
SOURCE.SF_ATTR5
)
OUTPUT $ACTION,
SOURCE.SF_SRC_ID,
SOURCE.SF_ATTR1,
SOURCE.SF_ATTR2,
SOURCE.SF_ATTR3,
SOURCE.SF_ATTR4,
SOURCE.SF_ATTR5,
GETDATE(),
'12/31/9999'
) -- THE END OF THE MERGE STATEMENT
--THE CHANGES OUTPUT BELOW ARE THE RECORDS THAT HAVE CHANGED AND WILL NEED
--TO BE INSERTED INTO THE SLOWLY CHANGING DIMENSION.
AS CHANGES
(
ACTION,
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE
)
WHERE ACTION='UPDATE';