2

我在这个问题上花费的时间比我愿意承认的要多。我有一个使用游标实现的解决方案(见下文),但想知道是否还有其他可行的方法。这个问题对我来说尤其困难,因为 SQL 中没有数组之类的常见结构。

这似乎也是一个适合用递归来解决的问题,但我没能想出具体做法。平台是 MSSQL 2008,即 T-SQL。

考虑一个包含两列非唯一标识符和一个日期列的表。对于每个日期,我想把其中一列 (X) 的标识符合并(分组)成若干集合,使得每个集合内第二列 (Y) 的标识符互不重复。

  • X 标识符是非唯一的
  • Y 标识符在每组 X 标识符中是唯一的,但总体上不唯一
  • 组合时,使用最小可能的 X 标识符
  • X 标识符不会跨越多个日期

也许最好的起点是一些样本数据。解决方案中还有一些扩展的示例数据。在我的实际使用场景中,数据一般少于 200 行,多数情况下少于 100 行。

Dt         X     Y     newX
6/1/2012   1     1     1
6/1/2012   1     2     1
6/1/2012   2     3     1
6/1/2012   3     1     3   <-- because Y=1 is already in X=1
6/1/2012   3     4     3
6/1/2012   4     5     1
6/1/2012   5     4     1   <-- Y=4 is in X=3 but not X=1
6/1/2012   5     6     1
6/1/2012   6     4     6   <-- Y=4 is in X=1 and X=3
6/1/2012   6     7     6

解决方案...

-- task: combine/condense/reassign/coalesce/collapse/consolidate sets of X identifiers into groups with unique Y identifiers, and by date
--  - X identifiers are non-unique
--  - Y identifiers are unique within each set of X identifiers, but non-unique overall
--  - When combining, the minimum possible X identifier is used
--  - An X identifier will not span more than a single date

-- Recreate the scratch table so the script can be re-run in the same session
-- without failing on "object already exists".
IF OBJECT_ID('tempdb..#tmpA') IS NOT NULL
    DROP TABLE #tmpA

-- Columns:
--   Dt    date the row belongs to
--   X     identifier to be consolidated (rewritten in place by the script)
--   Y     identifier that must stay unique within each consolidated X set
--   newX  consolidated identifier; 0 means "not assigned yet"
--   origX copy of the X value as inserted, kept for comparison in the output
CREATE TABLE #tmpA (Dt DATETIME, X INT, Y INT, newX INT, origX INT)

-- sample data
-- Date literals use the unseparated 'YYYYMMDD' form, which SQL Server parses
-- the same way regardless of the session's language / DATEFORMAT settings.
-- One INSERT per row is kept on purpose so single rows can be toggled.
--                                                  Dt                            X  Y  newX origX
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 1, 2, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 1, 1, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 2, 5, 0, 2)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 3, 2, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 3, 3, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 4, 3, 0, 4)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 5, 5, 0, 5)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 6, 5, 0, 6)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 7, 2, 0, 7)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120601' AS DATETIME), 7, 1, 0, 7)       -- causes a debug 4

INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 1, 2, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 1, 1, 0, 1)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 2, 5, 0, 2)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 3, 2, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 3, 3, 0, 3)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 4, 3, 0, 4)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 5, 5, 0, 5)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 6, 5, 0, 6)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 7, 0, 7)
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 1, 0, 7)       -- causes a debug 3 if below not used
--INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 6, 0, 7)     -- causes a debug 8 if above not used
INSERT INTO #tmpA (Dt, X, Y, newX, origX) VALUES (CAST('20120602' AS DATETIME), 7, 5, 0, 7)

-- Working variables for the cursor pass over #tmpA.
DECLARE
    @Dt         DATETIME,   -- Dt of the row currently fetched
    @X          INT,        -- X of the row currently fetched
    @Y          INT,        -- Y of the row currently fetched
    @newX       INT,        -- newX of the row as read by the cursor
    @tX         INT,        -- X of the set being processed; compared with @X to spot a change of X
    @tDt        DATETIME = CAST('1900-01-01' AS DATETIME),  -- date being processed; starts at a placeholder date
    @min_X      INT,        -- scratch result of the MIN(...) lookups in the loop
    @min_newX   INT         -- scratch result of the MIN(...) lookups in the loop

-- Walk the rows date by date and, within a date, X by X.
DECLARE CursorA CURSOR FOR
    SELECT Dt, X, Y, newX
    FROM #tmpA
    ORDER BY Dt, X

OPEN CursorA

-- Prime the loop with the first row and remember its X as the current set.
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
SELECT @tX = @X
-- Main pass: visit every cursor row and assign a newX to the (Dt, X, Y) row it
-- describes. Each branch ends by fetching the next row; every branch except
-- the last then uses CONTINUE to skip the remaining checks. The numbered
-- "select N as debug" statements emit a trace result set identifying which
-- branch handled the row (the minR column shows @min_newX).
WHILE (@@FETCH_STATUS = 0)
BEGIN

-- Has the cursor moved on to a different X than the set being tracked?
IF (@tX != @X)
BEGIN
    -- Flush the finished set: overwrite its X with the newX assigned to each row.
    -- @tDt is used here (not @Dt) because it still holds the finished set's date
    -- when the change of X coincides with a change of date.
    -- NOTE(review): later lookups in this loop read the rewritten X values.
    UPDATE #tmpA SET X = newX WHERE Dt = @tDt AND X = @tX
select 1 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    SET @tX = @X
END

-- Skip rows whose fetched newX is already non-zero.
-- NOTE(review): @newX is the value the cursor read; whether it reflects the
-- UPDATEs issued inside this loop depends on the cursor type - confirm.
IF (@newX != 0)
BEGIN
    -- newX for this X and Y already assigned, move on
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- First row seen for a new date?
IF (@Dt != @tDt)
BEGIN
    -- Remember the date now being processed.
    SET @tDt = @Dt

    -- The first X of a date keeps its own identifier: every row of that X gets newX = X.
    UPDATE #tmpA SET newX = @X WHERE Dt = @Dt AND X = @X
select 2 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- Still on the same date.
-- Look for this Y under a different X that already has a newX.
-- MIN over no rows yields NULL, so @min_X is NULL when there is no such row.
SELECT @min_X = MIN(X) FROM #tmpA
    WHERE Dt = @Dt AND X != @X AND Y = @Y AND newX != 0

IF @min_X IS NOT NULL
BEGIN

    -- This Y is already present under another X on this date.
    -- Find the smallest other X (with an assigned newX) that has no row with this Y.
    SELECT @min_newX = MIN(X) FROM #tmpA
        WHERE Dt = @Dt AND X != @X AND Y != @Y AND newX != 0
        AND X NOT IN (SELECT X FROM #tmpA
            WHERE Dt = @Dt AND X != @X AND Y = @Y AND newX != 0)

    IF @min_newX IS NOT NULL
    BEGIN

        -- Has any row of the current X already been given a newX?
        -- @min_X is reused here: it now holds the smallest such newX.
        SELECT @min_X = MIN(newX) FROM #tmpA
            WHERE Dt = @Dt AND X = @X AND newX !=0

        IF @min_X IS NOT NULL
        BEGIN

            -- Part of the current X already has a newX; reconcile it with the candidate.
            IF @min_newX >= @min_X
            BEGIN
                -- Move the rows of this X that carry @min_X to the candidate,
                -- then assign the candidate to the current (X, Y) row as well.
                UPDATE #tmpA SET newX = @min_newX
                    WHERE Dt = @Dt AND X = @X AND newX = @min_X
                UPDATE #tmpA SET newX = @min_newX
                    WHERE Dt = @Dt AND X = @X AND Y = @Y
select 3 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
            END
            ELSE
            BEGIN
                -- The candidate is smaller than the newX already used by this X:
                -- the current row takes the newX already used by this X.
                UPDATE #tmpA SET newX = @min_X
                    WHERE Dt = @Dt AND X = @X AND Y = @Y
select 4 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
            END

            FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
            CONTINUE
        END

        -- No row of the current X has a newX yet: assign the candidate to this row.
        UPDATE #tmpA SET newX = @min_newX
            WHERE Dt = @Dt AND X = @X AND Y = @Y
select 5 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
        FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
        CONTINUE

    END

    -- No assigned X is free of this Y, so the whole current X set keeps its own identifier.
    UPDATE #tmpA SET newX = @X WHERE Dt = @Dt AND X = @X
select 6 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- This Y is not yet present under any other assigned X.
-- Smallest newX already given to another row (different Y) of the current X, if any.
SELECT @min_newX = MIN(newX) FROM #tmpA
    WHERE Dt = @Dt AND X = @X AND Y != @Y AND newX != 0

-- Smallest other X on this date that has an assigned row with a different Y.
SELECT @min_X = MIN(X) FROM #tmpA
    WHERE Dt = @Dt AND X != @X AND Y != @Y AND newX != 0

IF @min_newX IS NULL
BEGIN
    -- No other row of this X is assigned yet, so use the smallest other X found above.
    -- NOTE(review): @min_X is NULL if that lookup matched nothing, which would
    -- store NULL in newX - confirm this cannot happen for valid input.
    UPDATE #tmpA SET newX = @min_X
        WHERE Dt = @Dt AND X = @X AND Y = @Y
select 7 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- Another row of the current X already has a newX (@min_newX is not NULL here,
-- so the "@min_newX IS NULL" term below cannot be true at this point).
IF (@min_X = @min_newX OR @min_X > @min_newX OR @min_newX IS NULL)
BEGIN
    -- The smallest other X equals, or is larger than, the newX already used by
    -- this X: the current row is assigned @min_X.
    UPDATE #tmpA SET newX = @min_X
        WHERE Dt = @Dt AND X = @X AND Y = @Y
select 8 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
    FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX
    CONTINUE
END

-- Fallback (@min_X is smaller than @min_newX, or NULL): the current row takes
-- the newX already used by this X.
UPDATE #tmpA SET newX = @min_newX
    WHERE Dt = @Dt AND X = @X AND Y = @Y
select 9 as debug, @tX as tX, @min_X as minX, @min_newX as minR, @X as X, @Y as Y, @newX as newX
FETCH NEXT FROM CursorA INTO @Dt, @X, @Y, @newX

END

-- The loop rewrites X for a set only when it meets the next X value, so the
-- very last set is still pending when the cursor runs out of rows; flush it here.
UPDATE #tmpA
SET X = newX
WHERE X = @tX
  AND Dt = @Dt

-- The cursor is no longer needed once the loop has finished.
CLOSE CursorA
DEALLOCATE CursorA

-- Show the consolidated rows. No ORDER BY is applied, so the row order is
-- whatever the engine returns (append "ORDER BY Dt, X, Y" for a sorted view).
SELECT
    Dt,
    X,
    Y,
    newX,
    origX
FROM #tmpA

RETURN

这是输出的样子......

Dt                      X   Y   newX    origX
2012-06-01 00:00:00.000 1   2   1   1
2012-06-01 00:00:00.000 1   1   1   1
2012-06-01 00:00:00.000 1   5   1   2
2012-06-01 00:00:00.000 3   2   3   3
2012-06-01 00:00:00.000 3   3   3   3
2012-06-01 00:00:00.000 1   3   1   4
2012-06-01 00:00:00.000 3   5   3   5
2012-06-01 00:00:00.000 6   5   6   6
2012-06-01 00:00:00.000 6   2   6   7
2012-06-01 00:00:00.000 6   1   6   7
2012-06-02 00:00:00.000 1   2   1   1
2012-06-02 00:00:00.000 1   1   1   1
2012-06-02 00:00:00.000 1   5   1   2
2012-06-02 00:00:00.000 3   2   3   3
2012-06-02 00:00:00.000 3   3   3   3
2012-06-02 00:00:00.000 1   3   1   4
2012-06-02 00:00:00.000 3   5   3   5
2012-06-02 00:00:00.000 6   5   6   6
2012-06-02 00:00:00.000 7   7   7   7
2012-06-02 00:00:00.000 7   1   7   7
2012-06-02 00:00:00.000 7   5   7   7
4

1 回答 1

0

你想要的大概是下面这样的查询。把它粘贴到向临时表插入数据的语句之后:

-- Number the rows inside each X group and return them next to the original columns.
-- NOTE(review): the window is ordered by its own partition column, so all rows
-- of a group tie and the numbering within a group is not deterministic.
SELECT
    results.X,
    results.Y,
    results.origX,
    results.newX
FROM (
    SELECT
        X,
        Y,
        origX,
        ROW_NUMBER() OVER (PARTITION BY X ORDER BY X) AS newX
    FROM #tmpA
) AS results
于 2012-07-16T17:20:56.607 回答