sql - 同比 (YOY) 不同计数

Question

编辑：

我在 Sql Server 2005 中工作，我正在尝试获取当前财政年度（比如 6 月 1 日至 5 月 30 日）和过去 3 年不同用户的年同比（YOY）计数。我可以通过四次运行 select 语句来做我需要的事情，但目前我似乎找不到更好的方法。我可以在一个查询中获得每年的不同计数，但我需要它来累积不同的计数。以下是我目前所拥有的模型：

选择 [年份]，计数（不同的用户 ID）
从
(
  选择 u.uID 作为用户 ID，
      案子
        当 dd.ddEnd 在 @yearOneStart 和 @yearOneEnd 之间，然后是 'Year1'
        当 dd.ddEnd 在 @yearTwoStart 和 @yearTwoEnd 之间时，则为“Year2”
        当 dd.ddEnd 在 @yearThreeStart 和 @yearThreeEnd 之间时，则为“Year3”
        当 dd.ddEnd 在 @yearFourStart 和 @yearFourEnd 之间时 'Year4'
        ELSE“其他”
      结束为 [年份]
  来自用户作为你   
  INNER JOIN UserDataIDMatch AS udim
    ON u.uID = udim.udim_FK_uID
  INNER JOIN DataDump AS dd
    ON udim.udimUserSystemID = dd.ddSystemID
) 作为数据
WHERE LOWER([Year]) '其他'
通过...分组
  [年]

我得到类似的东西：

第 1 年 1
第二年 1
第 3 年 1
第 4 年 1

但我真的需要：

第 1 年 1
第二年 2
第 3 年 3
第 4 年 4

下面是一个粗略的模式和一组值（为简单起见进行了更新）。我试图创建一个 SQL Fiddle，但是当我尝试构建架构时出现磁盘空间错误。

创建表用户
(
  uID int 身份主键，
  uFirstName varchar(75),
  uLastName varchar(75)
);
 
插入用户（uFirstName、uLastName）
价值观
（'用户1'，'用户1'），
（“用户 2”，“用户 2”）
（“用户 3”，“用户 3”）
（“用户 4”，“用户 4”）；

创建表用户数据IDMatch
(
  udimID int indentity 主键，
  udim.udim_FK_uID int 外键引用用户（uID），
  udimUserSystemID varchar(75)
);

插入到 UserDataIDMatch (udim_FK_uID, udimUserSystemID)
价值观
(1, 'SystemID1'),
(2, 'SystemID2'),
(3, 'SystemID3'),
(4, 'SystemID4');

创建表数据转储
(
  ddID int 身份主键，
  ddSystemID varchar(75),
  ddEnd 日期时间
);

插入数据转储（ddSystemID，ddEnd）
价值观
('SystemID1', '10-01-2013'),
('SystemID2', '10-01-2014'),
('SystemID3', '10-01-2015'),
('SystemID4', '10-01-2016');

score 1 · Accepted Answer

除非我遗漏了什么，否则您只想知道有多少记录的日期小于或等于当前会计年度。

DECLARE @YearOneStart DATETIME, @YearOneEnd DATETIME,
        @YearTwoStart DATETIME, @YearTwoEnd DATETIME,
        @YearThreeStart DATETIME, @YearThreeEnd DATETIME,
        @YearFourStart DATETIME, @YearFourEnd DATETIME

SELECT @YearOneStart = '06/01/2013', @YearOneEnd = '05/31/2014',
        @YearTwoStart = '06/01/2014', @YearTwoEnd = '05/31/2015',
        @YearThreeStart = '06/01/2015', @YearThreeEnd = '05/31/2016',
        @YearFourStart = '06/01/2016', @YearFourEnd = '05/31/2017'

;WITH cte AS 
(
      SELECT u.uID AS UserID, 
      CASE 
        WHEN dd.ddEnd BETWEEN @yearOneStart AND @yearOneEnd THEN 'Year1'
        WHEN dd.ddEnd BETWEEN @yearTwoStart AND @yearTwoEnd THEN 'Year2'
        WHEN dd.ddEnd BETWEEN @yearThreeStart AND @yearThreeEnd THEN 'Year3'
        WHEN dd.ddEnd BETWEEN @yearFourStart AND @yearFourEnd THEN 'Year4'
        ELSE 'Other'
      END AS [Year]
      FROM Users AS u   
      INNER JOIN UserDataIDMatch AS udim
        ON u.uID = udim.udim_FK_uID
      INNER JOIN DataDump AS dd
        ON udim.udimUserSystemID = dd.ddSystemID
)

SELECT 
    DISTINCT [Year],
    (SELECT COUNT(*) FROM cte cteInner WHERE cteInner.[Year] <= cteMain.[Year] )
FROM cte cteMain

score 1 · Accepted Answer

使用现有查询的概念

我做了类似的事情来找出在几年之间购买东西的不同客户的数量，我对其进行了修改以使用您的年份概念，您添加的变量将是一年中的开始日期和开始月份以及开始年份和年底。

从技术上讲，有一种方法可以避免使用循环，但这很清楚，你不能超过 9999 年，所以不要觉得编写巧妙的代码来避免循环是有意义的

加快查询速度的技巧

此外，当匹配日期时，请确保您正在比较日期，而不是比较列的函数评估，因为这意味着在每个记录集上运行该函数，并且如果索引存在于日期上（它们应该存在），则它们将变得无用。使用日期加零来启动您的目标日期，从年份中减去 1900，从月份中减去 1，从目标日期中减去 1。

然后在日期创建有效范围（即 yearlessthan 到 yearmorethan）的表上进行自联接，并使用子查询根据该范围创建总和。由于您希望从第一年到最后一年的累积限制结果从第一年开始。

最后，您将丢失第一年，因为根据我们的定义，它不符合范围的条件，要解决此问题，只需在您创建的临时表上执行 union all 以添加丢失的年份和其中不同值的数量。

DECLARE @yearStartMonth INT = 6, @yearStartDay INT = 1
DECLARE @yearStart INT = 2008, @yearEnd INT  = 2012

DECLARE @firstYearStart DATE = 
        DATEADD(day,@yearStartDay-1, 
           DATEADD(month, @yearStartMonth-1,
             DATEADD(year, @yearStart- 1900,0)))

DECLARE @lastYearEnd DATE = 
        DATEADD(day, @yearStartDay-2,
            DATEADD(month, @yearStartMonth-1,
               DATEADD(year, @yearEnd -1900,0)))    

DECLARE @firstdayofcurrentyear DATE = @firstYearStart 
DECLARE @lastdayofcurrentyear DATE = DATEADD(day,-1,DATEADD(year,1,@firstdayofcurrentyear))
DECLARE @yearnumber INT = YEAR(@firstdayofcurrentyear)

DECLARE @tempTableYearBounds TABLE
(
    startDate DATE NOT NULL,
    endDate DATE NOT NULL,
    YearNumber INT NOT NULL
)

WHILE @firstdayofcurrentyear < @lastYearEnd
BEGIN
INSERT INTO @tempTableYearBounds 
VALUES(@firstdayofcurrentyear,@lastdayofcurrentyear,@yearNumber)
SET @firstdayofcurrentyear = DATEADD(year,1,@firstdayofcurrentyear)
SET @lastdayofcurrentyear = DATEADD(year,1,@lastdayofcurrentyear)
SET @yearNumber = @yearNumber + 1
END



DECLARE @tempTableCustomerCount TABLE
(
    [Year] INT NOT NULL,
    [CustomerCount] INT NOT NULL
)


INSERT INTO @tempTableCustomerCount
SELECT 
YearNumber as [Year],
COUNT(DISTINCT CustomerNumber) as CutomerCount
FROM Ticket
     JOIN @tempTableYearBounds ON
          TicketDate >= startDate AND TicketDate <=endDate
GROUP BY YearNumber


SELECT * FROM(
SELECT t2.Year as [Year], 
(SELECT 
    SUM(CustomerCount) 
    FROM @tempTableCustomerCount 
    WHERE Year>=t1.Year 
         AND Year <=t2.Year) AS CustomerCount
FROM @tempTableCustomerCount t1 JOIN @tempTableCustomerCount t2
    ON t1.Year < t2.Year
WHERE t1.Year = @yearStart
UNION
SELECT [Year], [CustomerCount] 
FROM @tempTableCustomerCount
WHERE [YEAR] = @yearStart
) tt
ORDER BY tt.Year

它效率不高，但最后您正在处理的临时表是如此之小，我认为这并不重要，并且与您使用的方法相比，它增加了更多的多功能性。

更新：我用我的数据集更新了查询以反映您想要的结果，我基本上是在测试这是否更快，它快了 10 秒，但我正在处理的数据集相对较小。（从 12 秒到 2 秒）。

使用您的数据

我将您提供的表更改为临时表，因此它不会影响我的环境，并且我删除了外键，因为临时表不支持它们，逻辑与包含的示例相同，但只是针对您的数据集进行了更改。

DECLARE @startYear INT = 2013, @endYear INT = 2016
DECLARE @yearStartMonth INT = 10 , @yearStartDay INT = 1
DECLARE @startDate DATETIME = DATEADD(day,@yearStartDay-1,
                                 DATEADD(month, @yearStartMonth-1,
                                     DATEADD(year,@startYear-1900,0)))
DECLARE @endDate DATETIME = DATEADD(day,@yearStartDay-1,
                                DATEADD(month,@yearStartMonth-1,
                                    DATEADD(year,@endYear-1899,0)))

DECLARE @tempDateRangeTable TABLE
(
    [Year] INT NOT NULL,
    StartDate DATETIME NOT NULL, 
    EndDate DATETIME NOT NULL
)

DECLARE @currentDate DATETIME = @startDate
WHILE @currentDate < @endDate
BEGIN
DECLARE @nextDate DATETIME  = DATEADD(YEAR, 1, @currentDate)
INSERT INTO @tempDateRangeTable(Year,StartDate,EndDate)
VALUES(YEAR(@currentDate),@currentDate,@nextDate)
SET @currentDate = @nextDate
END

CREATE TABLE Users
(
  uID int identity primary key, 
  uFirstName varchar(75), 
  uLastName varchar(75)
);

INSERT INTO Users (uFirstName, uLastName)
VALUES
('User1', 'User1'),
('User2', 'User2'),
('User3', 'User3'),
('User4', 'User4');

CREATE TABLE UserDataIDMatch
(
  udimID int indentity primary key,
  udim.udim_FK_uID int foreign key references Users(uID),
  udimUserSystemID varchar(75)
);

INSERT INTO UserDataIDMatch (udim_FK_uID, udimUserSystemID)
VALUES
(1, 'SystemID1'),
(2, 'SystemID2'),
(3, 'SystemID3'),
(4, 'SystemID4');

CREATE TABLE DataDump
(
  ddID int identity primary key,
  ddSystemID varchar(75),
  ddEnd datetime
);   


INSERT INTO DataDump (ddSystemID, ddEnd)
VALUES
('SystemID1', '10-01-2013'),
('SystemID2', '10-01-2014'),
('SystemID3', '10-01-2015'),
('SystemID4', '10-01-2016');


DECLARE @tempIndividCount TABLE
(
    [Year] INT NOT NULL,
    UserCount INT NOT NULL
)


-- no longer need to filter out other because you are using an
--inclusion statement rather than an exclusion one, this will 
--also make your query faster (when using real tables not temp ones)
INSERT INTO @tempIndividCount(Year,UserCount)
SELECT tdr.Year, COUNT(DISTINCT UId) FROM
    Users u JOIN UserDataIDMatch um
       ON um.udim_FK_uID  = u.uID
    JOIN DataDump dd ON
       um.udimUserSystemID = dd.ddSystemID
    JOIN @tempDateRangeTable tdr ON
        dd.ddEnd >= tdr.StartDate AND dd.ddEnd < tdr.EndDate
GROUP BY tdr.Year

-- will show you your result
SELECT * FROM @tempIndividCount

--add any ranges that did not have an entry but were in your range
--can easily remove this by taking this part out.
INSERT INTO @tempIndividCount
SELECT t1.Year,0 FROM
    @tempDateRangeTable t1 LEFT OUTER JOIN @tempIndividCount t2
        ON  t1.Year = t2.Year
    WHERE t2.Year IS NULL


SELECT YearNumber,UserCount FROM (
SELECT 'Year'+CAST(((t2.Year-t1.Year)+1) AS CHAR) [YearNumber] ,t2.Year,(
    SELECT SUM(UserCount) 
    FROM @tempIndividCount 
    WHERE Year >= t1.Year AND Year <=t2.Year 
    ) AS UserCount 
FROM @tempIndividCount t1 
    JOIN @tempIndividCount t2
        ON t1.Year < t2.Year
WHERE t1.Year = @startYear
UNION ALL 
--add the missing first year, union it to include the value
SELECT 'Year1',Year, UserCount FROM @tempIndividCount
WHERE Year = @startYear) tt
ORDER BY tt.Year

使用基于 WHEN CASE 的方法的好处

更强大

不需要明确地确定每一年的结束和开始日期，就像在一个逻辑年中只需要知道开始和结束日期。可以通过一些简单的修改轻松更改您正在寻找的内容（即说您想要所有 2 年范围或 3 年）。

如果数据库被正确索引会更快

由于您基于相同的数据类型进行搜索，因此您可以利用应在数据库中的日期列上创建的索引。

缺点

更复杂

查询要复杂得多，尽管它更健壮，但实际查询中有很多额外的逻辑。

在某些情况下不会很好地提高执行时间

如果数据集非常小，或者被比较的日期数量并不重要，那么这将无法节省足够的时间来值得。

score 0 · Accepted Answer

一旦在aSQL Server中匹配 a WHEN，CASE它就会停止评估，不会继续评估下一个WHEN子句。因此你不能这样积累。

如果我理解正确，这将显示您的结果。

;WITH cte AS 
(F
    SELECT dd.ddEnd [dateEnd], u.uID AS UserID
    FROM Users AS u     
    INNER JOIN UserDataIDMatch AS udim
        ON u.uID = udim.udim_FK_uID
    INNER JOIN DataDump AS dd
        ON udim.udimUserSystemID = dd.ddSystemID
    WHERE ddEnd BETWEEN @FiscalYearStart AND @FiscalYearEnd3
) 
SELECT datepart(year, @FiscalYearStart) AS [Year], COUNT(DISTINCT UserID) AS CntUserID
FROM cte
WHERE dateEnd BETWEEN  @FiscalYearStart AND @FiscalYearEnd1
GROUP BY @FiscalYearStart
UNION 
SELECT datepart(year, @FiscalYearEnd1) AS [Year], COUNT(DISTINCT UserID) AS CntUserID
FROM cte
WHERE dateEnd BETWEEN  @FiscalYearStart AND @FiscalYearEnd2
GROUP BY @FiscalYearEnd1
UNION 
SELECT datepart(year, @FiscalYearEnd3) AS [Year], COUNT(DISTINCT UserID) AS CntUserID
FROM cte
WHERE dateEnd BETWEEN  @FiscalYearStart AND @FiscalYearEnd3
GROUP BY @FiscalYearEnd2