1

我们有这个数据集:

-- Temporary table holding the sample change documents that the query below aggregates.
CREATE TABLE #Changes 
    (
    [GUID] varchar(250), 
    Value numeric(36,6), 
    -- Declared as text although the sample INSERT supplies integer literals.
    -- NOTE(review): the query's DocumentNumber < comparison is therefore a string
    -- comparison ('11' sorts before '2') - confirm that this is intended.
    DocumentNumber varchar(250), 
    Approved bit, 
    -- Both dates are stored as text (the sample rows use values such as
    -- '2012-10-01T21:42:52Z'), not as a date/time type.
    ApprovedDate varchar(250), 
    IssuedDate varchar(250), 
    Category varchar(250)
    );
-- Sample data: 38 change documents in the categories UNKNOWN, ALTER and DRAW.
-- The column list is in a different order from the table definition
-- (DocumentNumber and Approved come before Value).
-- Rows with Approved = 0 carry an empty string (not NULL) in ApprovedDate.
INSERT INTO #Changes 
    (
    [GUID], 
    DocumentNumber, 
    Approved, 
    Value, 
    ApprovedDate, 
    IssuedDate, 
    Category
    ) 
 values
 ('4F7253A4E1B3D841B84D4A82B4F0E7A2', 11, 0, 18526.7, '', '2009-03-31T05:00:00Z', 'UNKNOWN'),
 ('D97537852E927B499C21C14F3D13CF06', 1, 0, 0, '', '2008-11-10T05:00:00Z', 'UNKNOWN'),
 ('857DADB463807345918729B33399B36F', 2, 0, 0, '', '2008-11-10T05:00:00Z', 'UNKNOWN'),
 ('7989D242E05AFF4FB5EE99114822BF80', 21, 0, 50112, '', '2009-07-22T05:00:00Z', 'UNKNOWN'),
 ('16A0AB27FD3A784D9E0A14406C7683E0', 3, 0, 0, '', '2009-01-15T05:00:00Z', 'UNKNOWN'),
 ('D3D7B1C306D38C438FC3DEDFCB57D411', 131, 0, 17204, '', '2010-12-14T05:00:00Z', 'UNKNOWN'),
 ('2C89D974DDF86743A0D7D62B385FBDEF', 147, 0, 0, '', '2010-12-01T05:00:00Z', 'UNKNOWN'),
 ('F371D4237C837D448824697EB0162905', 198, 0, 0, '', '2011-01-10T05:00:00Z', 'UNKNOWN'),
  ('433D64C871AE4E46A0E1BFCE2BB69BA7', 364, 0, 0, '', '2011-11-14T05:00:00Z', 'UNKNOWN'),
  ('808496DBDE76CB4F911396BB817724F3', 352, 0, 0, '', '2011-10-17T05:00:00Z', 'UNKNOWN'),
  ('9545DEF1666B5F4D8626F19F8E9E9333', 418, 0, 10948, '', '2012-03-07T22:19:18Z', 'UNKNOWN'),
  ('244D7D89B79E0F4E91100E4ADB300656', 439, 0, 50945, '', '2012-04-27T20:33:26Z', 'UNKNOWN'),
  ('115A427BBB1D2C43BA11D9E5875FAA2C', 465, 0, 480049, '', '2012-07-20T16:17:54Z', 'UNKNOWN'),
  ('3A2271EFCC767E4CA40017E68802F10C', 478, 0, 54298, '', '2012-08-01T17:26:38Z', 'UNKNOWN'),
  ('99D0EFC5A9F1AA498DB1A4CDF294129B', 490, 0, 11500, '', '2012-09-18T14:23:13Z', 'ALTER'),
  ('38B2E3A379C5084998E6A84D496AC555', 491, 0, 26088, '', '2012-09-25T06:00:00Z', 'ALTER'),
  ('8902831C8FAD4941841EE2847656BDAF', 494, 0, -825, '', '2012-10-16T14:20:06Z', 'ALTER'),
  ('7AFDB08A002AE54A8DE7699855AEBE30', 495, 0, 221, '', '2012-10-16T14:21:27Z', 'ALTER'),
  ('38A2CCEF5F0B294AA8B8752F461D121D', 496, 0, 0, '', '2012-12-24T01:11:15Z', 'ALTER'),
  ('24CCD5CE409E674593108CBD816DBCCE', 486, 1, -825, '2012-10-01T21:42:52Z', '2012-09-17T20:42:12Z', 'ALTER'),
  ('C7458704E36C8F448C1F3A485EB08304', 485, 1, 10000, '2012-10-01T21:25:56Z', '2012-09-11T21:29:44Z', 'ALTER'),
  ('B511953AE6FB6446A63AA83C159057BE', 487, 1, 82170, '2012-10-01T21:42:51Z', '2012-09-17T20:46:41Z', 'ALTER'),
  ('EC977BC304A971439D04BB9DF4D8188A',488, 1, 15500, '2012-10-01T20:58:15Z', '2012-09-18T06:00:00Z', 'ALTER'),
  ('D9B1F0C0A8E490448697B783639E09E0', 489, 1, 11503, '2012-10-01T21:42:50Z', '2012-09-18T13:56:18Z', 'ALTER'),
  ('698BB6D65832D146A49727C717A591A1', 492, 1, 2787, '2012-10-01T21:10:06Z', '2012-09-25T15:55:02Z', 'ALTER'),
  ('155D4F2B1854B34FABCDE8CF20F1E44C', 493, 1, 12162, '2012-10-01T21:10:06Z', '2012-09-25T16:04:40Z', 'ALTER'),
  ('137C9BF2B1EFD34B8831ADA70C5F9431', 1, 1, 369543, '2011-12-08T13:41:04Z', '1899-12-30T05:00:00Z', 'DRAW'),
  ('7F29FC7114BD10468AE92A047345B5DB', 2, 1, 7258, '2011-12-08T13:41:04Z', '2011-10-20T05:00:00Z', 'DRAW'),
  ('6B66D8EAD88E6E4FA29401CD524B978A', 3, 1, 979321, '2011-12-08T13:41:04Z', '2011-11-08T05:00:00Z', 'DRAW'),
  ('7F393B712B213041A6DD211E04F6DCA6', 4, 1, 14998, '2012-04-20T15:16:21Z', '2012-04-18T21:07:07Z', 'DRAW'),
  ('2255F84E7C7DA04389765724872D6413', 5, 1, 58926, '2012-04-20T15:16:23Z', '2012-04-18T21:13:15Z', 'DRAW'),
  ('DB4A5588DEB9F34C868F7AD1CB13ACC3', 6, 1, 13232, '2012-04-20T15:16:05Z', '2012-04-18T21:17:00Z', 'DRAW'),
  ('B5231AE40F8E7D41BA0A4D09614CBDF9', 7, 1, 10176, '2012-04-20T15:16:25Z', '2012-04-18T21:19:41Z', 'DRAW'),
  ('2362D54FCC53E447AC7D8289EA89FD05', 8, 1, 17556, '2012-04-20T15:16:04Z', '2012-04-18T21:21:20Z', 'DRAW'),
  ('6ED4565CA041704B8D006EDA4A1E4CF9', 9, 1, 399639, '2012-05-30T16:32:43Z', '2012-05-17T06:00:00Z', 'DRAW'),
  ('B21BE07E3E42C2418C70AD17862D3AE1', 10, 1, 6231, '2012-08-16T16:55:00Z', '2012-08-02T16:02:03Z', 'DRAW'),
  ('8FD252A50137754A98698F93AC9B01A7', 11, 1, 629, '2012-08-16T16:54:58Z', '2012-08-02T16:07:57Z', 'DRAW'),
  ('1B9AFD2C20362F48A486E8A535B29AF5', 20, 1, -113810, '2011-12-13T17:15:53Z', '2010-02-10T05:00:00Z', 'UNKNOWN');

这是查询:

-- For every change (a), total the positive and the negative Values of all
-- "earlier" changes (b) that share its Approved flag and Category.
-- "Earlier" means a smaller day key, or the same day key and a smaller
-- DocumentNumber. The day key is the first 10 characters of ApprovedDate when
-- the row is approved, otherwise of IssuedDate ('0000-00-00' when that is NULL).
SELECT
    a.[GUID],
    SUM(CASE WHEN b.Value > 0 THEN b.Value ELSE 0 END) AS [positive_previous_total],
    SUM(CASE WHEN b.Value < 0 THEN b.Value ELSE 0 END) AS [negative_previous_total]
FROM #Changes AS a
LEFT JOIN #Changes AS b
    ON  a.Category = b.Category
    AND a.Approved = b.Approved
    AND a.[GUID] <> b.[GUID]
    AND (
            -- b falls on an earlier day than a
            ISNULL(SUBSTRING(CASE WHEN a.Approved = 1 THEN a.ApprovedDate ELSE a.IssuedDate END, 1, 10), '0000-00-00')
              > ISNULL(SUBSTRING(CASE WHEN b.Approved = 1 THEN b.ApprovedDate ELSE b.IssuedDate END, 1, 10), '0000-00-00')
         OR (
                -- same day: the smaller DocumentNumber counts as earlier
                ISNULL(SUBSTRING(CASE WHEN a.Approved = 1 THEN a.ApprovedDate ELSE a.IssuedDate END, 1, 10), '0000-00-00')
                  = ISNULL(SUBSTRING(CASE WHEN b.Approved = 1 THEN b.ApprovedDate ELSE b.IssuedDate END, 1, 10), '0000-00-00')
            AND a.DocumentNumber > b.DocumentNumber
            )
        )
GROUP BY a.[GUID]

在这个数据量下查询很快,但是当数据扩展到 700 条记录时,查询需要几秒钟,我们希望把耗时降到 0.5 秒。

这里是 SQL Fiddle 上的一个示例,其中包含上述表结构和脚本。

4

4 回答 4

4

就个人而言,我会使用计算列来消除查询中的复杂性。

例如:你的表定义可能变成(注意我在这里也更正了数据类型):

-- Temporary table for the change documents, using real datetime columns plus
-- persisted computed columns that hold each date truncated to midnight.
-- A NULL date maps to 1753-01-01, the smallest value datetime can hold, so a
-- missing date compares as earlier than any real one ('0000-00-00' is not a
-- valid datetime and cannot be converted).
CREATE TABLE #Changes 
(
    [GUID] varchar(250), 
    Value numeric(36,6), 
    DocumentNumber varchar(250), 
    Approved numeric(36,6), -- Is there any reason this is not a BIT field?
    ApprovedDate datetime,
    -- Style 112 (yyyymmdd) keeps the string-to-datetime conversion deterministic,
    -- which a PERSISTED computed column requires.
    ApprovedDate_NoTime AS (CASE WHEN ApprovedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, ApprovedDate), 0) END) PERSISTED,
    IssuedDate datetime, 
    IssuedDate_NoTime AS (CASE WHEN IssuedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, IssuedDate), 0) END) PERSISTED,
    -- Day the row is ordered by: the approval day when approved, otherwise the
    -- issue day. Written against the base columns because a computed column
    -- may not reference another computed column.
    ApprovedOrIssuedDate AS (CASE WHEN Approved = 1
                                  THEN CASE WHEN ApprovedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, ApprovedDate), 0) END
                                  ELSE CASE WHEN IssuedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, IssuedDate), 0) END
                             END) PERSISTED,
    Category varchar(250)
);

然后,您可以将查询重写为:

-- For every change (a), total the positive and the negative Values of all
-- earlier changes (b) with the same Approved flag and Category. Ordering is by
-- the ApprovedOrIssuedDate column, with DocumentNumber breaking ties.
SELECT
    a.[GUID],
    SUM(CASE WHEN b.Value > 0 THEN b.Value ELSE 0 END) AS [positive_previous_total],
    SUM(CASE WHEN b.Value < 0 THEN b.Value ELSE 0 END) AS [negative_previous_total]
FROM #Changes AS a
LEFT JOIN #Changes AS b
    ON  a.Category = b.Category
    AND a.Approved = b.Approved
    AND a.[GUID] <> b.[GUID]
    AND (
            a.ApprovedOrIssuedDate > b.ApprovedOrIssuedDate
         OR (
                a.ApprovedOrIssuedDate = b.ApprovedOrIssuedDate
            AND a.DocumentNumber > b.DocumentNumber
            )
        )
GROUP BY a.[GUID]

Approved 为什么不用 BIT 类型,有什么理由吗?我已将日期列的数据类型改为 datetime,并增加了计算列来把日期的时间部分归零。另外,这段代码未经测试,但思路您应该明白了。

我还建议读一下这篇和这篇文章(假设您想在比较时忽略 datetime 的时间部分)。

于 2012-12-28T17:49:13.047 回答
3

好的,有几件事:

1. 永远不要将日期存储为字符串。

除非这确实只是数据库中的一张 #temporary 临时表(如果是这样,则会引出另外一些问题和疑问),否则您绝对不应该将真实的日期存储为字符串。这只会给将来带来麻烦。

在 30 多年的咨询中,我已经看到了数百个将日期存储为字符串的数据库,其中每一个数据库中都有无效的日期字符串。

2. 始终为您的表添加适当的键和索引。

这甚至适用于#Temp 表,除非它们很小或者您确定它们不会有帮助。

在您的情况下,您可能应该在 GUID 上建立唯一键/主键。为了提高性能,您还应该在 {GUID, Approved, Category} 上建立索引(可以考虑设为聚集索引)。

于 2012-12-28T17:40:33.463 回答
3

我做了一些尝试,结论如下:

  • 我在临时表上新增了一个 INT IDENTITY 主键列,并在其上建立聚集索引——是的,这看似适得其反,但在许多情况下实际上并非如此;它确实能加快很多操作——甚至包括插入和删除!原因请参阅 Kimberly Tripp 的 The Clustered Index Debate Continues...

  • 我还将 ApprovedDate 和 IssuedDate 改为真正的 DATETIME 数据类型,而不是 varchar。如果它摸起来像日期、看起来像日期、叫起来也像日期——那它就是日期,就应该按日期存储!

    请参阅要改掉的坏习惯:选择错误的数据类型——您应该始终使用最合适的数据类型——毕竟这就是它们的目的!

  • 我在 JOIN 中用作外键的列上添加了一个索引,以加快查询速度

  • 我添加了一个计算列 ApprovedOrIssuedDate,把“如果已批准则使用 ApprovedDate,否则使用 IssuedDate”的整套逻辑封装在一个地方——使查询更易于阅读!由于它是一个伪 DATE(时间部分归零),基本上可以替代您在查询中(反复)出现的那些难看的 CONVERT/ISNULL/SUBSTRING 表达式。

所以这是我创建临时表的更改脚本

-- Temporary table for the change documents: surrogate INT IDENTITY clustered
-- primary key, real DATETIME columns, and one persisted computed column that
-- holds the day each row is ordered by.
CREATE TABLE #Changes 
    (
    ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED,
    [GUID] varchar(250), 
    Value numeric(36,6), 
    DocumentNumber varchar(250), 
    Approved numeric(36,6), 
    ApprovedDate DATETIME,
    IssuedDate DATETIME, 
    Category varchar(250),

    -- Approval day when approved, otherwise issue day, with the time part zeroed.
    -- A NULL date maps to 1753-01-01 (the smallest datetime) so that it sorts
    -- before every real date instead of yielding a NULL that never compares.
    -- Style 112 keeps the conversion deterministic, as PERSISTED requires.
    ApprovedOrIssuedDate AS CASE 
                               WHEN Approved = 1 
                               THEN ISNULL(DATEADD(DAY, 0, DATEDIFF(DAY, 0, ApprovedDate)), CONVERT(DATETIME, '17530101', 112))
                               ELSE ISNULL(DATEADD(DAY, 0, DATEDIFF(DAY, 0, IssuedDate)), CONVERT(DATETIME, '17530101', 112))
                            END PERSISTED
    );

-- The self-join matches on Approved and Category with equality and then ranges
-- over ApprovedOrIssuedDate / DocumentNumber, so those form the key in that
-- order. [GUID] is only compared with <> and Value is only summed, so both are
-- carried as included columns to make the index covering.
CREATE NONCLUSTERED INDEX IX_Index01 ON #Changes(Approved, Category, ApprovedOrIssuedDate, DocumentNumber) 
                                     INCLUDE([GUID], Value);

然后您的查询变得更加容易:

-- For every change (a), total the positive and the negative Values of all
-- earlier changes (b) with the same Approved flag and Category. "Earlier" is
-- decided by ApprovedOrIssuedDate, with DocumentNumber breaking ties.
SELECT 
    a.[GUID], 
    [positive_previous_total] = SUM(CASE WHEN b.Value > 0 THEN b.Value ELSE 0 END), 
    [negative_previous_total] = SUM(CASE WHEN b.Value < 0 THEN b.Value ELSE 0 END) 
FROM 
    #Changes a 
LEFT OUTER JOIN 
    #Changes b ON b.[GUID] <> a.[GUID] 
               AND b.Approved  = a.Approved 
               AND b.Category  = a.Category 
               AND 
                  -- Column name spelled exactly as declared: identifier matching
                  -- is case-sensitive under a case-sensitive collation.
                  (b.ApprovedOrIssuedDate < a.ApprovedOrIssuedDate
                   OR 
                   (b.ApprovedOrIssuedDate = a.ApprovedOrIssuedDate
                    AND b.DocumentNumber < a.DocumentNumber)
                  ) 
GROUP BY 
     a.[GUID]

在我的测量中,我的查询成本有了很好的显着改善(从 0.022 下降到大约 0.0146)

于 2012-12-28T17:43:44.670 回答
2

从根本上说,您是在计算累计和(running total)。在 2012 之前的 SQL Server 版本中,您必须像现在这样使用连接(或相关子查询(correlated subquery),它的执行计划应该类似)。我将您的查询简化为:

-- For every change (a), total the positive and the negative Values of all
-- earlier changes (b) with the same Approved flag and Category.
-- Approved rows are ordered by ApprovedDate, all others by IssuedDate, with
-- DocumentNumber breaking ties. The dates are compared as stored, not
-- truncated to the day.
SELECT a.[GUID], 
       [positive_previous_total] = SUM(CASE WHEN b.Value>0 THEN b.Value ELSE 0 END), 
       [negative_previous_total] = SUM(CASE WHEN b.Value<0 THEN b.Value ELSE 0 END) 
FROM #Changes a LEFT OUTER JOIN
     #Changes b 
     ON b.[GUID]    <> a.[GUID] AND
         b.Approved  = a.Approved AND
         b.Category  = a.Category AND
         -- One outer group around both alternatives: OR binds more loosely than
         -- AND, so without it the tie-break branch would bypass the three
         -- conditions above.
         (
          -- strictly earlier date
          ((b.ApprovedDate < a.ApprovedDate AND a.Approved = 1) OR
           (b.IssuedDate < a.IssuedDate AND a.Approved <> 1)
          ) OR
          -- same date: the smaller DocumentNumber counts as earlier
          ((b.ApprovedDate = a.ApprovedDate AND a.Approved = 1 AND b.DocumentNumber<a.DocumentNumber) OR
           (b.IssuedDate = a.IssuedDate AND a.Approved <> 1 AND b.DocumentNumber<a.DocumentNumber)
          ) 
         ) 
GROUP BY a.[GUID]

ISNULL 是不必要的:在定义 #Changes 时,直接把空值替换成您想要的值即可。另外,由于连接条件已经保证 a.Approved = b.Approved,所以不需要 CASE 表达式。

由于 #Changes 表是您自己创建的,您应该给它添加一个 DateDoc 键。它由日期(取 ApprovedDate 或 IssuedDate,缺失时取一个合适的代表 NULL 的日期)后面拼接文档编号构成。文档编号应在左侧用 0 补齐,日期格式应为 YYYYMMDD。

现在,您可以将 from 子句编写为:

-- FROM clause only: pairs each change (a) with every other change (b) that has
-- the same Approved flag and Category and a smaller datedoc key.
FROM #Changes AS a
LEFT JOIN #Changes AS b
    ON  a.Category = b.Category
    AND a.Approved = b.Approved
    AND a.[GUID] <> b.[GUID]
    AND a.datedoc > b.datedoc

使用这种结构,我认为在 GUID、Approved、Category 和 DateDoc 上建立索引会对查询有帮助。我不确定在索引末尾再加上 Value 列是否会有帮助。不过,由于行数不多,您的数据应该完全可以放进内存。

于 2012-12-28T17:34:41.170 回答