1

我们需要每晚在 SQL Server 2008R2 上生成一些报告。计算这些报告需要几个小时。为了缩短时间,我们预先计算出一张表。这张表是通过 JOIN 12 个相当大(数千万行)的表创建的。

直到几天前,这个聚合表的计算还需要大约 4 个小时。我们的 DBA 将这个大连接拆分为 3 个较小的连接(每个连接 4 个表)。每次的中间结果都保存到临时表中,供下一次连接使用。

DBA 改进之后,聚合表在 15 分钟内就能计算完毕。我想知道这怎么可能。DBA 告诉我,这是因为服务器需要处理的数据量更少了。换句话说,在原始的大连接中,服务器必须处理的数据比几个较小连接加起来还要多。但是,我原以为优化器会高效地处理原始的大连接:自行拆分连接,并且只把下一个连接所需的列传递下去。

他所做的另一件事是他在其中一个临时表上创建了一个索引。但是,我再次认为优化器将在需要时创建适当的哈希表,从而更好地优化计算。

我与我们的 DBA 讨论过这个问题,但他自己也不确定是什么带来了处理时间的改善。他只是说,他不会责怪服务器,因为要计算如此大量的数据可能让服务器不堪重负,优化器也可能很难预测出最佳执行计划……我明白这一点,但我希望得到更明确的答案,弄清楚究竟是什么原因。

所以,问题是:

1.“是什么带来了如此大的性能提升?”

2.“将大连接拆分成小连接是标准程序吗?”

3. “在多个较小连接的情况下,服务器必须处理的数据量真的更小吗?”

这是原始查询:

    -- Original single-statement load: joins all twelve source tables at once and
    -- appends one row per test question to FinalResult_Base.
    INSERT INTO FinalResult_Base
SELECT
    TC.TestCampaignContainerId,
    TC.CategoryId AS TestCampaignCategoryId,
    TC.Grade,
    TC.TestCampaignId,
    T.TestSetId,
    TL.TestId,
    TSK.CategoryId,
    TT.[TestletId],
    TL.SectionNo,
    TL.Difficulty,
    -- Section letter (CHAR(65) is 'A') followed by 6 - Difficulty.
    CHAR(65 + TL.SectionNo) + CONVERT(varchar(4), 6 - TL.Difficulty) AS TestletName,
    TQ.[QuestionId],
    TS.StudentId,
    TS.ClassId,
    RA.SubjectId,
    TQ.[QuestionPoints],
    -- NULL points compare as UNKNOWN and therefore fall through to ELSE 0.
    CASE WHEN TQ.[QuestionPoints] > 0 THEN 1 ELSE 0 END AS GoodAnswer,
    -- A missing answer (NULL) is counted as wrong as well as zero points.
    CASE WHEN TQ.[QuestionPoints] = 0 OR TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS WrongAnswer,
    CASE WHEN TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS NoAnswer,
    TS.Redizo,
    TT.ViewCount,
    TT.SpentTime,
    TQ.[Position],
    RA.SpecialNeeds,
    1 AS [Version],
    TA.Id AS TestAdaptationId,
    TSK.TaskId AS TaskId,
    TT.Position AS TaskPosition,
    Q.Rate AS QuestionRate,
    TQ.Guid AS TestQuestionId,
    TT.TestletAnswerTypeId AS AnswerType
FROM
    [TestQuestion] AS TQ WITH (NOLOCK)
    INNER JOIN [TestTask] AS TT WITH (NOLOCK)
        ON TT.Guid = TQ.TestTaskId
    INNER JOIN [Question] AS Q WITH (NOLOCK)
        ON Q.QuestionId = TQ.QuestionId
    INNER JOIN [Testlet] AS TL WITH (NOLOCK)
        ON TL.Guid = TT.TestletId
    INNER JOIN [Test] AS T WITH (NOLOCK)
        ON T.Guid = TL.TestId
    INNER JOIN [TestSet] AS TS WITH (NOLOCK)
        ON TS.Guid = T.TestSetId
    INNER JOIN [RoleAssignment] AS RA WITH (NOLOCK)
        ON RA.PersonId = TS.StudentId
        AND RA.RoleId = 1
    INNER JOIN [Task] AS TSK WITH (NOLOCK)
        ON TSK.TaskId = TT.TaskId
    INNER JOIN [Category] AS C WITH (NOLOCK)
        ON C.CategoryId = TSK.CategoryId
    INNER JOIN [TimeWindow] AS TW WITH (NOLOCK)
        ON TW.Id = TS.TimeWindowId
    INNER JOIN [TestAdaptation] AS TA WITH (NOLOCK)
        ON TA.Id = TW.TestAdaptationId
    INNER JOIN [TestCampaign] AS TC WITH (NOLOCK)
        ON TC.TestCampaignId = TA.TestCampaignId
WHERE
    T.TestTypeId = 1                              -- exclude surveys
    AND T.ProcessedOn IS NOT NULL                 -- not all tests, only completed ones
    AND TL.ShownOn IS NOT NULL
    AND TS.Redizo NOT IN (999999999, 111111119)
END;

DBA 出色工作后的新拆分联接:

    -- Stage 1 of 3: materialise the TestSet / Test / Testlet rows together with
    -- their campaign attributes into #FinalResult_Base_1. All joins are inner
    -- joins, so the row filters are listed once in the WHERE clause.
    SELECT
        TC.TestCampaignContainerId,
        TC.CategoryId AS TestCampaignCategoryId,
        TC.Grade,
        TC.TestCampaignId,
        T.TestSetId,
        TL.TestId,
        TL.SectionNo,
        TL.Difficulty,
        -- Converts section/difficulty to labels such as A5, B4, B5 ...
        CHAR(65 + TL.SectionNo) + CONVERT(varchar(4), 6 - TL.Difficulty) AS TestletName,
        TS.StudentId,
        TS.ClassId,
        TS.Redizo,
        1 AS [Version], -- ?
        TA.Id AS TestAdaptationId,
        TL.Guid AS TLGuid,
        TS.TimeWindowId
    INTO
        [#FinalResult_Base_1]
    FROM
        [TestSet] AS [TS] WITH (NOLOCK)
        INNER JOIN [Test] AS [T] WITH (NOLOCK)
            ON [T].[TestSetId] = [TS].[Guid]
        INNER JOIN [Testlet] AS [TL] WITH (NOLOCK)
            ON [TL].[TestId] = [T].[Guid]
        INNER JOIN [TimeWindow] AS [TW] WITH (NOLOCK)
            ON [TW].[Id] = [TS].[TimeWindowId]
        INNER JOIN [TestAdaptation] AS [TA] WITH (NOLOCK)
            ON [TA].[Id] = [TW].[TestAdaptationId]
        INNER JOIN [TestCampaign] AS [TC] WITH (NOLOCK)
            ON [TC].[TestCampaignId] = [TA].[TestCampaignId]
        -- No TCC column is selected; the join only restricts the rows.
        INNER JOIN [TestCampaignContainer] AS [TCC] WITH (NOLOCK)
            ON [TCC].[TestCampaignContainerId] = [TC].[TestCampaignContainerId]
    WHERE
        [TS].[Redizo] NOT IN (999999999, 111111119)
        AND [T].[TestTypeId] = 1
        AND [T].[ProcessedOn] IS NOT NULL
        AND [TL].[ShownOn] IS NOT NULL
        AND [TW].[IsActive] = 1
        AND [TA].[IsActive] = 1
        AND [TC].[IsActive] = 1
        AND [TCC].[IsActive] = 1
    ;

 -- Stage 2 of 3: extend every stage-1 row with its test tasks and store the
 -- result in #FinalResult_Base_2. TTGuid is carried along for the next stage.
 SELECT
     FR1.TestCampaignContainerId,
     FR1.TestCampaignCategoryId,
     FR1.Grade,
     FR1.TestCampaignId,
     FR1.TestSetId,
     FR1.TestId,
     TSK.CategoryId AS [TaskCategoryId],
     TT.[TestletId],
     FR1.SectionNo,
     FR1.Difficulty,
     -- Converts section/difficulty to labels such as A5, B4, B5 ...
     CHAR(65 + FR1.SectionNo) + CONVERT(varchar(4), 6 - FR1.Difficulty) AS TestletName,
     FR1.StudentId,
     FR1.ClassId,
     FR1.Redizo,
     TT.ViewCount,
     TT.SpentTime,
     1 AS [Version], -- ?
     FR1.TestAdaptationId,
     TSK.TaskId AS TaskId,
     TT.Position AS TaskPosition,
     TT.TestletAnswerTypeId AS AnswerType,
     TT.Guid AS TTGuid
 INTO
     [#FinalResult_Base_2]
 FROM
     #FinalResult_Base_1 AS FR1
     INNER JOIN [TestTask] AS [TT] WITH (NOLOCK)
         ON [TT].[TestletId] = [FR1].[TLGuid]
     INNER JOIN [Task] AS [TSK] WITH (NOLOCK)
         ON [TSK].[TaskId] = [TT].[TaskId]
     -- No Category column is selected; the join only restricts the rows.
     INNER JOIN [Category] AS [C] WITH (NOLOCK)
         ON [C].[CategoryId] = [TSK].[CategoryId]
 WHERE
     [TSK].[IsActive] = 1
     AND [C].[IsActive] = 1
 ;

-- The stage-1 temp table is not read again after this point.
DROP TABLE [#FinalResult_Base_1];

-- Index the stage-2 temp table on the two columns that the following
-- RoleAssignment join matches on; TTGuid (used by the TestQuestion join)
-- is included at the leaf level.
CREATE NONCLUSTERED INDEX [#IX_FR_Student_Class]
    ON [dbo].[#FinalResult_Base_2] ([StudentId], [ClassId])
    INCLUDE ([TTGuid]);

-- Stage 3 of 3: attach questions and role assignments to the stage-2 rows,
-- build the complete result in #FinalResult_Base, then swap it into the
-- permanent table.
SELECT
    FR2.TestCampaignContainerId,
    FR2.TestCampaignCategoryId,
    FR2.Grade,
    FR2.TestCampaignId,
    FR2.TestSetId,
    FR2.TestId,
    FR2.[TaskCategoryId],
    FR2.[TestletId],
    FR2.SectionNo,
    FR2.Difficulty,
    FR2.TestletName,
    TQ.[QuestionId],
    FR2.StudentId,
    FR2.ClassId,
    RA.SubjectId,
    TQ.[QuestionPoints], -- 1+ good, 0 wrong, null no answer
    -- NULL points compare as UNKNOWN and therefore fall through to ELSE 0.
    CASE WHEN TQ.[QuestionPoints] > 0 THEN 1 ELSE 0 END AS GoodAnswer,
    -- A missing answer (NULL) is counted as wrong as well as zero points.
    CASE WHEN TQ.[QuestionPoints] = 0 OR TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS WrongAnswer,
    CASE WHEN TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS NoAnswer,
    FR2.Redizo,
    FR2.ViewCount,
    FR2.SpentTime,
    TQ.[Position] AS [QuestionPosition],
    RA.SpecialNeeds, -- special-needs (SVP) identification
    1 AS [Version], -- ?
    FR2.TestAdaptationId,
    FR2.TaskId,
    FR2.TaskPosition,
    Q.Rate AS QuestionRate,
    TQ.Guid AS TestQuestionId,
    FR2.AnswerType
INTO
    [#FinalResult_Base]
FROM
    [#FinalResult_Base_2] AS FR2
    INNER JOIN [TestQuestion] AS [TQ] WITH (NOLOCK)
        ON [TQ].[TestTaskId] = [FR2].[TTGuid]
    INNER JOIN [Question] AS [Q] WITH (NOLOCK)
        ON [Q].[QuestionId] = [TQ].[QuestionId]
    INNER JOIN [RoleAssignment] AS [RA] WITH (NOLOCK)
        ON [RA].[PersonId] = [FR2].[StudentId]
        AND [RA].[ClassId] = [FR2].[ClassId]
WHERE
    [Q].[IsActive] = 1
    AND [RA].[IsActive] = 1
    AND [RA].[RoleId] = 1;

    -- Stage 2 is no longer needed.
    DROP TABLE #FinalResult_Base_2;

    -- Replace the contents of the permanent table with the freshly built rows.
    TRUNCATE TABLE [dbo].[FinalResult_Base];
    INSERT INTO [dbo].[FinalResult_Base] SELECT * FROM #FinalResult_Base;

    DROP TABLE #FinalResult_Base;
4

2 回答 2

2

首先,请通过此脚本在您的表上重建索引(这可能需要很长时间) -

-- Defragments the indexes of the twelve tables read by the report query.
--
-- For every index partition with more than 100 pages and more than 10 %
-- fragmentation one ALTER INDEX statement is generated and executed:
--   * fragmentation > 50 %  -> REBUILD
--   * otherwise             -> REORGANIZE
-- Progress (time + index name) is sent to the client immediately through
-- RAISERROR ... WITH NOWAIT.
--
-- Changes compared with the previous version of this script:
--   * PARTITION = n is emitted directly after REBUILD / REORGANIZE; placing it
--     after the WITH (...) list is not valid ALTER INDEX syntax.
--   * A single-partition rebuild only gets SORT_IN_TEMPDB, because ONLINE and
--     FILLFACTOR are not single-partition rebuild options on SQL Server 2008 R2.
--   * Heaps (index_id = 0) are skipped: they have no index name, so the
--     concatenated statement and the progress message would both be NULL.
--   * Identifiers are quoted with QUOTENAME so names containing ']' stay valid.
--   * The index name is passed to RAISERROR as an argument, not as the format
--     string, so a '%' inside a name cannot be misread as a format specifier.
SET NOCOUNT ON;

DECLARE
      @SQL NVARCHAR(MAX)
    , @IndexName SYSNAME
    , @Output NVARCHAR(200)
    , @ServerVersion VARCHAR(100);

SELECT @ServerVersion = CAST(SERVERPROPERTY('Edition') AS VARCHAR(100));

DECLARE cur CURSOR LOCAL READ_ONLY FORWARD_ONLY FOR
    SELECT
        N'ALTER INDEX ' + QUOTENAME(ix.name)
        + N' ON ' + QUOTENAME(SCHEMA_NAME(t.[schema_id])) + N'.' + QUOTENAME(t.name) + N' '
        + CASE
            -- Heavily fragmented, single partition of a partitioned index.
            -- NOTE(review): as before, "partition_number > 1" is the only
            -- partition test, so partition 1 of a partitioned index still
            -- takes the whole-index branch below.
            WHEN ps.avg_fragmentation_in_percent > 50 AND ps.partition_number > 1 THEN
                N'REBUILD PARTITION = ' + CAST(ps.partition_number AS NVARCHAR(10))
                + N' WITH (SORT_IN_TEMPDB = ON)'
            -- Heavily fragmented, whole index. ONLINE is only requested on the
            -- editions the original script checked for.
            WHEN ps.avg_fragmentation_in_percent > 50 THEN
                N'REBUILD WITH (SORT_IN_TEMPDB = ON'
                + CASE
                    WHEN @ServerVersion LIKE 'Enterprise%' OR @ServerVersion LIKE 'Developer%'
                        THEN N', ONLINE = ON'
                    ELSE N''
                  END
                + CASE
                    -- fill_factor = 0 means "server default"; do not emit it.
                    WHEN ix.fill_factor > 0
                        THEN N', FILLFACTOR = ' + CAST(ix.fill_factor AS NVARCHAR(3))
                    ELSE N''
                  END
                + N')'
            -- Moderately fragmented: reorganise, per partition where applicable.
            WHEN ps.partition_number > 1 THEN
                N'REORGANIZE PARTITION = ' + CAST(ps.partition_number AS NVARCHAR(10))
            ELSE
                N'REORGANIZE'
          END
        + N';'
        , ix.name
    FROM sys.indexes ix
    JOIN sys.objects t ON t.[object_id] = ix.[object_id]
    JOIN (
        SELECT
              [object_id]
            , index_id
            , avg_fragmentation_in_percent
            , partition_number
        FROM sys.dm_db_index_physical_stats(DB_ID(), NULL, NULL, NULL, N'LIMITED')
        WHERE page_count > 100
            AND avg_fragmentation_in_percent > 10
    ) ps ON t.[object_id] = ps.[object_id] AND ix.index_id = ps.index_id
    WHERE t.[type] = 'U'
        AND ix.index_id > 0 -- skip heaps, ALTER INDEX needs a named index
        AND t.name IN (
            'TestQuestion', 'TestTask', 'Question', 'Testlet',
            'Test', 'TestSet', 'RoleAssignment', 'Task',
            'category', 'TimeWindow', 'TestAdaptation', 'TestCampaign');

OPEN cur;

FETCH NEXT FROM cur INTO @SQL, @IndexName;

WHILE @@FETCH_STATUS = 0 BEGIN

    -- Style 114 yields hh:mi:ss:mmm.
    SELECT @Output = CONVERT(NVARCHAR(15), GETDATE(), 114) + N': ' + @IndexName;
    RAISERROR(N'%s', 0, 1, @Output) WITH NOWAIT;

    EXEC sys.sp_executesql @SQL;

    FETCH NEXT FROM cur INTO @SQL, @IndexName;

END

CLOSE cur;
DEALLOCATE cur;

在此之后,请尝试这个查询 -

-- Single-statement variant of the load: the same twelve tables are joined,
-- but the row filters on Testlet, Test and TestSet are applied inside
-- derived tables before the join.
INSERT INTO dbo.FinalResult_Base
SELECT
    TC.TestCampaignContainerId,
    TC.CategoryID AS TestCampaignCategoryId,
    TC.Grade,
    TC.TestCampaignId,
    T.TestSetId,
    TL.TestId,
    TSK.CategoryID,
    TT.[TestletId],
    TL.SectionNo,
    TL.Difficulty,
    -- Section letter (CHAR(65) is 'A') followed by 6 - Difficulty.
    CHAR(65 + TL.SectionNo) + CONVERT(VARCHAR(4), 6 - TL.Difficulty) AS TestletName,
    TQ.[QuestionId],
    TS.StudentId,
    TS.ClassId,
    RA.SubjectId,
    TQ.[QuestionPoints],
    -- NULL points compare as UNKNOWN and therefore fall through to ELSE 0.
    CASE WHEN TQ.[QuestionPoints] > 0 THEN 1 ELSE 0 END AS GoodAnswer,
    -- A missing answer (NULL) is counted as wrong as well as zero points.
    CASE WHEN TQ.[QuestionPoints] = 0 OR TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS WrongAnswer,
    CASE WHEN TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS NoAnswer,
    TS.Redizo,
    TT.ViewCount,
    TT.SpentTime,
    TQ.[Position],
    RA.SpecialNeeds,
    1 AS [Version],
    TA.id AS TestAdaptationId,
    TSK.TaskId AS TaskId,
    TT.Position AS TaskPosition,
    Q.Rate AS QuestionRate,
    TQ.guid AS TestQuestionId,
    TT.TestletAnswerTypeId AS AnswerType
FROM dbo.TestQuestion AS TQ WITH (NOLOCK)
INNER JOIN dbo.TestTask AS TT WITH (NOLOCK)
    ON TT.[guid] = TQ.TestTaskId
INNER JOIN dbo.Question AS Q WITH (NOLOCK)
    ON Q.QuestionId = TQ.QuestionId
INNER JOIN (
    -- only testlets that were shown
    SELECT *
    FROM dbo.Testlet AS src WITH (NOLOCK)
    WHERE src.ShownOn IS NOT NULL
) AS TL
    ON TL.[guid] = TT.TestletId
INNER JOIN (
    -- only processed tests of type 1
    SELECT *
    FROM dbo.Test AS src WITH (NOLOCK)
    WHERE src.TestTypeId = 1
        AND src.ProcessedOn IS NOT NULL
) AS T
    ON T.[guid] = TL.TestId
INNER JOIN (
    -- skip the two excluded Redizo values
    SELECT *
    FROM dbo.TestSet AS src WITH (NOLOCK)
    WHERE src.Redizo NOT IN (999999999, 111111119)
) AS TS
    ON TS.[guid] = T.TestSetId
INNER JOIN dbo.RoleAssignment AS RA WITH (NOLOCK)
    ON RA.PersonID = TS.StudentId
    AND RA.RoleId = 1
INNER JOIN dbo.Task AS TSK WITH (NOLOCK)
    ON TSK.TaskId = TT.TaskId
INNER JOIN dbo.category AS C WITH (NOLOCK)
    ON C.CategoryID = TSK.CategoryID
INNER JOIN dbo.TimeWindow AS TW WITH (NOLOCK)
    ON TW.id = TS.TimeWindowId
INNER JOIN dbo.TestAdaptation AS TA WITH (NOLOCK)
    ON TA.id = TW.TestAdaptationId
INNER JOIN dbo.TestCampaign AS TC WITH (NOLOCK)
    ON TC.TestCampaignId = TA.TestCampaignId

并尝试这个查询 -

-- Stage 1 of the split load, written with one pre-filtered derived table per
-- source table. The result is stored in #FinalResult_Base_1.
SELECT
    TC.TestCampaignContainerId,
    TC.CategoryID AS TestCampaignCategoryId,
    TC.Grade,
    TC.TestCampaignId,
    T.TestSetId,
    TL.TestId,
    TL.SectionNo,
    TL.Difficulty,
    -- Converts section/difficulty to labels such as A5, B4, B5 ...
    CHAR(65 + TL.SectionNo) + CONVERT(VARCHAR(4), 6 - TL.Difficulty) AS TestletName,
    TS.StudentId,
    TS.ClassId,
    TS.Redizo,
    1 AS [Version], -- ?
    TA.id AS TestAdaptationId,
    TL.guid AS TLGuid,
    TS.TimeWindowId
INTO [#FinalResult_Base_1]
FROM (
    SELECT *
    FROM dbo.[TestSet] AS src WITH (NOLOCK)
    WHERE src.[Redizo] NOT IN (999999999, 111111119)
) AS TS
INNER JOIN (
    SELECT *
    FROM dbo.[Test] AS src WITH (NOLOCK)
    WHERE src.[TestTypeId] = 1
        AND src.[ProcessedOn] IS NOT NULL
) AS T
    ON [T].[TestSetId] = [TS].[guid]
INNER JOIN (
    SELECT *
    FROM dbo.[Testlet] AS src WITH (NOLOCK)
    WHERE src.[ShownOn] IS NOT NULL
) AS TL
    ON [TL].[TestId] = [T].[guid]
INNER JOIN (
    SELECT *
    FROM dbo.[TimeWindow] AS src WITH (NOLOCK)
    WHERE src.[IsActive] = 1
) AS TW
    ON [TW].[id] = [TS].[TimeWindowId]
INNER JOIN (
    SELECT *
    FROM dbo.[TestAdaptation] AS src WITH (NOLOCK)
    WHERE src.[IsActive] = 1
) AS TA
    ON [TA].[id] = [TW].[TestAdaptationId]
INNER JOIN (
    SELECT *
    FROM dbo.[TestCampaign] AS src WITH (NOLOCK)
    WHERE src.[IsActive] = 1
) AS TC
    ON [TC].[TestCampaignId] = [TA].[TestCampaignId]
-- Possibly unused join in this query, left disabled.
-- NOTE(review): the join below is an inner join with an IsActive = 1 filter,
-- so leaving it out can change which rows are returned - confirm before
-- relying on this version.
--JOIN dbo.[TestCampaignContainer] [TCC] WITH (NOLOCK) ON [TCC].[TestCampaignContainerId] = [TC].[TestCampaignContainerId] AND [TCC].[IsActive] = 1


-- Stage 2 of the split load: extend every stage-1 row with its test tasks and
-- store the result in #FinalResult_Base_2. TTGuid is carried along for the
-- final stage.
SELECT
    FR1.TestCampaignContainerId,
    FR1.TestCampaignCategoryId,
    FR1.Grade,
    FR1.TestCampaignId,
    FR1.TestSetId,
    FR1.TestId,
    TSK.CategoryID AS [TaskCategoryId],
    TT.[TestletId],
    FR1.SectionNo,
    FR1.Difficulty,
    -- Converts section/difficulty to labels such as A5, B4, B5 ...
    CHAR(65 + FR1.SectionNo) + CONVERT(VARCHAR(4), 6 - FR1.Difficulty) AS TestletName,
    FR1.StudentId,
    FR1.ClassId,
    FR1.Redizo,
    TT.ViewCount,
    TT.SpentTime,
    1 AS [Version], -- ?
    FR1.TestAdaptationId,
    TSK.TaskId AS TaskId,
    TT.Position AS TaskPosition,
    TT.TestletAnswerTypeId AS AnswerType,
    TT.guid AS TTGuid
INTO [#FinalResult_Base_2]
FROM #FinalResult_Base_1 AS FR1
INNER JOIN [TestTask] AS [TT] WITH (NOLOCK)
    ON [TT].[TestletId] = [FR1].[TLGuid]
INNER JOIN [Task] AS [TSK] WITH (NOLOCK)
    ON [TSK].[TaskId] = [TT].[TaskId]
-- Possibly unused join, left disabled together with its filter further down.
-- NOTE(review): as an inner join with an IsActive = 1 filter it can restrict
-- the rows, so dropping it may change the result - confirm.
--JOIN [category] [C] WITH (NOLOCK) ON [C].[CategoryID] = [TSK].[CategoryID]
WHERE [TSK].[IsActive] = 1
    --AND [C].[IsActive] = 1

-- The stage-1 temp table is not read again after this point.
DROP TABLE [#FinalResult_Base_1];

-- Index the stage-2 temp table on the two columns that the following
-- RoleAssignment join matches on; TTGuid (used by the TestQuestion join)
-- is included at the leaf level.
CREATE NONCLUSTERED INDEX [#IX_FR_Student_Class]
    ON [dbo].[#FinalResult_Base_2] ([StudentId], [ClassId])
    INCLUDE ([TTGuid]);

-- Final stage: empty the permanent table and refill it directly from the
-- stage-2 temp table joined to questions and role assignments.
-- NOTE(review): the TRUNCATE runs before the INSERT ... SELECT and no explicit
-- transaction is visible here, so a failure in the INSERT would presumably
-- leave FinalResult_Base empty - confirm this is acceptable.
TRUNCATE TABLE [dbo].[FinalResult_Base];

INSERT INTO [dbo].[FinalResult_Base]
SELECT
    FR2.TestCampaignContainerId,
    FR2.TestCampaignCategoryId,
    FR2.Grade,
    FR2.TestCampaignId,
    FR2.TestSetId,
    FR2.TestId,
    FR2.[TaskCategoryId],
    FR2.[TestletId],
    FR2.SectionNo,
    FR2.Difficulty,
    FR2.TestletName,
    TQ.[QuestionId],
    FR2.StudentId,
    FR2.ClassId,
    RA.SubjectId,
    TQ.[QuestionPoints], -- 1+ good, 0 wrong, null no answer
    -- NULL points compare as UNKNOWN and therefore fall through to ELSE 0.
    CASE WHEN TQ.[QuestionPoints] > 0 THEN 1 ELSE 0 END AS GoodAnswer,
    -- A missing answer (NULL) is counted as wrong as well as zero points.
    CASE WHEN TQ.[QuestionPoints] = 0 OR TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS WrongAnswer,
    CASE WHEN TQ.[QuestionPoints] IS NULL THEN 1 ELSE 0 END AS NoAnswer,
    FR2.Redizo,
    FR2.ViewCount,
    FR2.SpentTime,
    TQ.[Position] AS [QuestionPosition],
    RA.SpecialNeeds, -- special-needs (SVP) identification
    1 AS [Version], -- ?
    FR2.TestAdaptationId,
    FR2.TaskId,
    FR2.TaskPosition,
    Q.Rate AS QuestionRate,
    TQ.guid AS TestQuestionId,
    FR2.AnswerType
FROM [#FinalResult_Base_2] AS FR2
INNER JOIN [TestQuestion] AS [TQ] WITH (NOLOCK)
    ON [TQ].[TestTaskId] = [FR2].[TTGuid]
INNER JOIN [Question] AS [Q] WITH (NOLOCK)
    ON [Q].[QuestionId] = [TQ].[QuestionId]
INNER JOIN [RoleAssignment] AS [RA] WITH (NOLOCK)
    ON [RA].[PersonID] = [FR2].[StudentId]
    AND [RA].[ClassId] = [FR2].[ClassId]
WHERE [Q].[IsActive] = 1
    AND [RA].[IsActive] = 1
    AND [RA].[RoleId] = 1;

-- Stage 2 is no longer needed.
DROP TABLE #FinalResult_Base_2;
于 2013-05-30T10:22:20.430 回答
1

恕我直言不应该是这样,我遇到了类似的问题并采取了以下步骤来优化它。

  • 尝试将索引放在用于过滤的列上(决定连接的列)。

  • 尝试将索引放在视图上,可以做到,但需要一些特殊要求。

  • 为索引重建做一些工作。

  • 使用数据库的另一个镜像实例来运行报告,使报告负载远离在线(活动)数据库

  • 如果需要转换数据,请在连接之后再进行,不要在连接条件中使用函数。

  • 使用查询优化器查看连接的哪一部分占用了大部分时间/资源。

  • 使用存档表,把实时数据库中不再需要的数据清理并转移到存档表中。

希望能帮助到你 :)

于 2013-05-30T10:19:56.393 回答