0

我需要帮助优化或重写这个复杂的查询。该查询非常适合具有 50 行的测试数据,但实际数据现在有超过 700,000 行,并且查询需要超过 5 分钟才能运行。我在两个表的主键上有索引。我相信年龄函数的成本很高,如果我把它拿出来它可以节省大约 2 ½ 分钟。任何建议表示赞赏。提前致谢。

WITH T AS (
        SELECT TOP 2000
          A.Residence_City, 
          CASE 
             WHEN A.Gender = 'M' then 'Male'
             WHEN A.Gender = 'F' then 'Female'
             WHEN A.Gender = 'U' then 'Unknown'
        END AS Gender,
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 18 and 24 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [0_3_18],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 18 and 24 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 1 then 1 else null end) as [1_3_18],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 18 and 24 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 2 then 1 else null end) as [2_3_18],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 18 and 24 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 3 then 1 else null end) as [3_3_18],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 18 and 24 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [Unknown_18],

count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 25 and 34 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [0_3_25],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 25 and 34 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 1 then 1 else null end) as [1_3_25],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 25 and 34 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 2 then 1 else null end) as [2_3_25],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 25 and 34 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 3 then 1 else null end) as [3_3_25],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 25 and 34 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [Unknown_25],

count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 35 and 49 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [0_3_35],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 35 and 49 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 1 then 1 else null end) as [1_3_35],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 35 and 49 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 2 then 1 else null end) as [2_3_35],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 35 and 49 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 3 then 1 else null end) as [3_3_35],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 35 and 49 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [Unknown_35],

count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 50 and 64 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [0_3_50],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 50 and 64 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 1 then 1 else null end) as [1_3_50],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 50 and 64 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 2 then 1 else null end) as [2_3_50],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 50 and 64 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 3 then 1 else null end) as [3_3_50],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 50 and 64 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [Unknown_50],

count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 65 and 120 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [0_3_65],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 65 and 120 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 1 then 1 else null end) as [1_3_65],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 65 and 120 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 2 then 1 else null end) as [2_3_65],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 65 and 120 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 3 then 1 else null end) as [3_3_65],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) between 65 and 120 and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [Unknown_65],

count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) = '' or voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() )is null and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [0_3_],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) = '' or voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() )is null and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 1 then 1 else null end) as [1_3_],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) = '' or voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() )is null and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 2 then 1 else null end) as [2_3_],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) = '' or voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() )is null and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 3 then 1 else null end) as [3_3_],
count(case when voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() ) = '' or voterData.dbo.ufn_GetAge ( convert(datetime,[Date_of_Birth]), GETDATE() )is null and voterData.dbo.GeneralVoting (V.[G10EC], V.[G08EC],V.[G06EC]) = 0 then 1 else null end) as [Unknown_]

from Personal as A INNER JOIN Voting_History V 
on A.Vuid = V.Vuid 
group by Residence_City, Gender with rollup
Order by Residence_City, Gender 
)
, temp1 as (
    SELECT  *,([3_3_18] + [3_3_25] + [3_3_35] + [3_3_50] + [3_3_65] + [3_3_]
              +[2_3_18] + [2_3_25] + [2_3_35] + [2_3_50] + [2_3_65] + [2_3_]
              +[1_3_18] + [1_3_25] + [1_3_35] + [1_3_50] + [1_3_65] + [1_3_]
              +[0_3_18] + [0_3_25] + [0_3_35] + [0_3_50] + [0_3_65] + [0_3_]
            +[Unknown_18]+ [Unknown_25]+ [Unknown_35] + [Unknown_50]+ [Unknown_65] + [Unknown_]) Total
FROM T
where NULLIF(Residence_City,'') IS NOT NULL
), temp2 as (
    SELECT * FROM temp1
UNION ALL
select 
'grand Total',
SUM([0_3_18])/2,SUM([1_3_18])/2,SUM([2_3_18])/2,SUM([3_3_18])/2,SUM([Unknown_18])/2,
SUM([0_3_25])/2,SUM([1_3_25])/2,SUM([2_3_25])/2,SUM([3_3_25])/2,SUM([Unknown_25])/2,
SUM([0_3_35])/2,SUM([1_3_35])/2,SUM([2_3_35])/2,SUM([3_3_35])/2,SUM([Unknown_35])/2,
SUM([0_3_50])/2,SUM([1_3_50])/2,SUM([2_3_50])/2,SUM([3_3_50])/2,SUM([Unknown_50])/2,
SUM([0_3_65])/2,SUM([1_3_65])/2,SUM([2_3_65])/2,SUM([3_3_65])/2,SUM([Unknown_65])/2,
SUM([0_3_])/2,SUM([1_3_])/2,SUM([2_3_])/2,SUM([3_3_])/2,SUM([Unknown_])/2,
sum(Total)/2

FROM temp1
)

SELECT Residence_City, Gender,
    [0_3_18]     as [0_3],  
    [1_3_18]     as [1_3], 
    [2_3_18]     as [2_3], 
    [3_3_18]     as [3_3], 
    [Unknown_18] as [Unknown],

    [0_3_25]     as [0_3],  
    [1_3_25]     as [1_3], 
    [2_3_25]     as [2_3],  
    [3_3_25]     as [3_3],  
    [Unknown_25] as [Unknown], 

    [0_3_35]     as [0_3],  
    [1_3_35]     as [1_3], 
    [2_3_35]     as [2_3], 
    [3_3_35]     as [3_3], 
    [Unknown_35] as [Unknown], 

    [0_3_35]     as [0_3],  
    [1_3_35]     as [1_3], 
    [2_3_35]     as [2_3], 
    [3_3_35]     as [3_3], 
    [Unknown_35] as [Unknown], 

    [0_3_35]     as [0_3],  
    [1_3_35]     as [1_3], 
    [2_3_35]     as [2_3], 
    [3_3_35]     as [3_3], 
    [Unknown_35] as [Unknown], 

    [0_3_35]     as [0_3],  
    [1_3_35]     as [1_3], 
    [2_3_35]     as [2_3], 
    [3_3_35]     as [3_3], 
    [Unknown_35] as [Unknown], 
    Total
   FROM temp2

这些是功能

    ALTER FUNCTION [dbo].[GeneralVoting] ( @one varchar, @two varchar,@three varchar)
    RETURNS INT
    AS
    BEGIN

        DECLARE @vAge         INT

        SET @vAge = (CASE WHEN @one IS NOT NULL THEN 1 ELSE 0 END)
                        +(CASE WHEN @two IS NOT NULL THEN 1 ELSE 0 END)
                        +(CASE WHEN @three IS NOT NULL THEN 1 ELSE 0 END)
     RETURN @vAge
    END

    ALTER FUNCTION [dbo].[ufn_GetAge] ( @pDateOfBirth DATETIME, @pAsOfDate DATETIME )
    RETURNS INT
    AS
    BEGIN
        DECLARE @vAge         INT
        IF @pDateOfBirth >= @pAsOfDate
            RETURN 0

        SET @vAge = DATEDIFF(YY, @pDateOfBirth, @pAsOfDate)

        IF MONTH(@pDateOfBirth) > MONTH(@pAsOfDate) OR
          (MONTH(@pDateOfBirth) = MONTH(@pAsOfDate) AND
           DAY(@pDateOfBirth)   > DAY(@pAsOfDate))
            SET @vAge = @vAge - 1
      RETURN @vAge
    END
4

1 回答 1

0

连接结构看起来很合理。我不确定您是否需要汇总,但它可能不会影响性能。

我怀疑用户定义的功能正在影响性能。SQL Server 不够聪明,无法优化对 ufn_GetAge() 的多次调用,因此该函数被一遍又一遍地调用。相反,将年龄放在子查询中:

(select *, voterData.dbo.ufn_GetAge( convert(datetime,[Date_of_Birth]), GETDATE() ) as age
 from VoterData
) age

该函数非常简单,因此您也可以直接替换它以消除这个调用。

您也可以对其他功能采用相同的方法。

另外,投票和个人历史记录表有多大?特别是城市和性别有多少种组合?您可以通过预先选择前 2000 个然后对不同的组进行计算来提高查询效率。但是,在不知道尺寸的情况下,很难判断这是否有效。另外,您真的是要按 Residence_City, Gender 订购吗?. . 还是您想按其他字段之一订购?照原样,查询只是按字母顺序选择第一个。

于 2012-05-12T16:27:42.840 回答