2

我正在尝试使用子查询方法找到所选日期内的最后一条记录。问题是查询太慢了。我想知道是否有人对如何重写此查询以提高性能有任何想法。我的服务器因此而死亡。
为了使测试更容易,我创建了一个表变量来生成用于测试目的的假数据。要测试此脚本,请运行 usp_ExtractData'400000'

我担心的是 ---SECTION B 我的结果是 400000*3 = 1200000 条记录的 18 秒。在真正的数据库上,我会为它编制索引并每晚重新编制索引。

--Store proceedure with table variable data

ALTER PROCEDURE [dbo].[usp_ExtractData](
@TotalRecord int--Create random records for each product
)

AS
BEGIN
    --MS SQL 2008
    SET NOCOUNT ON;

    --SECTION 1--Create test data--- GO TO SECTION 2
        --Create Variable table to Products fake data
        DECLARE @Product TABLE
        (
          ProductID int primary  key not null
          ,SKU varchar(100) not null
        )
        --Insert couple records into @Product table
        INSERT INTO @Product(ProductID, SKU) VALUES     (100,'CUP100')
        INSERT INTO @Product(ProductID, SKU) VALUES     (101,'CUP101')
        INSERT INTO @Product(ProductID, SKU) VALUES     (102,'MUG101')

        --Create Variable table to hold Products History data
        DECLARE @History TABLE
        (
           ID int identity not null
          ,ProductID int not null
          ,VisitedDatetime datetime not null
        )

        --Generate random record for testing
        WHILE @TotalRecord>0
            BEGIN
                INSERT INTO  @History( ProductID, VisitedDatetime) VALUES (100,DATEADD(minute,rand()*100,GETDATE()))
                INSERT INTO  @History( ProductID, VisitedDatetime) VALUES (101,DATEADD(minute,rand()*100,GETDATE()))
                INSERT INTO  @History( ProductID, VisitedDatetime) VALUES (102,DATEADD(minute,rand()*100,GETDATE()))
                set @TotalRecord=@TotalRecord-1
            END
    --SECTION 1--Finised creating test data



    ---SECTION B 

      --SELECTION B1- SEE DATA
      SELECT * FROM @History         ORDER BY ProductID, VisitedDatetime DESC
        --Run query to find the last visit per each ProductID

        --THIS IS TOO SLOW
        DECLARE @TestPerformanceDatetime datetime--Test performance
        SET @TestPerformanceDatetime= GETDATE()
        SELECT  *, (select top(1) VisitedDatetime FROM @History as t2 WHERE t2.ProductID=ProductID and VisitedDatetime BETWEEN GETDATE() AND GETDATE()+10 ORDER BY VisitedDatetime DESC) as LastVistiDate
        FROM     @Product

        --Display the performance
        SELECT  DATEDIFF(SECOND, @TestPerformanceDatetime,getdate()) AS TotalSeconds
    ---SECTION B - End
END
4

3 回答 3

2

使用cross applymax()

select *
from @Product p
cross apply (
    select MAX(VisitedDatetime) LastVisitedDatetime
    from @History
    where VisitedDatetime BETWEEN GETDATE() AND GETDATE()+10
        and ProductID = p.ProductID
) h
于 2012-08-25T00:10:54.167 回答
0

这个查询的原始版本我得到了 0 秒,所以我将随机测试记录的数量从 400,000 增加到 4,000,000。

CREATE PROCEDURE [dbo].[usp_ExtractData_test](
@TotalRecord int--Create random records for each product
)

AS
BEGIN
    --MS SQL 2008
    SET NOCOUNT ON;

    --SECTION 1--Create test data--- GO TO SECTION 2
        --Create Variable table to Products fake data
        DECLARE @Product TABLE
        (
          ProductID int primary  key not null
          ,SKU varchar(100) not null
        )
        --Insert couple records into @Product table
        INSERT INTO @Product(ProductID, SKU) VALUES     (100,'CUP100')
        INSERT INTO @Product(ProductID, SKU) VALUES     (101,'CUP101')
        INSERT INTO @Product(ProductID, SKU) VALUES     (102,'MUG101')

        --Create Variable table to hold Products History data
        DECLARE @History TABLE
        (
           ID int identity not null
          ,ProductID int not null
          ,VisitedDatetime datetime not null
        )

        --Generate random record for testing
        WHILE @TotalRecord>0
            BEGIN
                INSERT INTO  @History( ProductID, VisitedDatetime) VALUES (100,DATEADD(minute,rand()*100,GETDATE()))
                INSERT INTO  @History( ProductID, VisitedDatetime) VALUES (101,DATEADD(minute,rand()*100,GETDATE()))
                INSERT INTO  @History( ProductID, VisitedDatetime) VALUES (102,DATEADD(minute,rand()*100,GETDATE()))
                set @TotalRecord=@TotalRecord-1
            END
    --SECTION 1--Finised creating test data



    ---SECTION B 
        --Run query to find the last visit per each ProductID
        --THIS IS TOO SLOW
        DECLARE @TestPerformanceDatetime datetime--Test performance
        SET @TestPerformanceDatetime= GETDATE()
        SELECT  P.*, LastVisitDate.VisitedDatetime
        FROM     @Product P
        LEFT
        JOIN  (select top(1) T2.VisitedDatetime FROM @History as t2
               ORDER BY T2.VisitedDatetime DESC) as LastVisitDate
          ON  LastVisitDate.VisitedDatetime BETWEEN GETDATE() AND GETDATE()+10

        --Display the performance
        SELECT  DATEDIFF(SECOND, @TestPerformanceDatetime,getdate()) AS TotalSeconds
    ---SECTION B - End
END

证明

于 2012-08-24T21:42:29.980 回答
0

在我的笔记本上,我看到生成历史记录大约需要 102,346 毫秒,第一次搜索需要 5,120 毫秒,第二次搜索需要 643 毫秒。OTOH,它同时是 BOINCing Rosetta@Home。

declare @HistoryRecordsPerProduct int = 400000

set nocount on

-- drop table #Product
-- drop table #History

-- Create the test tables.

create table #Product
  ( ProductId Int primary key not null, SKU VarChar(100) not null )

insert into #Product ( ProductId, SKU ) values
  ( 100, 'CUP100' ), ( 101, 'CUP101' ), ( 102, 'MUG102' )

create table #History 
  ( Id Int identity not null, ProductId Int not null, VisitedDatetime DateTime not null )
-- EDIT: Note the following index on both columns. 
create index History_Product_VisitedDateTime on #History ( ProductId, VisitedDateTime desc )

-- Populate the history table.
declare @Start as DateTime = GetDate()

while @HistoryRecordsPerProduct > 0
  begin
  insert into #History ( ProductId, VisitedDatetime ) values ( 100, DateAdd( minute, rand() * 100, GetDate() ) ) 
  insert into #History ( ProductId, VisitedDatetime ) values ( 101, DateAdd( minute, rand() * 100, GetDate() ) ) 
  insert into #History ( ProductId, VisitedDatetime ) values ( 102, DateAdd( minute, rand() * 100, GetDate() ) ) 
  set @HistoryRecordsPerProduct = @HistoryRecordsPerProduct - 1 
  end 

select DateDiff( ms, @Start, GetDate() ) as 'Elapsed History Generation (ms)'

-- Query the data.
set @Start = GetDate()
declare @End as DateTime = @Start + 10 -- Days.

select @Start as [Start], @End as [End]

select ProductId, SKU,
  ( select Max( VisitedDateTime ) from #History where ProductId = #Product.ProductId and
    @Start <= VisitedDatetime and VisitedDatetime <= @End ) as VDT
  from #Product

select DateDiff( ms, @Start, GetDate() ) as 'Elapsed Search (ms)'

-- And again with the data cached.

set @Start = GetDate()
set @End = @Start + 10 -- Days.

select @Start as [Start], @End as [End]

select ProductId, SKU,
  ( select Max( VisitedDateTime ) from #History where ProductId = #Product.ProductId and
    @Start <= VisitedDatetime and VisitedDatetime <= @End ) as VDT
  from #Product

select DateDiff( ms, @Start, GetDate() ) as 'Elapsed Search (ms)'
于 2012-08-25T00:10:44.360 回答