sql-server - 重新设计缓存 UDF，因为不允许副作用

Question

尝试复制将按如下方式工作的函数的逻辑：

given input params @a,@b,@c:

look up return value @v in table where column a=@a and b=@b and c=@c
if found, return @v
if not found, perform complex calculations to derive @v
insert into lookup table (a, b, c, v) values (@a, @b, @c, @v)
return @v

这个想法是在表中查找值将比复杂的计算快得多。如果您必须对一组值进行复杂的计算，则在表中添加一行，这样下次查找 @v 的值会更快。

当然，由于副作用的原因，您不能在 UDF 中执行 INSERT 语句。

我不是要求允许我在 UDF 中执行 INSERT 的 hack，而是要求我如何重新设计代码以执行相同类型的逻辑的想法？目标是使用 UDF，因为我们需要为数千个输入值集计算 @v 值，这些值都存在于一个表中。

score 0 · Accepted Answer

由于似乎没有可以计算值和管理缓存的单个函数，因此这里有一个为现有样本数据集提供值的合理有效方法的示例。UDF 将提供缓存或计算的值，但更新缓存仍然是单独的。

set nocount on;

-- Demo fixtures.
--   Samples: the ( A, B ) input pairs to be evaluated; V holds the result and is NULL until one is assigned.
--   Cache:   ( A, B ) pairs whose value V is already known.
create table Samples
  (
  Id Int Identity,
  A  Int,
  B  Int,
  V  Int Null
  );

insert into Samples ( A, B ) values
  ( 0, 0 ),
  ( 0, 1 ),
  ( 1, 0 ),
  ( 1, 1 ),
  ( 2, 0 ),
  ( 0, 2 ),
  ( 0, 1 ),  -- ( 0, 1 ) appears three times in total, so some samples share a cache entry.
  ( 0, 1 );

create table Cache
  (
  Id Int Identity,
  A  Int,
  B  Int,
  V  Int
  );

-- Seed the cache with two entries so that it starts out only partially populated.
insert into Cache ( A, B, V ) values
  ( 1, 1, 1 ),
  ( 0, 2, 4 );
go

-- Stand-in for the expensive calculation.
--   @A, @B: the input values.
--   Returns: the sum of the squares of @A and @B.
create function dbo.ExpensiveCalculation( @A Int, @B Int )
  returns Int
  as
  begin
  declare @SumOfSquares as Int = @A * @A + @B * @B;
  return @SumOfSquares;
  end;
go

-- Read-only, cache-aware wrapper around dbo.ExpensiveCalculation.
--   @A, @B: the input values.
--   Returns: the V stored in Cache for ( @A, @B ) when at least one matching row exists,
--            otherwise the freshly calculated value.
--   The function never writes to Cache; adding new entries is left to the caller.
create function dbo.ExpensiveCalculationWithCaching( @A Int, @B Int )
  returns Int
  as
  begin
  declare @CachedValue as Int, @MatchCount as Int;

  -- Look for a cached value.
  select @CachedValue = V
    from Cache
    where A = @A and B = @B;
  -- Capture the row count immediately: it reflects only the most recent statement.
  set @MatchCount = @@RowCount;

  -- Cache hit: hand back whatever the cache holds (a matching row counts even if its V is NULL).
  if @MatchCount <> 0
    return @CachedValue;

  -- Cache miss: fall back to the real calculation.
  return dbo.ExpensiveCalculation( @A, @B );
  end;
go

-- Apply any previously cached values to the samples.
update S
  set S.V = C.V
  from Samples as S inner join
    Cache as C on C.A = S.A and C.B = S.B;
print Cast( @@RowCount as VarChar(6) ) + ' samples satisfied from initial cache.'

declare @BatchSize as Int = 3; -- Number of rows to process with the function in each iteration.
declare @CacheIds as Table ( Id Int ); -- Ids of the Cache rows added by the current iteration.
declare @CacheEntriesAdded as Int; -- Number of Cache rows added by the current iteration.

-- Update the samples one batch at a time.
while exists ( select 42 from Samples where V is NULL )
  begin

  -- Clear the intermediate data, if any.  (Deliberately unfiltered: the whole table variable is reset.)
  delete from @CacheIds;

  -- Find a batch of unknown values with distinct input values and apply the function.
  --   Add the results to the cache and note the id's of the new rows.
  --   Skipped, because they would otherwise be re-inserted on every pass without ever
  --   resolving a sample:
  --     * inputs with a NULL A or B (the equality joins below never match NULL), and
  --     * inputs that already have a Cache row (their samples are still NULL only
  --       because the cached value itself is NULL).
  insert into Cache ( A, B, V )
    output inserted.Id into @CacheIds
    select top (@BatchSize) S.A, S.B, dbo.ExpensiveCalculation( S.A, S.B )
      from Samples as S
      where S.V is NULL and S.A is not NULL and S.B is not NULL and
        not exists ( select 42 from Cache as C where C.A = S.A and C.B = S.B )
      group by S.A, S.B;
  -- Capture the count before print (or anything else) can reset @@RowCount.
  set @CacheEntriesAdded = @@RowCount;
  print Cast( @CacheEntriesAdded as VarChar(6) ) + ' cache entries added.'

  -- No new cache entries means the remaining NULL samples can never be resolved:
  --   leave the loop instead of spinning forever.
  if @CacheEntriesAdded = 0
    begin
    print 'Remaining samples cannot be resolved; stopping.'
    break;
    end

  -- Update any samples that benefit from the newly cached values.
  update S
    set S.V = C.V
    from Samples as S inner join
      Cache as C on C.A = S.A and C.B = S.B inner join
      @CacheIds as CI on CI.Id = C.Id
    where S.V is NULL;
  print Cast( @@RowCount as VarChar(6) ) + ' samples satisfied from cache update.'
  end

-- Display the results.
--   Ordered by Id so the output is in insertion order; without an ORDER BY the row order is undefined.
select Id, A, B, V
  from Samples
  order by Id;

-- Exercise both paths of the caching function:
--   ( 1, 1 ) is in the seeded cache with V = 1 (the calculation would give 2), so the cached value shows;
--   ( 4, 4 ) is not among the sample inputs, so it is expected to be calculated.
select dbo.ExpensiveCalculationWithCaching( 1, 1 ) as Cached,
  dbo.ExpensiveCalculationWithCaching( 4, 4 ) as Calculated;

-- Houseclean: remove the demo objects.
--   The caching function goes first because it calls dbo.ExpensiveCalculation and reads Cache.
drop function dbo.ExpensiveCalculationWithCaching, dbo.ExpensiveCalculation;
drop table Samples, Cache;

注意：如果在执行此代码期间向 Samples 表中添加了行，或更新了其中的行，则这些行可能不会全部被处理。

score 0 · Accepted Answer

这不是您想要的那种解决方案，但既然您也在寻找其他方案……您可以在 [LookUp] 上创建一个 INSERT 触发器，由它完成所有需要的计算。最终查询直接使用查找表而不是 UDF；在执行最终查询之前，可以通过以下语句确保所有需要的值都已存在于查找表中：

-- Add to [LookUp] every distinct (a, b, c) combination found in [Source]
-- that has no matching [LookUp] row yet. Only the key columns are supplied here.
INSERT INTO [LookUp] (a, b, c)
SELECT DISTINCT
    src.a,
    src.b,
    src.c
FROM [Source] AS src
WHERE NOT EXISTS (
    SELECT 1
    FROM [LookUp] AS existing
    WHERE existing.a = src.a
      AND existing.b = src.b
      AND existing.c = src.c
)

score 0 · Accepted Answer

编辑：这可能是比下面更好的方法。同样，这是完全未经测试的。它只是作为一个想法的起点。

-- Inline table-valued lookup of a stored value.
--   @a, @b, @C: the key values to look up.
--   Returns: one row (column v) per dbo.MyTable row matching all three keys;
--            an empty result when nothing matches.
CREATE FUNCTION dbo.MyFN_MKII_1
(
    @a INT,
    @b INT,
    @C INT
)
RETURNS TABLE 
WITH SCHEMABINDING 
AS
    -- SCHEMABINDING requires two-part (schema.object) names, hence dbo.MyTable.
    -- NOTE(review): assumes MyTable lives in the dbo schema, like this function -- confirm.
    RETURN  SELECT  v 
            FROM dbo.MyTable
            WHERE a = @a
            AND b = @b
            AND c = @C  -- same casing as the declaration, so it also resolves under a case-sensitive collation
;
GO

-- Scalar function that performs the actual calculation.
--   @a, @b, @C: the input values.
--   Returns: @a raised to the power @b, multiplied by @C.
CREATE FUNCTION dbo.MyFN_MKII_2
(
    @a INT,
    @b INT,
    @C INT
)
RETURNS INT
WITH SCHEMABINDING 
AS
-- place the body of the function here
-- (the expression below is only a placeholder for the real calculation)
BEGIN
     -- @C is written with the same casing as its declaration so the reference
     -- also resolves under a case-sensitive collation.
     RETURN POWER(@a, @b) * @C;
END
GO

---------------------
--  Usage
---------------------

-- For every SomeTable row, take the stored value when dbo.MyFN_MKII_1 finds one
-- and fall back to calculating it with dbo.MyFN_MKII_2 otherwise.
--
-- OUTER APPLY (not CROSS APPLY) is required: CROSS APPLY discards every row for
-- which the lookup returns no rows, so the ISNULL fallback would never be reached.
--
-- NOTE(review): the lookup function reads MyTable while this statement inserts into
-- LookupTable. If both are meant to be the same cache table, add
-- "WHERE Cached.v IS NULL" so already-cached rows are not inserted again -- confirm.
INSERT INTO LookupTable
SELECT   ST.ColA
        ,ST.ColB
        ,ST.ColC
        ,ISNULL(Cached.v, dbo.MyFN_MKII_2(ST.ColA, ST.ColB, ST.ColC))
FROM SomeTable  ST
OUTER APPLY dbo.MyFN_MKII_1(ST.ColA, ST.ColB, ST.ColC) Cached

这是完全未经测试的。它只是作为一个想法的起点。周围有一些非常聪明的人可能会为这个概念提供优化。

-- Multi-statement table-valued function combining lookup and calculation.
--   @a, @b, @C: the key / input values.
--   Returns: rows of (a, b, c, v). v comes from MyTable when a matching row exists;
--            otherwise a single row is returned with v calculated as POWER(@a, @b) * @C.
--   The function only fills its own result table; it does not write to MyTable.
CREATE FUNCTION dbo.MyFN
(
    @a INT,
    @b INT,
    @C INT
)
RETURNS @Result TABLE 
(
    a INT,
    b INT,
    c INT,
    v INT
)
AS
BEGIN

    -- Try the stored value first.
    -- @C is written with the same casing as its declaration so the references
    -- also resolve under a case-sensitive collation.
    INSERT INTO @Result (a, b, c, v)
    SELECT @a, @b, @C, v
    FROM MyTable
    WHERE a = @a
    AND b = @b
    AND c = @C;

    -- Nothing stored for these inputs: calculate the value instead.
    -- @@ROWCOUNT must be tested immediately after the INSERT it refers to.
    IF @@ROWCOUNT = 0
    BEGIN
        INSERT INTO @Result (a, b, c, v)
        SELECT @a, @b, @C, POWER(@a, @b) * @C;
    END

   RETURN
END
GO


---------------------
--  Usage
---------------------

-- Insert one (a, b, c, v) row per SomeTable row, as produced by dbo.MyFN.
-- Only the function's four columns are selected: SELECT * would also return every
-- SomeTable column, which does not fit a four-column insert into LookupTable.
-- NOTE(review): assumes LookupTable takes exactly (a, b, c, v) in that order -- confirm.
INSERT INTO LookupTable
SELECT   F.a
        ,F.b
        ,F.c
        ,F.v
FROM SomeTable ST
CROSS APPLY dbo.MyFN(ST.ColA, ST.ColB, ST.ColC) F

我预计这个函数会比较慢。

sql-server - 重新设计缓存 UDF，因为不允许副作用

3 回答 3

Related

Reference