这是我的问题:
如何在 Bigquery UDF 中将函数作为参数传递?
我想从两组私有函数创建一个 bigquery UDF 库:
- 函数A、函数B、函数C
- 函数1、函数2、函数3
我想公开从两组函数的每个组合构建的 mixin UDF。
我想避免源代码中的代码重复。
到目前为止,我发现的两个解决方案是:
解决方案1:
只公开一个公共 UDF:它接收两个字符串参数,并在函数内部通过 switch/case 选择要调用的私有函数。
例如:
mypublic_UDF(*args, "functionA", "function1")
mypublic_UDF(*args, "functionA", "function3")
mypublic_UDF(*args, "functionB", "function1")
但是,此解决方案无法从 bigquery SQL 内省、运行前错误和警告检查中受益,并且涉及解析每一行的参数。
解决方案2:
使用部署脚本,为每一种函数组合实例化一个独立的新函数。
例如:
mypublic_UDF_functionA_function1(*args)
mypublic_UDF_functionA_function3(*args)
mypublic_UDF_functionB_function1(*args)
但是,这个方案需要在部署脚本中加入更多逻辑,并且会生成很多函数。
有首选/最佳解决方案吗?
编辑:
我的代码太复杂,无法在此处显示,但这是一个具体示例。
-- functionA: filter-style helper.
--   myint: value to test.
--   Returns myint unchanged when MOD(myint, 3) equals 1; otherwise NULL.
CREATE TEMP FUNCTION functionA (myint NUMERIC)
AS (
  CASE
    WHEN MOD(myint, 3) = 1 THEN myint
    ELSE NULL
  END
);
-- functionB: filter-style helper.
--   myint: value to test.
--   Returns myint unchanged when MOD(myint, 4) is strictly greater than 3;
--   otherwise NULL.
-- NOTE(review): a whole-number input can never satisfy `> 3` (its remainder
-- modulo 4 is at most 3) -- confirm this threshold is intended.
CREATE TEMP FUNCTION functionB (myint NUMERIC)
AS (
  CASE
    WHEN MOD(myint, 4) > 3 THEN myint
    ELSE NULL
  END
);
-- functionC: filter-style helper.
--   myint: value to test.
--   Returns myint unchanged when (myint * myint - 1) leaves remainder 0
--   modulo 5; otherwise NULL.
CREATE TEMP FUNCTION functionC (myint NUMERIC)
AS (
  CASE
    WHEN MOD(myint * myint - 1, 5) = 0 THEN myint
    ELSE NULL
  END
);
-- function1: transform-style helper.
--   myint: input value (may be negative).
--   Returns the square root of the absolute value of myint.
CREATE TEMP FUNCTION function1 (myint NUMERIC)
AS (
  SQRT(
    ABS(myint)  -- drop the sign first so negative input is accepted
  )
);
-- function2: transform-style helper.
--   myint: input value.
--   Returns myint clamped to the closed interval [-1, 1].
CREATE TEMP FUNCTION function2 (myint NUMERIC)
AS (
  GREATEST(
    LEAST(myint, 1),  -- cap at the upper bound
    -1                -- then raise to the lower bound
  )
);
-- function3: transform-style helper.
--   myint: input value.
--   Returns 1 divided by the larger of myint and 1, so the divisor is
--   never below 1.
CREATE TEMP FUNCTION function3 (myint NUMERIC)
AS (
  1 / GREATEST(myint, 1)
);
--------------------------------------------------------
-- SOLUTION 1
--------------------------------------------------------
-- first_functionset: name-based dispatcher over functionA / functionB / functionC.
--   myint:  value forwarded to the selected function.
--   mytype: name of the function to apply ('functionA', 'functionB' or 'functionC').
--   Returns the selected function's result; raises an error via ERROR() for
--   any other mytype value.
CREATE TEMP FUNCTION first_functionset
(myint NUMERIC, mytype STRING)
AS (
  CASE
    WHEN mytype = 'functionA' THEN functionA(myint)
    WHEN mytype = 'functionB' THEN functionB(myint)
    WHEN mytype = 'functionC' THEN functionC(myint)
    ELSE ERROR('Unknown function name')
  END
);
-- second_functionset: name-based dispatcher over function1 / function2 / function3.
--   myint:  value forwarded to the selected function.
--   mytype: name of the function to apply ('function1', 'function2' or 'function3').
--   Returns the selected function's result; raises an error via ERROR() for
--   any other mytype value.
CREATE TEMP FUNCTION second_functionset
(myint NUMERIC, mytype STRING)
AS (
  CASE
    WHEN mytype = 'function1' THEN function1(myint)
    WHEN mytype = 'function2' THEN function2(myint)
    WHEN mytype = 'function3' THEN function3(myint)
    ELSE ERROR('Unknown function name')
  END
);
-- mypublic_UDF (solution 1): generic public entry point.
--   myarray: input values.
--   param1:  name passed to first_functionset (outer function).
--   param2:  name passed to second_functionset (inner function).
--   For every element x of myarray, computes
--   first_functionset(second_functionset(x, param2), param1) and collects the
--   non-NULL results into an array.
-- The elements are numbered with WITH OFFSET and aggregated ORDER BY that
-- position, so the output follows the input order; without it the order of
-- the aggregated array is not guaranteed.
CREATE TEMP FUNCTION mypublic_UDF
(myarray ARRAY<INT64>, param1 STRING, param2 STRING)
AS ((
  SELECT
    ARRAY_AGG(
      first_functionset(second_functionset(x, param2), param1)
      IGNORE NULLS
      ORDER BY pos
    )
  FROM UNNEST(myarray) AS x WITH OFFSET AS pos
));
--------------------------------------------------------
-- SOLUTION 2
--------------------------------------------------------
-- mypublic_UDF_functionA_function1 (solution 2): pre-instantiated combination.
--   myarray: input values.
--   For every element x of myarray, computes functionA(function1(x)) and
--   collects the non-NULL results into an array.
-- The elements are numbered with WITH OFFSET and aggregated ORDER BY that
-- position, so the output follows the input order; without it the order of
-- the aggregated array is not guaranteed.
CREATE TEMP FUNCTION mypublic_UDF_functionA_function1
(myarray ARRAY<INT64>)
AS ((
  SELECT
    ARRAY_AGG(
      functionA(function1(x))
      IGNORE NULLS
      ORDER BY pos
    )
  FROM UNNEST(myarray) AS x WITH OFFSET AS pos
));
-- mypublic_UDF_functionA_function2 (solution 2): pre-instantiated combination.
--   myarray: input values.
--   For every element x of myarray, computes functionA(function2(x)) and
--   collects the non-NULL results into an array.
-- The elements are numbered with WITH OFFSET and aggregated ORDER BY that
-- position, so the output follows the input order; without it the order of
-- the aggregated array is not guaranteed.
CREATE TEMP FUNCTION mypublic_UDF_functionA_function2
(myarray ARRAY<INT64>)
AS ((
  SELECT
    ARRAY_AGG(
      functionA(function2(x))
      IGNORE NULLS
      ORDER BY pos
    )
  FROM UNNEST(myarray) AS x WITH OFFSET AS pos
));
-- mypublic_UDF_functionA_function3 (solution 2): pre-instantiated combination.
--   myarray: input values.
--   For every element x of myarray, computes functionA(function3(x)) and
--   collects the non-NULL results into an array.
-- The elements are numbered with WITH OFFSET and aggregated ORDER BY that
-- position, so the output follows the input order; without it the order of
-- the aggregated array is not guaranteed.
CREATE TEMP FUNCTION mypublic_UDF_functionA_function3
(myarray ARRAY<INT64>)
AS ((
  SELECT
    ARRAY_AGG(
      functionA(function3(x))
      IGNORE NULLS
      ORDER BY pos
    )
  FROM UNNEST(myarray) AS x WITH OFFSET AS pos
));
-- mypublic_UDF_functionB_function1 (solution 2): pre-instantiated combination.
--   myarray: input values.
--   For every element x of myarray, computes functionB(function1(x)) and
--   collects the non-NULL results into an array.
-- The elements are numbered with WITH OFFSET and aggregated ORDER BY that
-- position, so the output follows the input order; without it the order of
-- the aggregated array is not guaranteed.
CREATE TEMP FUNCTION mypublic_UDF_functionB_function1
(myarray ARRAY<INT64>)
AS ((
  SELECT
    ARRAY_AGG(
      functionB(function1(x))
      IGNORE NULLS
      ORDER BY pos
    )
  FROM UNNEST(myarray) AS x WITH OFFSET AS pos
));
-- and so on
-- Demo query: calls the same three function combinations through both
-- approaches, each time on the array built by GENERATE_ARRAY(0,100), so the
-- two solutions can be compared column by column.
-- NOTE(review): the select list ends with a trailing comma and there is no
-- terminating semicolon -- the statement may continue beyond this excerpt.
Select
-- SOLUTION 1
-- Generic UDF; the function names are passed as string arguments.
mypublic_UDF(GENERATE_ARRAY(0,100), 'functionA', 'function1'),
mypublic_UDF(GENERATE_ARRAY(0,100), 'functionA', 'function3'),
mypublic_UDF(GENERATE_ARRAY(0,100), 'functionB', 'function1'),
-- SOLUTION 2
-- One dedicated UDF per combination; no name arguments.
mypublic_UDF_functionA_function1(GENERATE_ARRAY(0,100)),
mypublic_UDF_functionA_function3(GENERATE_ARRAY(0,100)),
mypublic_UDF_functionB_function1(GENERATE_ARRAY(0,100)),