我的目标是定制 {drake} 计划,以减少简单和复杂情况下的代码重复。例如,根据计划的特定条件为多个目标设置 drake::trigger(即通过检测目标名称以编程方式识别需要设置的目标,比如在查询 Web API 时名称以 (data)_raw 结尾的目标,或其他更复杂的情况)。
为简洁起见,此处未展示以编程方式识别目标的部分,下面的代码仅使用手动输入的列名称。我更喜欢使用 {data.table} 而不是 {dplyr} 来操作计划。
例如,有没有办法为多个目标设置 drake::trigger?
还是有更好的方法来完成这项工作?
我在下面提出了一个简单的解决方案,即使用熟悉的 data.frame 风格的方法手动操作 drake 计划。以下是简化版的 reprex:
library(drake)
library(data.table)
library(purrr)
#>
#> Attaching package: 'purrr'
#> The following object is masked from 'package:data.table':
#>
#> transpose
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
library(rlang)
#>
#> Attaching package: 'rlang'
#> The following object is masked from 'package:magrittr':
#>
#> set_names
#> The following objects are masked from 'package:purrr':
#>
#> %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#> flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#> splice
#> The following object is masked from 'package:data.table':
#>
#> :=
# Condition evaluated by each custom trigger for the given target name.
# It simply forwards to `do_something()`, which is not defined in the code shown here.
trigger_func <- function(target_name) {
  do_something(target_name)
}
# Helper func
#' Turn a plan that was manipulated as a data.table back into a drake plan.
#'
#' All changes are made by reference through `data.table::setattr()`, so the
#' object passed as `data` is modified in place.
#'
#' @param data A drake plan previously converted with `setDT()`.
#' @param command_names Optional vector of names to put back on the `command`
#'   column. `NULL` (the default) leaves the column's names untouched.
#'   (The previous default, `command_names = command_names`, referred to
#'   itself and would fail as soon as it was evaluated.)
#' @param ... Unused.
#' @return `NULL`, invisibly; the function is called for its side effects.
reset_drake_attr <- function(data, command_names = NULL, ...) {
  # Reset original 'class' of drake plan
  setattr(data, "class", c("drake_plan", "tbl_df", "tbl", "data.frame"))
  # Remove non-drake attributes created by data.table
  extra_attrs <- setdiff(
    names(attributes(data)),
    c("names", "row.names", "class")
  )
  # A plain loop: the calls are made only for their side effect
  for (attr_name in extra_attrs) {
    setattr(data, attr_name, NULL)
  }
  # Restore the saved names of the 'command' column only when they are supplied.
  # `.subset2()` extracts the column without method dispatch so that `setattr()`
  # can set the attribute on the column object itself.
  if (!is.null(command_names)) {
    setattr(.subset2(data, "command"), "names", command_names)
  }
  invisible(NULL)
}
# Reference plan written out by hand: targets `b` and `e` are wrapped in
# `target()` with a custom trigger condition, the other targets are plain values.
plan <- drake_plan(
  a = 1,
  b = target(
    2,
    trigger = trigger(condition = trigger_func('b'))
  ),
  c = 3,
  d = 4,
  e = target(
    5,
    trigger = trigger(condition = trigger_func('e'))
  )
)
# Manipulate plan with data.table ----------------
# Start from a plan without any triggers; they are added programmatically below.
my_plan <- drake_plan(a = 1, b = 2, c = 3, d = 4, e = 5)
# Remember the names carried by the `command` column before converting the plan.
command_names <- names(my_plan$command) # For testing later
# Convert the plan with setDT() and add a `trigger` column that holds NA for
# every target (presumably a list column, since it later stores calls).
setDT(my_plan)[, trigger := .(.(expr(NA)))] # Pre-populate targets without trigger
# For each listed target name, replace the `trigger` cell of the matching row
# with an unevaluated `trigger(condition = trigger_func(<name>))` call; `!!.`
# inserts the current name into the expression. `map()` returns one result per
# name, which is why the table is printed twice in the recorded output.
map(c('b', 'e'),
~ my_plan [
][target == . , trigger := .(expr(
trigger(condition = trigger_func(!!.))
))])
#> [[1]]
#> target command trigger
#> 1: a 1 NA
#> 2: b 2 <call>
#> 3: c 3 NA
#> 4: d 4 NA
#> 5: e 5 <call>
#>
#> [[2]]
#> target command trigger
#> 1: a 1 NA
#> 2: b 2 <call>
#> 3: c 3 NA
#> 4: d 4 NA
#> 5: e 5 <call>
# Give the manipulated table the drake plan class back and drop the extra
# attributes (see `reset_drake_attr()`); no names are passed here.
reset_drake_attr(my_plan)
# Test equality ----------------------------------
# Print both plans: the recorded output shows the same targets, commands and triggers.
plan
#> # A tibble: 5 x 3
#> target command trigger
#> <chr> <expr> <expr>
#> 1 a 1 NA
#> 2 b 2 trigger(condition = trigger_func("b"))
#> 3 c 3 NA
#> 4 d 4 NA
#> 5 e 5 trigger(condition = trigger_func("e"))
my_plan
#> # A tibble: 5 x 3
#> target command trigger
#> <chr> <expr> <expr>
#> 1 a 1 NA
#> 2 b 2 trigger(condition = trigger_func("b"))
#> 3 c 3 NA
#> 4 d 4 NA
#> 5 e 5 trigger(condition = trigger_func("e"))
# Despite the identical printout, the two objects are not identical.
identical(plan, my_plan)
#> [1] FALSE
# The difference reported for the `command` column is in its names.
all.equal(plan$command, my_plan$command)
#> [1] "names for target but not for current"
# Reason:
# The hand-written plan has names only for the targets declared without `target()`.
names(plan$command)
#> [1] "a" "" "c" "d" ""
# The plan built without `target()` had a name for every command before conversion.
command_names # Saved earlier
#> [1] "a" "b" "c" "d" "e"
names(my_plan$command) # data.table removes 'names' of 'my_plan$command'
#> NULL
# An exact `identical(plan, my_plan)` check is not possible: as the output of
# this example shows, only targets declared without `target()` get a name on
# the 'command' column.
names(
  drake_plan(
    # without `target`
    a = 1,
    b = 2,
    # with `target`
    c = target(3),
    d = target(4, trigger = trigger(condition = TRUE)),
    e = target(func(a), map(func = !!c('x', 'y')))
  )$command
)
#> [1] "a" "b" "" "" "" ""
# Test without 'names' of 'command' column -------
# Compare only the command columns, with their names stripped
identical(unname(plan$command), unname(my_plan$command))
#> [1] TRUE
# Copy both plans, clear the names of each copy's 'command' column,
# then compare the plans as a whole
plan_test <- plan
my_plan_test <- my_plan
names(plan_test$command) <- NULL
names(my_plan_test$command) <- NULL
identical(plan_test, my_plan_test)
#> [1] TRUE
由reprex 包(v0.3.0)于 2019 年 12 月 6 日创建
my_plan$command 的名称是必需的吗?{drake} 在内部会使用它们吗?我有三个选项,应该选择哪一个?
- 忽略这些名称
- 将其设置为目标名称:
names(my_plan$command) <- my_plan$target
- 先保存:
command_names <- names(my_plan$command)
并在之后重新设置(像这样)
# Test with 'names' of 'command' column ----------
# --- NOT RUN ---
# Sketch of the "save and re-set" option: the names saved in `command_names`
# are put back on the `command` column and then compared with the saved copy.
{
# Put this in helper func, re-run `reset_drake_attr`
setattr(my_plan$command, 'names', command_names)
# (run manipulation steps...)
reset_drake_attr(my_plan, command_names)
# Expected to be TRUE once the names have been restored
identical( names(my_plan$command), command_names )
# TRUE
}