0

我的目标是定制 {drake} 计划,以减少简单和复杂情况下的代码重复。例如,根据计划中的特定条件为多个目标设置 drake::trigger(即通过检测目标名称以编程方式识别需要设置的目标,比如在查询 Web API 时名称以 (data)_raw 结尾的目标,或其他更复杂的情况)。* 为简洁起见,此处未展示以编程方式识别目标的部分,下面的代码仅使用手动输入的列名称。我更喜欢使用 {data.table} 而不是 {dplyr} 来操作计划。

例如,有没有办法为多个目标设置 drake::trigger?或者有没有更好的方法来完成这项工作?

我在下面提出了一个简单的解决方案,即使用熟悉的 data.frame 风格的方法手动操作 drake 计划。以下是简化版的 reprex:

library(drake)
library(data.table)
library(purrr)
#> 
#> Attaching package: 'purrr'
#> The following object is masked from 'package:data.table':
#> 
#>     transpose
library(magrittr)
#> 
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#> 
#>     set_names
library(rlang)
#> 
#> Attaching package: 'rlang'
#> The following object is masked from 'package:magrittr':
#> 
#>     set_names
#> The following objects are masked from 'package:purrr':
#> 
#>     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#>     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#>     splice
#> The following object is masked from 'package:data.table':
#> 
#>     :=

# Placeholder trigger condition: hands the target's name to do_something()
# and returns whatever that call returns.
trigger_func <- function(target_name) {
  do_something(target_name)
}
# Helper func
#' Restore drake-plan attributes after manipulating a plan with data.table
#'
#' Operates by reference through data.table's setattr(), so `data` itself is
#' modified: its class is reset to that of a drake plan and every attribute
#' other than 'names', 'row.names' and 'class' is removed.
#'
#' @param data A drake plan, typically one that was converted with setDT().
#' @param command_names Optional character vector to re-attach as the 'names'
#'   of `data$command`. `NULL` (the default) leaves the column's names as is.
#'   The former default `command_names = command_names` was a self-referencing
#'   promise that would fail as soon as it was evaluated.
#' @param ... Ignored; kept so existing calls keep working.
#' @return `data`, invisibly, so the call can be piped or chained.
reset_drake_attr <- function(data, command_names = NULL, ...) {
  # Reset original 'class' of drake plan
  setattr(data, 'class', c('drake_plan', 'tbl_df', 'tbl', 'data.frame'))
  # Remove non-drake attributes created by data.table
  extra_attrs <- setdiff(
    names(attributes(data)),
    c('names', 'row.names', 'class')
  )
  # Plain loop: the calls are made only for their by-reference side effect
  for (attr_name in extra_attrs) {
    setattr(data, attr_name, NULL)
  }
  # Re-attach the saved names only when the caller explicitly supplies them
  if (!is.null(command_names)) {
    setattr(data$command, 'names', command_names)
  }
  invisible(data)
}

# Reference plan written by hand: targets `b` and `e` carry a custom trigger
# whose condition calls trigger_func() with the target's own name, while
# `a`, `c` and `d` are plain commands without a trigger.
plan <- drake_plan(
  a = 1,
  b = target(2, trigger = trigger(condition = trigger_func('b'))),
  c = 3, d = 4,
  e = target(5, trigger = trigger(condition = trigger_func('e')))
)

# Manipulate plan with data.table ----------------

# Same five targets as the hand-written `plan`, but declared without any
# trigger; the triggers for `b` and `e` are added programmatically below.
my_plan <- drake_plan(a = 1, b = 2, c = 3, d = 4, e = 5)

command_names <- names(my_plan$command)      # For testing later
# setDT() converts `my_plan` to a data.table; the new list-column `trigger`
# holds the unevaluated expression `NA` for every row.
setDT(my_plan)[, trigger := .(.(expr(NA)))]  # Pre-populate targets without trigger

# For each listed target, overwrite that row's `trigger` entry with an
# unevaluated trigger() call; `!!.` injects the target name into the call.
# map() is used here only for the update, but it also returns the table once
# per element, which is why the reprex prints it twice.
map(c('b', 'e'),
    ~ my_plan [
      ][target == . , trigger := .(expr(
          trigger(condition = trigger_func(!!.))
      ))])
#> [[1]]
#>    target command trigger
#> 1:      a       1      NA
#> 2:      b       2  <call>
#> 3:      c       3      NA
#> 4:      d       4      NA
#> 5:      e       5  <call>
#> 
#> [[2]]
#>    target command trigger
#> 1:      a       1      NA
#> 2:      b       2  <call>
#> 3:      c       3      NA
#> 4:      d       4      NA
#> 5:      e       5  <call>

# Called for its side effect on `my_plan`: puts the drake-plan classes back
# and drops the extra attributes left by data.table.
reset_drake_attr(my_plan)

# Test equality ----------------------------------

# Print both plans: the hand-written one and the manipulated one display
# identically.
plan
#> # A tibble: 5 x 3
#>   target command trigger                               
#>   <chr>  <expr>  <expr>                                
#> 1 a      1       NA                                    
#> 2 b      2       trigger(condition = trigger_func("b"))
#> 3 c      3       NA                                    
#> 4 d      4       NA                                    
#> 5 e      5       trigger(condition = trigger_func("e"))
my_plan
#> # A tibble: 5 x 3
#>   target command trigger                               
#>   <chr>  <expr>  <expr>                                
#> 1 a      1       NA                                    
#> 2 b      2       trigger(condition = trigger_func("b"))
#> 3 c      3       NA                                    
#> 4 d      4       NA                                    
#> 5 e      5       trigger(condition = trigger_func("e"))

# Despite the identical printout, the two objects are not identical; the
# difference reported below is in the names of the `command` column.
identical(plan, my_plan)
#> [1] FALSE
all.equal(plan$command, my_plan$command)
#> [1] "names for target but not for current"
# Reason: 
names(plan$command)
#> [1] "a" ""  "c" "d" ""
command_names           # Saved earlier
#> [1] "a" "b" "c" "d" "e"
names(my_plan$command)  # data.table removes 'names' of 'my_plan$command'
#> NULL

# Can't test the exact equality of `identical(plan, my_plan)` because only targets without `target` have 'names' on 'command' column
# Demonstration: build a plan mixing both declaration styles and print the
# names of its `command` column. The recorded output has six entries for five
# declared targets, presumably because the map() transform expands `e` into
# one target per value (confirm against drake's transform docs).
drake_plan(
  # without `target`
  a = 1, b = 2,
  # with `target`
  c = target(3),
  d = target(4, trigger = trigger(condition = TRUE)),
  e = target(func(a), map(func = !!c('x', 'y')))
) %>%
  {names(.$command)}
#> [1] "a" "b" ""  ""  ""  ""

# Test without 'names' of 'command' column -------

# Compare only the `command` columns, with the names stripped from both sides
identical( unname(plan$command), unname(my_plan$command) )
#> [1] TRUE
# Copy objects, remove 'names' of 'command' column and test
# (the copies keep `plan` and `my_plan` themselves unchanged)
plan_test <- plan                ; my_plan_test <- my_plan
names(plan_test$command) <- NULL ; names(my_plan_test$command) <- NULL
identical(plan_test, my_plan_test)
#> [1] TRUE

由 reprex 包(v0.3.0)创建于 2019 年 12 月 6 日

my_plan$command 的名称(names 属性)是必需的吗?{drake} 在内部会使用它们吗?我有三个选项,应该选择哪一个?

  • 忽略(不做任何处理)
  • names(my_plan$command) <- my_plan$target
  • 先保存 command_names <- names(my_plan$command),操作完成后再重新设置(如下所示)
# Test with 'names' of 'command' column ----------

# Sketch of the third option (not executed in the reprex): save the names of
# `my_plan$command` beforehand and re-attach them after manipulating the
# plan, so that the final check is expected to return TRUE.
# --- NOT RUN ---
{
  # Put this in helper func, re-run `reset_drake_attr`
  setattr(my_plan$command, 'names', command_names)

  # (run manipulation steps...)

  # Pass the saved names so the helper can restore them on the command column
  reset_drake_attr(my_plan, command_names)  
  identical( names(my_plan$command), command_names )
  # TRUE 
}

4

0 回答 0