0

I am trying to use drake for my workflow. It seems to have a lot of potential, but I noticed that drake takes a very long time to run: even simple steps that take less than a second when I run them "manually" can take 20 seconds or more when they are run with drake.

I'm aware that I did not provide enough details on this problem. Please tell me what kind of details to provide, and I will do so.

The dataset contains the levels of a few dozen proteins measured in patients undergoing various treatments. The protein levels are read from an ExpressionSet object, and then a linear model (including contrasts) is fitted to each of these proteins. Here are the essential parts of the code:

# Build the per-sample modelling table for a single protein.
#
# protein: row identifier used to index the expression matrix of `eset`.
# eset:    object providing exprs() (expression matrix) and pData() (sample
#          annotations); pData(eset) must contain a "Sample.Name" column as
#          well as drug.dose, patient_id and visit_id.
#
# Returns a tibble with the columns Sample.Name, drug.dose, patient_id,
# visit_id, drug.visit and pt_level.
pt_df_for_lm <- function(protein, eset){
  # One row per sample: the sample names come from the row names of the
  # extracted expression values, the values themselves become `pt_level`.
  protein_levels <-
    as.data.frame(exprs(eset)[protein,]) %>%
    rownames_to_column(var = "Sample.Name") %>%
    magrittr::set_colnames(c("Sample.Name","pt_level")) %>%
    as_tibble()

  # Attach the sample annotations and derive the combined drug/visit factor.
  # Every "W0" visit is collapsed into the single level "W0", which is then
  # moved to the front of the factor levels.
  protein_levels %>%
    inner_join(pData(eset), by = "Sample.Name") %>%
    mutate(
      drug.visit = ifelse(visit_id=="W0", "W0", paste0(drug.dose, ".", visit_id)),
      drug.visit = fct_relevel(factor(drug.visit), "W0")
    ) %>%
    select(Sample.Name, drug.dose, patient_id, visit_id, drug.visit, pt_level)
}


# Test "drug vs. placebo" contrasts on a fitted linear model.
#
# res_lm: fitted model whose coefficient names start with "drug.visit" for
#         the drug/visit terms and contain "patient_id" for patient terms.
#
# Reads `possible_terms_df` from the enclosing environment (it is not defined
# in this function). It is joined on whatever columns it shares with the
# intermediate tables, and the columns drug, drg.ds and week are used after
# the first join -- presumably they come from `possible_terms_df`; verify
# against its definition.
#
# Returns `possible_terms_df` inner-joined with the tidied multcomp::glht()
# summary of the contrasts (one contrast per non-placebo drug/visit term).
lm_contrasts_drug_vs_placebo <- function(res_lm){

  coef_names <- names(coef(res_lm))

  # Contrast weights over all model coefficients: +1 on the drug coefficient,
  # -1 on the matching placebo coefficient, 0 everywhere else.
  build_contrast_vector <- function(cf_drug, cf_placebo){
    weights <- rep(0,length(coef_names))
    weights[which(coef_names==cf_drug)] <- 1
    weights[which(coef_names==cf_placebo)] <- -1
    weights
  }

  # Wrap one weight vector as a single-column tibble named after its contrast.
  as_contrast_column <- function(cname, cvec){
    contrast_column <- enframe(cvec, name = NULL)
    names(contrast_column) <- cname
    contrast_column
  }

  # One row per non-placebo drug/visit coefficient, with the contrast name,
  # the placebo coefficient of the same week and the weight vector.
  drug_terms <-
    tibble(coef = coef_names) %>%
    filter(!grepl("patient_id",coef)) %>%
    mutate(term=make.names(sub("drug.visit","",coef))) %>%
    inner_join(possible_terms_df) %>%
    filter(drug!="Placebo") %>%
    mutate(
      contrast_name = paste0(make.names(drg.ds),".",week, " - Placebo.0.", week),
      coef_placebo = paste0("drug.visitPlacebo.0.",week),
      contrast_vector = map2(coef, coef_placebo, build_contrast_vector)
    )

  contrast_columns <- map2(drug_terms$contrast_name,
                           drug_terms$contrast_vector,
                           as_contrast_column)

  # Bind the columns side by side (coefficients x contrasts), label the rows
  # with the coefficient names, then transpose so that each row is one
  # contrast, as passed to glht() via `linfct`.
  coef_by_contrast <- as.matrix(bind_cols(contrast_columns))
  contrasts_mat <- t(magrittr::set_rownames(coef_by_contrast, coef_names))

  contrast_results_df <-
    multcomp::glht(model=res_lm, linfct = contrasts_mat ) %>%
    summary() %>%
    broom::tidy() %>%
    dplyr::select(-rhs) %>%
    rename(term = lhs)

  inner_join(possible_terms_df, contrast_results_df)
}


# drake workflow: read the expression set, build one modelling table and one
# linear model per protein, tidy the coefficients and the drug-vs-placebo
# contrasts, then combine everything into a single CSV file.
# INDIR, OUTDIR and all_proteins are taken from the calling environment.
plan <- drake_plan(
  # file_in() declares the .rds file as an input file so that drake tracks
  # its contents, mirroring the file_out() declaration of the CSV below.
  # `!!` evaluates the path while the plan is being created, because
  # file_in()/file_out() need literal paths.
  pt_eset = target(readRDS(file_in(!!paste0(INDIR, "pt_results.rds")))),
  # One modelling table per protein listed in all_proteins.
  pt_df = target(pt_df_for_lm(prot, pt_eset),
                 transform = map(prot = !!all_proteins)),
  # Per-protein model: patient and drug/visit terms, no intercept.
  res_pt_lm = target(lm(pt_level ~0 + patient_id + drug.visit, data = pt_df),
                     transform = map(prot, .id = prot)),
  # Tidy coefficient table without the patient terms, tagged with the protein.
  res_pt_lm_df = target(res_pt_lm %>%
                          broom::tidy() %>%
                          filter(!grepl("patient_id", term)) %>%
                          mutate(term = make.names(sub("drug.visit", "", term))) %>%
                          mutate(protein = prot) %>%
                          select(protein, everything()),
                        transform = map(res_pt_lm, prot, .id = prot)),
  # Drug-vs-placebo contrasts for the same model, tagged with the protein.
  res_pt_lm_contrasts_df = target(lm_contrasts_drug_vs_placebo(res_pt_lm) %>%
                                    mutate(protein = prot),
                                  transform = map(res_pt_lm, prot, .id = prot)),
  # Stack the per-protein coefficient and contrast tables into one table.
  combined_res_pt_lm_df = target(bind_rows(res_pt_lm_df, res_pt_lm_contrasts_df),
                                 transform = combine(res_pt_lm_df, res_pt_lm_contrasts_df)),
  output_res_pt_lm_df = write_csv(combined_res_pt_lm_df,
                                  file_out(!!file.path(OUTDIR, "pt_lm_results.csv"))),
  trace = TRUE
)

# `config` is only needed by the (commented-out) dependency graph below;
# make() is called on the plan directly.
config <- drake_config(plan)
#vis_drake_graph(config)
make(plan, lock_envir = FALSE)

The code is placed within an rmarkdown notebook.

Gil

4

0 回答 0