我正在尝试进行一些参数化dplyr
操作。表达问题根源的最简单的可重现示例是:
# Data
test <- data.frame(group = rep(1:5, each = 2),
value = as.integer(c(NA, NA, 2, 3, 3, 5, 7, 8, 9, 0)))
> test
group value
1 1 NA
2 1 NA
3 2 2
4 2 3
5 3 3
6 3 5
7 4 7
8 4 8
9 5 9
10 5 0
# Summarisation example, this is what I'd like to parametrise
# so that I can pass in functions and grouping variables dynamically
test.summary <- test %>%
group_by(group) %>%
summarise(group.mean = mean(value, na.rm = TRUE))
> test.summary
Source: local data frame [5 x 2]
group group.mean
<int> <dbl>
1 1 NaN
2 2 2.5
3 3 4.0 # Correct results
4 4 7.5
5 5 4.5
这就是我一个人走了多远
# This works fine, but notice there's no 'na.rm = TRUE' passed in
doSummary <- function(d_in = data, func = 'mean', by = 'group') {
# d_in: data in
# func: required function for summarising
# by: the variable to group by
# NOTE: the summary is always for the 'value' column in any given dataframe
# Operations for summarise_
ops <- interp(~f(value),
.values = list(f = as.name(func),
value = as.name('value')))
d_out <- d_in %>%
group_by_(by) %>%
summarise_(.dots = setNames(ops, func))
}
> doSummary(test)
Source: local data frame [5 x 2]
group mean(value)
<int> <dbl>
1 1 NA
2 2 2.5
3 3 4.0
4 4 7.5
5 5 4.5
尝试使用“na.rm”参数
# When I try passing in the 'na.rm = T' parameter it breaks
doSummary.na <- function(d_in = data, func = 'mean', by = 'group') {
# Doesn't work
ops <- interp(~do.call(f, args),
.values = list(f = func,
args = list(as.name('value'), na.rm = TRUE)))
d_out <- d_in %>%
group_by_(by) %>%
summarise_(.dots = setNames(ops, func))
}
> doSummary.na(test)
Error: object 'value' not found
非常感谢您的帮助!