1

我正在尝试将多个 group_by 参数传递给 dplyr 函数以及命名变量。了解我需要使用 dplyr 的 quosure 来了解我传递给它的变量。以下代码工作正常:

quantileMaker2 <- function(data, groupCol, calcCol) {
  groupCol <- enquo(groupCol)
  calcCol <- enquo(calcCol)

  data %>%
    group_by(!! groupCol) %>%
      summarise('25%' = currency(quantile(!! calcCol, probs = 0.25), digits = 2L),
            '50%' = currency(quantile(!! calcCol, probs = 0.50), digits = 2L),
            '75%' = currency(quantile(!! calcCol, probs = 0.75), digits = 2L),
            avg = currency(mean(!! calcCol), digits = 2L),
            nAgencies = n_distinct('POSIT ID'), 
            nFTEs = sum(FTEs)
  )
}

quantileMaker2(df, employerClass, TCCperFTE)

但是,当我运行以下命令时,我遇到了问题:

quantileMaker3 <- function(data,...,calcCol) {
  groupCol <- quos(...)
  calcCol <- quo(calcCol)

  data %>%
    group_by(!!! groupCol) %>%
    summarise('25%' = currency(quantile(!! calcCol, probs = 0.25), digits = 2L),
          '50%' = currency(quantile(!! calcCol, probs = 0.50), digits = 2L),
          '75%' = currency(quantile(!! calcCol, probs = 0.75), digits = 2L),
          avg = currency(mean(!! calcCol), digits = 2L),
          nAgencies = n_distinct('POSIT ID'), 
          nFTEs = sum(FTEs)
)
}

返回以下错误:

 Error in summarise_impl(.data, dots) : 
  Evaluation error: anyNA() applied to non-(list or vector) of type 'symbol'. 

样本数据:

Year    employerClass   TCCperFTE   FTEs    POSIT ID
2014    One             5000        20      1
2014    Two             1000        30      2
2015    One             15000       40      1
2015    Two             50000       50      2
2016    One             100000      60      1
2016    Two             500000      70      2

你们可以提供的任何帮助将不胜感激。

4

1 回答 1

4

您尚未提供示例数据,但您的函数在修改为使用mtcars数据框时有效。

library(tidyverse)
library(formattable)

quantileMaker3 <- function(data, calcCol, ...) {
  groupCol <- quos(...)
  calcCol <- enquo(calcCol)

  data %>%
    group_by(!!!groupCol) %>%
    summarise('25%' = currency(quantile(!!calcCol, probs = 0.25), digits = 2L),
              '50%' = currency(quantile(!!calcCol, probs = 0.50), digits = 2L),
              '75%' = currency(quantile(!!calcCol, probs = 0.75), digits = 2L),
              avg = currency(mean(!!calcCol), digits = 2L),
              nAgencies = n_distinct(cyl), 
              nFTEs = sum(hp)
    )
}

quantileMaker3(mtcars, mpg, cyl)
# A tibble: 3 x 7
    cyl `25%`             `50%`             `75%`             avg               nAgencies nFTEs
  <dbl> <S3: formattable> <S3: formattable> <S3: formattable> <S3: formattable>     <int> <dbl>
1    4. $22.80            $26.00            $30.40            $26.66                    1  909.
2    6. $18.65            $19.70            $21.00            $19.74                    1  856.
3    8. $14.40            $15.20            $16.25            $15.10                    1 2929.

使用多个分组参数:

quantileMaker3(mtcars, mpg, cyl, vs)
# A tibble: 5 x 8
# Groups:   cyl [?]
    cyl    vs `25%`             `50%`             `75%`             avg               nAgencies nFTEs
  <dbl> <dbl> <S3: formattable> <S3: formattable> <S3: formattable> <S3: formattable>     <int> <dbl>
1    4.    0. $26.00            $26.00            $26.00            $26.00                    1   91.
2    4.    1. $22.80            $25.85            $30.40            $26.73                    1  818.
3    6.    0. $20.35            $21.00            $21.00            $20.57                    1  395.
4    6.    1. $18.03            $18.65            $19.75            $19.12                    1  461.
5    8.    0. $14.40            $15.20            $16.25            $15.10                    1 2929.

顺便说一句,您可以通过使用嵌套来避免多次调用 quantile。如果任何输出列属于类formattable(​​这是currency函数返回的内容),这将不起作用,因此我更改了函数以创建货币格式列的字符串。

quantileMaker3 <- function(data, calcCol, ..., quantiles=c(0.25,0.5,0.75)) {

  groupCol <- quos(...)
  calcCol <- enquo(calcCol)

  data %>%
    group_by(!!!groupCol) %>%
    summarise(values = list(paste0("$", sprintf("%1.2f", quantile(!!calcCol, probs=quantiles)))),
              qnames = list(sprintf("%1.0f%%", quantiles*100)),
              nAgencies = n_distinct(cyl), 
              nFTEs = sum(hp),
              avg = paste0("$", sprintf("%1.2f", mean(!!calcCol)))
    ) %>% 
    unnest %>% 
    spread(qnames, values) 
}

quantileMaker3(mtcars, mpg, cyl, vs)
# A tibble: 5 x 8
# Groups:   cyl [3]
    cyl    vs nAgencies nFTEs avg    `25%`  `50%`  `75%` 
  <dbl> <dbl>     <int> <dbl> <chr>  <chr>  <chr>  <chr> 
1    4.    0.         1   91. $26.00 $26.00 $26.00 $26.00
2    4.    1.         1  818. $26.73 $22.80 $25.85 $30.40
3    6.    0.         1  395. $20.57 $20.35 $21.00 $21.00
4    6.    1.         1  461. $19.12 $18.03 $18.65 $19.75
5    8.    0.         1 2929. $15.10 $14.40 $15.20 $16.25
于 2018-03-29T19:57:52.347 回答