我想定义一个通用函数
func_boxplot2 <- function(tmp, xvar, yvar, groupvar)
{
xvar <- enquo(xvar)
yvar <- enquo(yvar)
groupvar <- enquo(groupvar)
# If variable yield exists, put concentrations to NA for all yields < annual_yield_thres
if( "yield" %in% names(tmp) )
{
tmp <- tmp %>%
mutate_at(vars(!!yvar), ~ifelse(round(yield, 0) < 85, NA, .))
}
# Compute IQR for each year
tmp_iqr <- tmp %>%
group_by(!!groupvar) %>%
summarise(iqr=IQR(!!yvar, na.rm = TRUE))
p <- ggplot(data = tmp %>% mutate_at(vars(!!yvar), ~ifelse(tmp_iqr[which(tmp_iqr[[!!groupvar]] %in% (!!xvar)),]$iqr == 0, . + runif(1, -0.01, 0.01), . )), aes(x = !!xvar, y = !!yvar))
p <- p + stat_boxplot(aes(group = !!groupvar), na.rm = TRUE, coef = 10000) # Trick (large unrealistic coef value) so whiskers end at min(y) & max(y)
p <- p + geom_boxplot(na.rm = TRUE, outlier.shape = NA)
return(p)
}
即使IQR为0,它也能够绘制扩展到最小/最大值的箱线图晶须。我试图通过向有罪数据添加微小的随机数(低于显着性水平)来避免IQR = 0来实现这一点。
但是,我一定错过了关于 quosure 的语法,因为运行这个函数
func_boxplot2(data, date, days, date)
与数据集
structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L), .Label = c("2010", "2011", "2012",
"2013", "2014", "2015", "2016", "2017", "2018", "2019"), class = c("ordered",
"factor")), station = c("41B011", "41MEU1", "41N043", "41R001",
"41R012", "41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012",
"41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1",
"41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1", "41B011",
"41MEU1", "41N043", "41R001", "41R012", "41WOL1", "41B011", "41MEU1",
"41N043", "41R001", "41R012", "41WOL1", "41B011", "41MEU1", "41N043",
"41R001", "41R012", "41WOL1", "41B011", "41MEU1", "41N043", "41R001",
"41R012", "41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012",
"41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1"
), days = c(16, 15, 45, 26, 14, 14, 32, 7, 87, 42, 24, 23, 25,
25, 55, 29, 29, 16, 11, 14, 58, 21, 19, 10, 10, 14, 33, 18, 10,
7, 9, 10, 19, 7, 8, 7, 1, 5, 15, 8, 1, 4, 5, 6, 14, 6, 5, 5,
3, 5, 19, 8, 4, 5, 3, 4, 16, 3, 1, 3), yield = c(98.4817351598173,
49.4520547945205, 95.8561643835616, 97.6712328767123, 98.2648401826484,
95.1598173515982, 97.8767123287671, 27.9109589041096, 98.310502283105,
98.972602739726, 97.203196347032, 96.2100456621005, 98.7818761384335,
96.7554644808743, 97.4954462659381, 98.8046448087432, 98.747723132969,
98.3037340619308, 99.0525114155251, 96.1986301369863, 97.1004566210046,
96.4954337899543, 96.3698630136986, 98.2077625570776, 96.62100456621,
98.3675799086758, 95.6963470319635, 96.8835616438356, 93.5844748858447,
87.8196347031963, 91.2328767123288, 92.5570776255708, 81.5182648401827,
82.7739726027397, 90.1826484018265, 87.1461187214612, 87.2153916211293,
92.9986338797814, 94.6948998178506, 85.5760473588342, 92.3611111111111,
96.2204007285975, 86.3698630136986, 86.4269406392694, 87.796803652968,
93.2762557077626, 96.6438356164384, 95.6164383561644, 71.3812785388128,
93.7442922374429, 96.3698630136986, 97.2602739726027, 95.7876712328767,
94.7146118721461, 87.6141552511416, 43.0821917808219, 88.6872146118722,
92.6826484018265, 90.365296803653, 86.541095890411), environ = structure(c(5L,
4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L,
3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L,
3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L,
4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L), .Label = c("Urbain avec très forte influence du trafic",
"Urbain avec forte influence du trafic", "Urbain avec influence modérée du trafic",
"Urbain avec faible influence du trafic", "Urbain avec très faible influence du trafic",
"Industriel avec influence modérée du trafic"), class = "factor")), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"))
给我以下错误
Error: Problem with `mutate()` input `days`.
x Must extract column with a single valid subscript.
x Subscript `date` has size 60 but must be size 1.
ℹ Input `days` is `(structure(function (..., .x = ..1, .y = ..2, . = ..1) ...`.
请问我的语法有什么问题?
非常感谢,
一种。
======== 更新 ==========
使用建议的更新功能
func_boxplot2 <- function(tmp, xvar, yvar, groupvar)
{
# If variable yield exists, put concentrations to NA for all yields < annual_yield_thres
if("yield" %in% names(tmp)) {
tmp <-
tmp %>%
mutate(across({{yvar}}, ~ifelse(round(yield, 0) < 85, NA, .)))
}
tmp <-
tmp %>%
group_by({{groupvar}}) %>%
mutate(
across({{yvar}}, function (x) {
ifelse(
IQR({{yvar}}, na.rm = TRUE) == 0,
x + runif(1, -0.01,0.01),
x
)
})
)
ggplot(tmp, aes(x = {{xvar}}, y = {{yvar}})) +
stat_boxplot(aes(group = {{groupvar}}), na.rm = TRUE, coef = 10000) +
geom_boxplot(na.rm = TRUE, outlier.shape = NA)
}
结果如下图
正如我在评论中所说,似乎处理tmp
同一年所有行的输出是今年的第一个值yvar
,这解释了情节。事实上,评论这个块给出了下图