我正在处理随时间变化的数据集,需要计算峰值变化发生的时间。我遇到了一个问题,因为某些科目缺少数据(NA)。
例子:
library(dplyr)
Data <- structure(list(Subject = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L), .Label = c("1", "10", "11", "12", "13", "14", "16",
"17", "18", "19", "2", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "3", "31", "32", "4", "5", "7", "8", "9"), class = "factor"),
Close = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L
), .Label = c("High Predictability", "Low Predictability"
), class = "factor"), SOA = structure(c(2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L), .Label = c("Long SOA", "Short SOA"), class = "factor"),
Time = c(-66.68, -66.68, -66.68, -66.68, -33.34, -33.34,
-33.34, -33.34, 0, 0, 0, 0, 33.34, 33.34, 33.34, 33.34, 66.68,
66.68, 66.68, 66.68, -66.68, -66.68, -66.68, -66.68, -33.34,
-33.34, -33.34, -33.34, 0, 0, 0, 0, 33.34, 33.34, 33.34,
33.34, 66.68, 66.68, 66.68, 66.68), Pcent_Chng = c(0.12314,
0.048254, -0.098007, 0.023216, 0.20327, 0.08338, -0.15157,
0.030008, 0.26442, 0.12019, -0.22878, 0.035547, 0.31849,
0.15488, -0.26887, 0.038992, 0.39489, 0.15112, -0.31185,
0.02144, NA, 0.046474, NA, 0.17541, NA, 0.14975, NA, 0.3555,
NA, -0.1736, NA, 0.72211, NA, -0.32201, NA, 1.0926, NA, -0.39551,
0.72211, 1.4406)), class = "data.frame", row.names = c(NA, -40L
), .Names = c("Subject", "Close", "SOA", "Time", "Pcent_Chng"
))
我在以下尝试中遇到错误:
Data %>%
group_by(Subject,Close,SOA) %>%
summarize(Peak_Pcent = max(Pcent_Chng),
Peak_Latency = Time[which.max(Pcent_Chng)])
错误是:
Error in summarise_impl(.data, dots) :
Column `Peak_Latency` must be length 1 (a summary value), not 0
这似乎是由于仅在某些SOA
情况下存在的 NA。与我的实际数据一起使用complete.cases()
过于激进,并且会删除太多数据。
是否有忽略NA的解决方法?