我正在尝试对 data.table `dt` 的多个数值列进行插值。因子列可以用 NA 填充,或者用类似 `na_locf` 的方式填充。在我的真实数据中,因子列可能不止一个。这是我的示例数据:
# Sample input: irregular observations at two time stamps (0 and 0.1) with a
# factor column `a` and two numeric columns `b` and `c`, all containing NAs.
dt <- structure(
  list(
    time = rep(c(0, 0.1), times = c(6, 4)),
    a = factor(
      c("0", NA, NA, "0", "3", "1", NA, "0", "1", NA),
      levels = c("0", "1", "3")
    ),
    b = c(1.28, 1.27, NA, 1.25, NA, 1.25, NA, 1.23, NA, 1.21),
    c = c(104.45, NA, 104.45, NA, 104.3, NA, 104.45, NA, 104.38, NA)
  ),
  row.names = c(NA, -10L),
  class = c("data.table", "data.frame")
)
time a b c
1: 0.0 0 1.28 104.45
2: 0.0 <NA> 1.27 NA
3: 0.0 <NA> NA 104.45
4: 0.0 0 1.25 NA
5: 0.0 3 NA 104.30
6: 0.0 1 1.25 NA
7: 0.1 <NA> NA 104.45
8: 0.1 0 1.23 NA
9: 0.1 1 NA 104.38
10: 0.1 <NA> 1.21 NA
结果应如下所示:
# Expected result: numeric columns interpolated onto a 0.01 time grid, with one
# row per distinct factor value at the original time stamps (0 and 0.1).
dtres <- structure(
  list(
    time = c(
      0, 0, 0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
      0.1, 0.1
    ),
    a = factor(
      c("0", "3", "1", rep("1", 9), "0", "1"),
      levels = c("0", "1", "3")
    ),
    b = c(
      1.2625, 1.2625, 1.2625, 1.25825, 1.254, 1.24975, 1.2455, 1.24125,
      1.237, 1.23275, 1.2285, 1.22425, 1.22, 1.22
    ),
    c = c(
      104.4, 104.4, 104.4, 104.4015, 104.403, 104.4045, 104.406, 104.4075,
      104.409, 104.4105, 104.412, 104.4135, 104.415, 104.415
    )
  ),
  row.names = c(NA, -14L),
  class = c("data.table", "data.frame")
)
time a b c
1: 0.00 0 1.26250 104.4000
2: 0.00 3 1.26250 104.4000
3: 0.00 1 1.26250 104.4000
4: 0.01 1 1.25825 104.4015
5: 0.02 1 1.25400 104.4030
6: 0.03 1 1.24975 104.4045
7: 0.04 1 1.24550 104.4060
8: 0.05 1 1.24125 104.4075
9: 0.06 1 1.23700 104.4090
10: 0.07 1 1.23275 104.4105
11: 0.08 1 1.22850 104.4120
12: 0.09 1 1.22425 104.4135
13: 0.10 0 1.22000 104.4150
14: 0.10 1 1.22000 104.4150
对于插值,我使用了这个函数:
library(zoo)
# Regular time grid (0 to 0.1 in steps of 0.01) used as interpolation targets.
interpTime <- seq(0, 0.1, by = 0.01)

# Linearly interpolate the numeric columns of `ecuData` onto `interpTime`.
#
# ecuData: a data.table with a `time` column that serves as the interpolation
#   index for every numeric column.
# Returns the result of the data.table `j` expression: one interpolated
#   vector per numeric column. The helper yields NULL for non-numeric columns
#   (e.g. factors), so those are not interpolated.
interp_dt <- function(ecuData) {
  ecuData[, lapply(.SD, function(column) {
    # `time` is resolved inside `j`, i.e. it is the table's own time column.
    # `ties = mean` is forwarded to the underlying interpolation so that
    # values sharing a time stamp are averaged.
    if (is.numeric(column)) {
      na.approx(
        column, time,
        xout = interpTime, method = "linear", ties = mean
      )
    }
  })]
}
但我无法在对数值列进行插值的同时,以所需的方式处理因子列。因子列 `a` 应该用 NA 填充,还是用 `na_locf` 填充,我不确定。如果一个时间点有多个因子值,它们也都应该保留,因此一个时间点可能对应多行(例如我的示例中的 `time = 0`)。有没有办法做到这一点?