0

我想根据污染物的每小时值计算每日滑动平均值(running mean)。为此,每天必须至少有 16 个有效的每小时测量值。我该怎么做?用 dput 导出的样本数据如下。

structure(list(X = 1:48, year = c(2007L, 2007L, 2007L, 2007L,  2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,  2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,  2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,  2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,  2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L), date = structure(c(1L,  1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,  1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,  2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2007-11-01",  "2007-11-02"), class = "factor"), time = c(1L, 2L, 3L, 4L, 5L,  6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,  19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,  9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,  22L, 23L, 24L), nox = c(2.71700000762939, 3.33100008964539, 4.59100008010864, 
1.43499994277954, 2.29699993133545, 3.44099998474121, 8.74100017547607, 
15.9899997711182, 20.1299991607666, 20.2099990844727, 25.0699996948242, 
19.0900001525879, 14.1700000762939, 16.9500007629395, 22.8899993896484, 
24.3400001525879, 26.0799999237061, 14.3900003433228, 13.4300003051758, 
10.0699996948242, 6.72700023651123, 5.16900014877319, 3.31299996376038, 
2.94199991226196, 2.00600004196167, 2.43099999427795, 2.55299997329712,  NA, 3.38700008392334, 6.25899982452393, NA, 27.7999992370605, 
27.3500003814697, NA, 18.8500003814697, 22.1700000762939, 20.1499996185303,  NA, NA, NA, 34.9700012207031, 24.75, 25.7999992370605, NA, 19.3400001525879, 
39.2400016784668, 36.060001373291, 25.2000007629395), no2 = c(2.78299999237061,  NA, 4.44999980926514, NA, 2.10700011253357,
3.33699989318848, 
8.43299961090088, 14.2299995422363, NA, NA, NA, NA, 11.960000038147,  NA, 19.5300006866455, 21.7999992370605, 24.3299999237061,
13.4799995422363, 
13.0600004196167, 9.87100028991699, 6.3730001449585, 4.99100017547607, 
3.15300011634827, 2.86400008201599, 1.94500005245209, 2.07999992370605, 
2.27999997138977, 3.21600008010864, 3.12100005149841, 5.99599981307983, 
14.7600002288818, 21.2999992370605, 20.9099998474121, 16.8799991607666, 
15.3400001525879, 17.1599998474121, 16.0900001525879, 15.2200002670288, 
18.1900005340576, 21.9300003051758, 32.3699989318848, 24.4300003051758, 
25.4400005340576, 20.5599994659424, 19.0300006866455, 38.9199981689453, 
35.4799995422363, 25.1100006103516)), .Names = c("X", "year",  "date", "time", "nox", "no2"), class = "data.frame", row.names = c(NA, 
-48L))
4

1 回答 1

0

按日期将数据拆分为单独的数据框(使用 split)。在自定义函数(mean.fun)中嵌入至少 16 个有效测量值的要求,然后将其应用于每个拆分后的数据框中感兴趣的列。

> dat <- structure(...)

#' Mean of a vector, subject to a minimum count of valid observations
#'
#' @param x A vector of measurements, possibly containing `NA`s.
#' @param min.valid Minimum number of non-`NA` values that `x` must contain
#'   for a mean to be reported. Defaults to 16.
#' @return The mean of the non-`NA` values of `x` when at least `min.valid`
#'   of them are present; otherwise `NA_real_`.
mean.fun <- function(x, min.valid = 16) {
  n.valid <- sum(!is.na(x))
  if (n.valid < min.valid) {
    # NA_real_ (rather than logical NA) keeps the return type double on both
    # paths, so the function is safe to use with vapply(..., numeric(1)).
    return(NA_real_)
  }
  mean(x, na.rm = TRUE)
}

# Split the two pollutant columns by date, then apply mean.fun to each column
# within each date; the result has one column per date and one row per
# pollutant.
sapply(
  split(dat[, c("nox", "no2")], f = dat$date),
  function(day) sapply(day, mean.fun)
)

    2007-11-01 2007-11-02
nox  11.979750   19.90094
no2   9.808941   17.40658

由于上面解决方案中嵌套的 sapply 和 split 不太容易理解,有人可能会建议改用 plyr 包的解决方案。不过,我总是记不住它的语法;而且一旦掌握了 sapply/split 的用法,就会发现它更清晰。

于 2013-09-09T09:57:20.697 回答