昨天我发布了一个问题，询问如何求数据集中相邻列观察值的平均值。
今天我意识到我实际上需要的是加权平均值。我尝试修改上述问题的答案以适应新的情况，但我对 apply 系列函数的理解仍然不够，无法轻松做到这一点。
我已经编写了代码来计算下面示例数据集的加权平均值，这段代码或许也可以用于我的真实数据。尽管如此，如果有人能够说明如何在这里使用 apply 系列函数计算加权平均值，我认为这将大大有助于提高我的理解和编码能力。无论如何，感谢大家过去和将来的所有帮助和建议。
# Example observations: one row per site, one column per year (yr1..yr4).
# Missing measurements are written as NA in the inline table.
x <- read.table(
  header = TRUE,
  sep = "",
  text = "
site yr1 yr2 yr3 yr4
1 10 15 6 8
2 10 20 30 40
3 5 NA 2 3
4 100 100 NA NA"
)
x
# Example weights, laid out exactly like x: one row per site and one weight
# per year column (yr1..yr4).
weights <- read.table(
  header = TRUE,
  sep = "",
  text = "
site yr1 yr2 yr3 yr4
1 2 4 1 3
2 2 2 4 2
3 3 2 2 3
4 4 2 2 4"
)
weights
# Element-wise product of observations and weights. The site column gets
# multiplied too (site * site), so site ids are taken from x below instead.
x.weights <- x * weights

# Column 1 holds the site id; the remaining columns are consumed as
# consecutive pairs of years. Integer division yields a whole number of
# pairs (the earlier ncol / 2 + 1 expression gave a fractional column count,
# 3.5 for this data, and relied on silent truncation).
n_pairs <- (ncol(x.weights) - 1) %/% 2

# numerator: site id in column 1, then for every pair of years the sum of
# weight * value over the non-missing observations of that pair.
numerator <- matrix(NA, nrow = nrow(x.weights), ncol = n_pairs + 1)
numerator[, 1] <- x[, 1]
# seq_len() makes the loop a no-op when there are no complete pairs.
for (i in seq_len(n_pairs)) {
  # Pair i occupies columns 2i and 2i + 1 (column 1 is the site id).
  pair_cols <- c(2 * i, 2 * i + 1)
  numerator[, i + 1] <- rowSums(x.weights[, pair_cols], na.rm = TRUE)
}
numerator
# Column 1 holds the site id; the remaining columns are consumed as
# consecutive pairs of years. Integer division yields a whole number of
# pairs (the earlier ncol / 2 + 1 expression gave a fractional column count,
# 3.5 for this data, and relied on silent truncation).
n_pairs <- (ncol(weights) - 1) %/% 2

# denominator: site id in column 1, then for every pair of years the sum of
# the two weights. NOTE: a weight is counted here even when the matching
# observation in x is NA; the adjustment step further down the script
# overwrites the averages of such incomplete pairs.
denominator <- matrix(NA, nrow = nrow(weights), ncol = n_pairs + 1)
denominator[, 1] <- x[, 1]
# seq_len() makes the loop a no-op when there are no complete pairs.
for (i in seq_len(n_pairs)) {
  # Pair i occupies columns 2i and 2i + 1 (column 1 is the site id).
  pair_cols <- c(2 * i, 2 * i + 1)
  denominator[, i + 1] <- rowSums(weights[, pair_cols], na.rm = TRUE)
}
denominator
# Weighted average per site and year pair: the weighted sums divided by the
# matching weight totals, leaving out the site-id column (column 1) of both
# matrices.
weighted.ave <-
  numerator[, seq(2, ncol(numerator))] /
  denominator[, seq(2, ncol(denominator))]
weighted.ave
# Patch the cells that belong to an incomplete pair of years:
#   * exactly one value of the pair is missing -> insert the observed x value
#   * both values of the pair are missing      -> insert NA
adj.weighted.ave <- weighted.ave

# Column 1 of x is the site id; the rest are consecutive pairs of years.
n_pairs <- (ncol(x) - 1) %/% 2

# With a single site or a single pair of years the matrix subsetting that
# builds weighted.ave drops to a plain vector; restore the site-by-pair
# layout so the two-index assignments below work.
if (is.null(dim(adj.weighted.ave))) {
  dim(adj.weighted.ave) <- c(nrow(x), n_pairs)
}

for (i in seq_len(n_pairs)) {
  # Pair i occupies columns 2i and 2i + 1 of x.
  first <- x[, 2 * i]
  second <- x[, 2 * i + 1]
  first_missing <- is.na(first)
  second_missing <- is.na(second)

  # Rows where exactly one of the two observations is present.
  one_missing <- xor(first_missing, second_missing)
  observed <- ifelse(first_missing, second, first)
  adj.weighted.ave[one_missing, i] <- observed[one_missing]

  # Rows with no observation at all in this pair.
  adj.weighted.ave[first_missing & second_missing, i] <- NA
}
adj.weighted.ave
# [,1] [,2]
# [1,] 13.33333 7.50000
# [2,] 15.00000 33.33333
# [3,] 5.00000 2.60000
# [4,] 100.00000 NA