1

昨天我发布了一个问题,询问如何获得数据集相邻列中观察值的平均值:

R中的平均列对

今天我意识到我实际上需要的是加权平均值。我尝试把上述问题的答案改写成适用于加权平均的情况,但我对 apply 系列函数的理解还不够深入,无法轻松做到这一点。

我已经编写了代码来获取下面示例数据集的加权平均值,并且可能可以将该代码与我的真实数据一起使用。尽管如此,如果有人能够说明如何在此处使用 apply 系列函数进行加权平均,我认为这将大大有助于提高我的理解和编码能力。无论如何,感谢您过去和未来的所有帮助和想法。

# Example observations: one row per site, one column per year (yr1..yr4).
# NA marks a missing observation.
x <- read.table(
  header = TRUE,
  sep = "",
  text = "
  site     yr1  yr2  yr3  yr4
    1      10   15    6    8
    2      10   20   30   40
    3       5   NA    2    3
    4     100  100   NA   NA"
)

x

# Weights matching the observation table cell for cell: same sites (rows)
# and the same year columns (yr1..yr4).
weights <- read.table(
  header = TRUE,
  sep = "",
  text = "
  site    yr1  yr2  yr3  yr4
    1       2    4    1    3
    2       2    2    4    2
    3       3    2    2    3
    4       4    2    2    4"
)

weights

# Weighted mean of each pair of adjacent year columns (yr1/yr2, yr3/yr4, ...).
# Column 1 of `x` and `weights` holds the site id; the remaining columns are
# consumed two at a time, and a trailing unpaired column is ignored.

# Element-wise products of observations and weights. The site column is
# multiplied too, but that column of x.weights is never read below.
x.weights <- x * weights

# Index of the first column of every pair: 2, 4, ...
n.pairs <- (ncol(weights) - 1) %/% 2
pair.start <- 2 * seq_len(n.pairs)

# Row-wise sum over each column pair of `df`, treating NA as 0.
# Returns a plain nrow(df) x n.pairs matrix without dimnames.
pair.sums <- function(df) {
  sums <- vapply(
    pair.start,
    function(k) rowSums(df[, c(k, k + 1)], na.rm = TRUE),
    numeric(nrow(df))
  )
  matrix(sums, nrow = nrow(df), ncol = length(pair.start))
}

# Column 1 carries the site id; columns 2.. hold sum(value * weight) per pair.
numerator <- cbind(x[, 1], pair.sums(x.weights))
dimnames(numerator) <- NULL

numerator

# Column 1 carries the site id; columns 2.. hold sum(weight) per pair.
# A weight is counted here even when its observation is NA, so pairs with a
# missing observation do not yet hold the correct weighted mean.
denominator <- cbind(x[, 1], pair.sums(weights))
dimnames(denominator) <- NULL

denominator

# Drop the site column; drop = FALSE keeps a matrix even with a single pair.
weighted.ave <- numerator[, -1, drop = FALSE] / denominator[, -1, drop = FALSE]
weighted.ave

# Correct the pairs that contain missing observations:
#   - exactly one value of the pair is missing -> use the value that is present
#   - both values of the pair are missing      -> NA
# Pairs with both values present keep the weighted mean from `weighted.ave`.

adj.weighted.ave <- weighted.ave

# First column of every pair in `x` (2, 4, ...); column 1 is the site id.
pair.start <- 2 * seq_len((ncol(x) - 1) %/% 2)

# One column per pair: the first and the second observation of that pair.
first.val <- as.matrix(x[, pair.start, drop = FALSE])
second.val <- as.matrix(x[, pair.start + 1, drop = FALSE])

first.missing <- is.na(first.val)
second.missing <- is.na(second.val)

# The masks have one cell per (site, pair), the same layout as weighted.ave,
# so they can index it directly instead of looping cell by cell.
only.second <- first.missing & !second.missing
only.first <- !first.missing & second.missing

adj.weighted.ave[only.second] <- second.val[only.second]
adj.weighted.ave[only.first] <- first.val[only.first]
adj.weighted.ave[first.missing & second.missing] <- NA

adj.weighted.ave

#           [,1]     [,2]
# [1,]  13.33333  7.50000
# [2,]  15.00000 33.33333
# [3,]   5.00000  2.60000
# [4,] 100.00000       NA
4

2 回答 2

1

使用上一个问题的答案中的元素:

# Weighted mean of each pair of adjacent year columns, computed pair by pair.
# Column 1 is the site id, so the pairs start at columns 2, 4, ...
pair.start <- 2 * seq_len((ncol(x.weights) - 1) %/% 2)

# A weight may only count when its observation is present; otherwise a pair
# with one missing value would be divided by the sum of both weights.
weights.used <- weights
weights.used[is.na(x)] <- NA

# sum(value * weight) per site and pair; products with a missing value are NA
# and are skipped.
numerator <- vapply(
  pair.start,
  function(i) rowSums(x.weights[, c(i, i + 1)], na.rm = TRUE),
  numeric(nrow(x.weights))
)

# sum(weight) per site and pair, over the present observations only.
denominator <- vapply(
  pair.start,
  function(i) rowSums(weights.used[, c(i, i + 1)], na.rm = TRUE),
  numeric(nrow(weights.used))
)

# A pair with no observation at all has total weight 0; report NA for it
# instead of 0 / 0 = NaN.
denominator[denominator == 0] <- NA

numerator / denominator
于 2012-12-07T06:43:25.553 回答
0
 # Weighted mean over all four year columns of each site (one value per row).
 # The weights argument of weighted.mean() is named `w`; an argument spelled
 # `weights=` is absorbed by `...` and ignored, which yields the plain mean.
 # unlist() turns the one-row data frame of weights into a numeric vector.
 # Assumes site i is stored in row i of `weights`.
 apply(x, 1, function(rw) weighted.mean( rw[2:5],
                      w = unlist(weights[rw["site"], 2:5 ]), na.rm = TRUE) )
 # [1]  11.0  26.0   3.5 100.0

请注意:这种写法要求 x 中的站点编号与 weights 中对应行的行号(row.names)一致。

于 2012-12-07T02:54:13.390 回答