托马斯的回答比我尝试的三种方法中的任何一种都要好得多。在这里,我将这四种方法与microbenchmark
. 我还没有用实际数据尝试过托马斯的答案。我原来的嵌套 for 循环方法在 22 小时后仍在运行。
Unit: milliseconds
expr min lq median uq max neval
fn.1(x, weights) 98.69133 99.47574 100.5313 101.7315 108.8757 20
fn.2(x, weights) 755.51583 758.12175 762.3775 776.0558 801.9615 20
fn.3(x, weights) 564.21423 567.98822 568.5322 571.0975 575.1809 20
fn.4(x, weights) 367.05862 370.52657 371.7439 373.7367 395.0423 20
#########################################################################################
# create data
set.seed(1234)
n.rows <- 40
n.cols <- 40
n.sample <- n.rows * n.cols
x <- sample(20, n.sample, replace=TRUE)
x.NA <- sample(n.rows*n.cols, 10*(n.sample / n.rows), replace=FALSE)
x[x.NA] <- NA
x <- as.data.frame(matrix(x, nrow = n.rows))
weights <- sample(4, n.sample, replace=TRUE)
weights <- as.data.frame(matrix(weights, nrow = n.rows))
weights
#########################################################################################
# Thomas's function
fn.1 <- function(x, weights){
newx <- reshape(x, direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names=c("v1", "v2"))
newwt <- reshape(weights, direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names=c("w1", "w2"))
condwtmean <- function(x,y,wtx,wty){
if(xor(is.na(x),is.na(y))){
if(is.na(x))
x <- (y / wty) * wtx # replacement function
if(is.na(y))
y <- (x / wtx) * wty # replacement function
return(weighted.mean(c(x,y),c(wtx,wty)))
}
else if(!is.na(x) & !is.na(y))
return(weighted.mean(c(x,y),c(wtx,wty)))
else
return(NA)
}
newx$wtmean <- mapply(condwtmean, newx$v1, newx$v2, newwt$w1, newwt$w2)
newx2 <- reshape(newx[,c(1,4:5)], v.names = "wtmean", timevar = "time", direction = "wide")
newx2 <- newx2[,2:(n.cols/2+1)]
names(newx2) <- paste('X', 1:(n.cols/2), sep = "")
return(newx2)
}
fn.1.output <- fn.1(x, weights)
#########################################################################################
# nested for-loops with 4 if statements
fn.2 <- function(x, weights){
for(i in 1: (ncol(x)/2)) {
for(j in 1: nrow(x)) {
if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 0)] = (x[j,(1 + ((i-1)*2 + 1))] / weights[j,(1 + ((i-1)*2 + 1))]) * weights[j,(1 + (i-1)*2 + 0)]
if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 1)] = (x[j,(1 + ((i-1)*2 + 0))] / weights[j,(1 + ((i-1)*2 + 0))]) * weights[j,(1 + (i-1)*2 + 1)]
if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 0)] = NA
if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 1)] = NA
}
}
x.weights = x * weights
numerator <- sapply(seq(1,ncol(x.weights),2), function(i) {
apply(x.weights[,c(i, i+1)], 1, sum, na.rm=T)
})
denominator <- sapply(seq(1,ncol(weights),2), function(i) {
apply(weights[,c(i, i+1)], 1, sum, na.rm=T)
})
weighted.x <- numerator/denominator
for(i in 1: (ncol(x)/2)) {
for(j in 1: nrow(x) ) {
if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE)
if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE)
if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = NA
}
}
return(weighted.x)
}
fn.2.output <- fn.2(x, weights)
fn.2.output <- as.data.frame(fn.2.output)
names(fn.2.output) <- paste('X', 1:(n.cols/2), sep = "")
#########################################################################################
# nested for-loops with 2 if statements
fn.3 <- function(x, weights){
for(i in 1: (ncol(x)/2)) {
for(j in 1: nrow(x)) {
if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 0)] = (x[j,(1 + ((i-1)*2 + 1))] / weights[j,(1 + ((i-1)*2 + 1))]) * weights[j,(1 + (i-1)*2 + 0)]
if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 1)] = (x[j,(1 + ((i-1)*2 + 0))] / weights[j,(1 + ((i-1)*2 + 0))]) * weights[j,(1 + (i-1)*2 + 1)]
}
}
x.weights = x * weights
numerator <- sapply(seq(1,ncol(x.weights),2), function(i) {
apply(x.weights[,c(i, i+1)], 1, sum, na.rm=T)
})
denominator <- sapply(seq(1,ncol(weights),2), function(i) {
apply(weights[,c(i, i+1)], 1, sum, na.rm=T)
})
weighted.x <- numerator/denominator
for(i in 1: (ncol(x)/2)) {
for(j in 1: nrow(x) ) {
if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE)
if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE)
if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = NA
}
}
return(weighted.x)
}
fn.3.output <- fn.3(x, weights)
fn.3.output <- as.data.frame(fn.3.output)
names(fn.3.output) <- paste('X', 1:(n.cols/2), sep = "")
#########################################################################################
# my reshape solution
fn.4 <- function(x, weights){
new.x <- reshape(x , direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names = c("v1", "v2"))
wt <- reshape(weights, direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names = c("w1", "w2"))
new.x$v1 <- ifelse(is.na(new.x$v1), (new.x$v2 / wt$w2) * wt$w1, new.x$v1)
new.x$v2 <- ifelse(is.na(new.x$v2), (new.x$v1 / wt$w1) * wt$w2, new.x$v2)
x2 <- reshape(new.x, direction="wide", varying = list(seq(1,3,2), seq(2,4,2)), v.names = c("v1", "v2"))
x <- x2[,2:(n.cols+1)]
x.weights = x * weights
numerator <- sapply(seq(1,ncol(x.weights),2), function(i) {
apply(x.weights[,c(i, i+1)], 1, sum, na.rm=T)
})
denominator <- sapply(seq(1,ncol(weights),2), function(i) {
apply(weights[,c(i, i+1)], 1, sum, na.rm=T)
})
weighted.x <- numerator/denominator
for(i in 1: (ncol(x)/2)) {
for(j in 1: nrow(x) ) {
if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE)
if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE)
if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = NA
}
}
return(weighted.x)
}
fn.4.output <- fn.4(x, weights)
fn.4.output <- as.data.frame(fn.4.output)
names(fn.4.output) <- paste('X', 1:(n.cols/2), sep = "")
#########################################################################################
rownames(fn.1.output) <- NULL
rownames(fn.2.output) <- NULL
rownames(fn.3.output) <- NULL
rownames(fn.4.output) <- NULL
all.equal(fn.1.output, fn.2.output)
all.equal(fn.1.output, fn.3.output)
all.equal(fn.1.output, fn.4.output)
all.equal(fn.2.output, fn.3.output)
all.equal(fn.2.output, fn.4.output)
all.equal(fn.3.output, fn.4.output)
library(microbenchmark)
microbenchmark(fn.1(x, weights), fn.2(x, weights), fn.3(x, weights), fn.4(x, weights), times=20)
#########################################################################################