1

我正在尝试提高以下示例的内存性能:

基线 df 4 行

df <- structure(list(sessionid = structure(c(1L, 2L, 3L, 4L), .Label = 
c("AAA1", "AAA2","AAA3", "AAA4"), class = "factor"), bitrateinbps = c(10000000,
 10000000, 10000000, 10000000), startdate = structure(c(1326758507, 1326758671, 
1326759569, 1326760589), class = c("POSIXct", "POSIXt"), tzone = ""), enddate = 
structure(c(1326765780, 1326758734, 1326760629, 1326761592), class = c("POSIXct", 
"POSIXt"), tzone = "")), .Names = c("sessionid", "bitrateinbps", "startdate", 
"enddate"), row.names = c(NA, 4L), class = 
"data.frame")

用 8 行替换 df

df <- structure(list(sessionid = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), 
.Label = c("AAA1", "AAA2", "AAA3", "AAA4", "AAA5", "AAA6", "AAA7", "AAA8"),
 class = "factor"), bitrateinbps =c(10000000,  10000000, 10000000, 10000000, 
10000000, 10000000, 10000000, 10000000), startdate = structure(c(1326758507,
1326758671, 1326759569, 1326760589, 1326761589, 1326762589, 1326763589, 1326764589), 
class = c("POSIXct", 
"POSIXt"), tzone = ""), enddate = structure(c(1326765780, 1326758734, 1326760629, 
1326761592, 1326767592, 
1326768592, 1326768700, 1326769592), class = c("POSIXct", "POSIXt"), tzone = "")), 
.Names = c("sessionid", 
"bitrateinbps", "startdate", "enddate"), row.names = c(NA, 8L), class = 
"data.frame")

尝试 df 分析内存使用情况,然后再次尝试备用 df

library(xts)

fun0 <- function(i, d) {  
 idx0 <- seq(d$startdate[i],d$enddate[i],1)    # create sequence for index  
 dat0 <- rep(1,length(idx0))  # create data over sequence  
xts(dat0, idx0, dimnames=list(NULL,d$sessionid[i]))  # xts object  
}

# loop over each row and put each row into its own xts object
xl0 <- lapply(1:NROW(df), fun0, d=df)

# merge all the xts objects  
xx0 <- do.call(merge, xl0)

# apply a function (e.g. colMeans) to each 15-minute period  
xa0 <- period.apply(xx0, endpoints(xx0, 'minutes', 15), colSums, na.rm=TRUE)/900  
xa1 <- t(xa0) 

# convert from atomic vector to data frame  
xa1 = as.data.frame(xa1)

# bind to df  
out1 = cbind(df, xa1)

# print aggregate memory usage statistics  
print(paste('R is using', memory.size(), 'MB out of limit', memory.limit(), 'MB'))

# create function to return matrix of memory consumption  
object.sizes <- function()  
{  
 return(rev(sort(sapply(ls(envir=.GlobalEnv), function (object.name)  
  object.size(get(object.name))))))  
}

# print to console in table format  
object.sizes()

结果如下:

4 row df:  
xx0 = 292104 Bytes .... do.call(merge, xl0)  
xl0 = 154648 Bytes .... lapply(1:NROW(df), fun0, d=df)  

8 row df:  
xx0 = 799480 Bytes .... do.call(merge, xl0)  
xl0 = 512808 Bytes .... lapply(1:NROW(df), fun0, d=df)  

我正在为mergeandlapply函数寻找内存效率更高的东西,因此如果有人有任何建议并可以显示替代方案的比较结果,我可以扩展行数。

4

0 回答 0