我会按如下方式处理它:
计算单个周期的函数
calculateForOnePeriod <- function(DT, date.start, date.end, period.name, frmt="%d/%m/%Y", DateCols, SpeciesCols) {
date.start <- as.Date(as.character(date.start), format=frmt)
date.end <- as.Date(as.character(date.end), format=frmt)
# find the relevant rows, by date. Namely starting from the largest (start <= start.date) and ending with the smallest (end >= end.date)
row.index.min <- DT[, max(which(start <= date.start), -1)]
row.index.max <- DT[, min(which(end >= date.end), -1)]
# the `-1` are to indicate out of range
# if both are negative one, date not present at all
# otherwise, if just one of the two are -1, match to the valid value (ie, single row range)
if (row.index.max == -1 && row.index.min == -1) {
return(DT[, c(period=period.name, lapply(.SD, function(x) 0)), .SDcols=SpeciesCols])
} else if (row.index.max == - 1) {
row.index.max <- row.index.min
} else if (row.index.min == - 1) {
row.index.min <- row.index.max
}
DT2 <- DT[row.index.min : row.index.max,
# calculate the weighted averages
{
# n.days are the intersects
n.days <- length(intersect(seq.Date(start, end, by=1), seq.Date(date.start, date.end, by=1)))
lapply(.SD, `*`, n.days)
}
, by=DateCols
, .SDcols=SpeciesCols
]
DT2[, c(period=period.name, lapply(.SD, function(x) sum(x, na.rm=TRUE) / as.numeric(1+date.end-date.start))), .SDcols=SpeciesCols]
}
设置数据表
library(data.table)
# convert to data.table
DT <- data.table(dat)
# grab all of the species columns. Modify this accordingly to your real data
DateCols <- c("start", "end")
SpeciesCols <- setdiff(names(DT), DateCols)
# Make sure your dates are in fact dates (and not, say, just strings or factors)
DT[, start := as.Date(as.character(start), format="%d/%m/%Y")]
DT[, end := as.Date(as.character(end), format="%d/%m/%Y")]
# ensure that data is sorted by start, end
setkeyv(DT, DateCols)
用法:
只需创建一个开始/结束日期的向量并迭代简单的示例:
first.date <- as.Date("01/01/2004", "%d/%m/%Y")
interv <- "month" # needs to be a valid value of `by=` in ?seq.Date
total.periods <- 12 # how many periods to analyze
starting.dates <- seq.Date(from=first.date, by="month", length.out=total.periods+1) # +1 for ending dates
ending.dates <- starting.dates - 1
starting.dates <- head(starting.dates, -1)
ending.dates <- tail(ending.dates, -1)
# sample period.names.. this will need to be modified
period.names <- month.abb[month(starting.dates)]
# Note that format is now "2004-06-01"
frmt.exmp <- "%Y-%m-%d"
## have a look:
data.frame(starting.dates, ending.dates)
# iterate using mapply
res.list <-
mapply(calculateForOnePeriod, date.start=starting.dates, date.end=ending.dates, period.name=period.names
, MoreArgs=list(DT=DT, frmt=frmt.exmp, DateCols=DateCols, SpeciesCols=SpeciesCols), SIMPLIFY=FALSE)
# combine into a single data.table
res <- rbindlist(res.list)
# optionally clean 0's to NA
ZeroRows <- apply(res[, !"period", with=FALSE]==0, 1, all)
res[ZeroRows, c(SpeciesCols) := NA]
结果:
res
period G_rub G_sac P_obl N_dut G_glu G_bul G_men
1: Jan NA NA NA NA NA NA NA
2: Feb NA NA NA NA NA NA NA
3: Mar NA NA NA NA NA NA NA
4: Apr NA NA NA NA NA NA NA
5: May NA NA NA NA NA NA NA
6: Jun 9.533333 60.66667 0.000000 3.90000 3.033333 0.00000 0.8666667
7: Jul 160.741935 18.80645 3.903226 44.00000 126.322581 20.22581 4.6129032
8: Aug 104.774194 29.41935 3.870968 37.41935 40.774194 11.35484 0.5161290
9: Sep NA NA NA NA NA NA NA
10: Oct NA NA NA NA NA NA NA
11: Nov NA NA NA NA NA NA NA
12: Dec NA NA NA NA NA NA NA