我的简单方法
# Print fractional seconds with three digits (millisecond precision)
options(digits.secs = 3)

# Read the raw log; the first row of the CSV supplies the column names
log.file <- read.csv("data/raw.csv", header = TRUE)

# Parse both timestamp columns into POSIXct; on input, %OS also accepts
# fractional seconds
stamp.format <- "%Y-%m-%d %H:%M:%OS"
log.file[["StartTime"]] <- as.POSIXct(log.file[["StartTime"]],
                                      format = stamp.format)
log.file[["StopTime"]] <- as.POSIXct(log.file[["StopTime"]],
                                     format = stamp.format)

# Sampling grid from the earliest start to the latest stop; a numeric `by`
# on a POSIXct sequence is a step in seconds, so this is one point per second
range <- seq(from = min(log.file[["StartTime"]]),
             to = max(log.file[["StopTime"]]),
             by = 1)
# Count the rows of the global `log.file` that are active at `tstamp`,
# i.e. rows with StartTime <= tstamp and StopTime >= tstamp (both inclusive).
# Rows for which the comparison evaluates to NA are not counted.
# Returns the count as an integer.
getsum <- function(tstamp) {
  is.active <- log.file$StartTime <= tstamp & log.file$StopTime >= tstamp
  sum(is.active, na.rm = TRUE)
}
# Build the result table in a single data.frame() call: one row per sample
# time in `range`, with the number of active log entries at that time.
# - `frame()` is not a base R function; `data.frame()` is the constructor.
# - Columns are supplied up front because adding a non-empty column to a
#   zero-row data frame with `$<-` fails.
# - vapply() yields a plain integer vector (lapply() would give a list
#   column, which is unsuitable for plotting) and checks that every call
#   to getsum() returns exactly one integer.
dset <- data.frame(
  TIME = range,
  COUNT = vapply(range, getsum, integer(1))
)
# Scatter plot of the active-entry count over time
plot(dset$TIME, dset$COUNT)
更好的解决方案
虽然这并不完全符合我之前提出的标准（即没有填补时间上的空白），但它确实给出了一张接近我想要的图表。不过，如果有解决方案能够同时满足我之前提出的两点要求，我仍然很感兴趣。
# Print fractional seconds with three digits (millisecond precision)
options(digits.secs = 3)

# Read the raw log; the first row of the CSV supplies the column names
log.file <- read.csv("data/raw.csv", header = TRUE)

# Parse both timestamp columns into POSIXct; on input, %OS also accepts
# fractional seconds
stamp.format <- "%Y-%m-%d %H:%M:%OS"
log.file[["StartTime"]] <- as.POSIXct(log.file[["StartTime"]],
                                      format = stamp.format)
log.file[["StopTime"]] <- as.POSIXct(log.file[["StopTime"]],
                                     format = stamp.format)

# Turn every log entry into two events: +1 at its start time and -1 at its
# stop time
queries.start <- data.frame(Time = log.file[["StartTime"]], Value = 1)
queries.stop <- data.frame(Time = log.file[["StopTime"]], Value = -1)

# Stack the start and stop events, then sort them chronologically
queries.both <- rbind(queries.start, queries.stop)
queries.both <- queries.both[order(queries.both$Time), ]

# The running total of the +1/-1 events is the number of entries that are
# open after each event; draw it as a line
queries.sum <- data.frame(
  Time = queries.both$Time,
  Queries = cumsum(queries.both$Value)
)
plot(queries.sum, type = "l")