1

我在尝试回答 Stack Overflow 上的一个问题(使用 R 映射多个 ID)时,遇到了一个不知如何解决的问题:如何判断某个时间点是否落在一组起止时间构成的区间之内。

帖子中的用户没有提供可重现的示例,下面是我自己构造的。我想用数据框 emtek_file 中的起止时间来检验 hidenic_file$hidenic_time 中的时间点,并为每个 hidenic_id 返回其时间点所落入的时间区间对应的 emtek_id。发帖人没有提到这一点,但每个 hidenic_id 似乎都有可能对应多个 emtek_id。

library(zoo)
## Simulated emtek stays: ten random entry timestamps in 2001, sorted in
## ascending order, each with an exit exactly 24 hours (86400 s) later.
## Months are sampled with replacement; days come from 1..28, which are
## valid in every month.
timestamp_format <- "%Y-%m-%d %H:%M:%S"
date_string <- paste("2001", sample(12, 10, replace = TRUE), sample(28, 10),
                     sep = "-")
time_string <- c("23:03:20", "22:29:56", "01:03:30", "18:21:03", "16:56:26",
                 "23:03:20", "22:29:56", "01:03:30", "18:21:03", "16:56:26")

## Convert to POSIXct explicitly: strptime() returns POSIXlt, which
## data.frame() would otherwise convert implicitly.
entry_emtek <- as.POSIXct(strptime(paste(date_string, time_string),
                                   timestamp_format))
entry_emtek <- entry_emtek[order(entry_emtek)]
exit_emtek <- entry_emtek + 3600 * 24
emtek_file <- data.frame(emtek_id = 1:10, entry_emtek, exit_emtek)

## Simulated hidenic observations: one hundred ids, each with a single random
## timestamp in 2001; the timestamps are sorted before being paired with ids.
hidenic_id <- 110380:110479
date_string <- paste("2001", sample(12, 100, replace = TRUE),
                     sample(28, 100, replace = TRUE), sep = "-")
time_string <- rep(c("23:03:20", "22:29:56", "01:03:30", "18:21:03",
                     "16:56:26", "23:03:20", "22:29:56", "01:03:30",
                     "18:21:03", "16:56:26"), 10)
hidenic_time <- as.POSIXct(strptime(paste(date_string, time_string),
                                    timestamp_format))
hidenic_time <- hidenic_time[order(hidenic_time)]
hidenic_file <- data.frame(hidenic_id, hidenic_time)

## For every hidenic time point, collect the emtek_id of each interval
## [entry_emtek, exit_emtek] (both bounds inclusive) that contains it.
## The result is a list with one element per row of hidenic_file, named by
## hidenic_id; a time point that lies in no interval yields an empty vector,
## and a time point inside several intervals yields several ids.
combined_file <- lapply(seq_len(nrow(hidenic_file)), function(i) {
  time_point <- hidenic_file$hidenic_time[i]
  is_inside <- emtek_file$entry_emtek <= time_point &
    time_point <= emtek_file$exit_emtek
  ## which() drops NA comparisons instead of returning NA ids.
  emtek_file$emtek_id[which(is_inside)]
})
names(combined_file) <- hidenic_file$hidenic_id
4

1 回答 1

1

由于您没有提供预期的结果,因此我不确定您想要做什么。这是使用 IRanges 包的解决方案。初读可能不太容易理解,但它在查找连续区间之间的重叠时非常有用。

library(IRanges)
## Represent every emtek stay as a range running from its entry time to its
## exit time, both expressed as seconds since the epoch.
subject <- IRanges(as.numeric(emtek_file$entry_emtek),
                   as.numeric(emtek_file$exit_emtek))
## Represent every hidenic time point as a range whose start equals its end.
query <- IRanges(as.numeric(hidenic_file$hidenic_time),
                 as.numeric(hidenic_file$hidenic_time))
## Compute the overlaps once and read both sides of each hit from that result.
hits <- findOverlaps(query, subject)
emt.ids <- subjectHits(hits)
hid.ids <- queryHits(hits)
## One output row per (time point, overlapping interval) pair.
cbind(hidenic_file[hid.ids, ], emtek_file[emt.ids, ])

 hidenic_id        hidenic_time emtek_id         entry_emtek          exit_emtek
8      110387 2001-03-13 22:29:56        3 2001-03-13 22:29:56 2001-03-14 22:29:56
9      110388 2001-03-14 01:03:30        3 2001-03-13 22:29:56 2001-03-14 22:29:56
41     110420 2001-06-09 16:56:26        7 2001-06-09 16:56:26 2001-06-10 16:56:26

ps:安装包:

  ## Install IRanges from Bioconductor via BiocManager, which replaces the
  ## retired biocLite() script; BiocManager itself is installed from CRAN
  ## only when it is not already available.
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("IRanges")
于 2013-06-20T23:40:28.180 回答