3

我正在计算“同时”记录的 UTM 位置之间的距离(以米为单位),但遇到了问题。按照目前的写法,它只计算与“时间上最接近”的那一个个体之间的距离。我希望它计算所有在时间上“接近”的个体之间的距离。

在我的示例中,我有 3 只驼鹿和 3 只狼。我想取驼鹿 1,依次计算它与同一时间记录的狼 1、狼 2、狼 3 的位置之间的距离。现在脚本只在所有狼中寻找绝对时间差最小的那一条记录,并只计算到那只狼的距离,而不是到其他所有狼的距离。

这是我的测试数据:

驼鹿位置数据:

structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L), .Label = c("F07001", 
"F07010", "M07012"), class = "factor"), x = c(1482445L, 1481274L, 
1481279L, 1481271L, 1480849L, 1480881L, 1480883L, 1480880L, 1482448L, 
1482494L, 1482534L, 1482534L, 1482553L, 1482555L, 1482414L, 1482852L, 
1476120L, 1476104L, 1476101L), y = c(6621768L, 6619628L, 6619630L, 
6619700L, 6620321L, 6620427L, 6620438L, 6620423L, 6616403L, 6616408L, 
6616395L, 6616408L, 6616406L, 6616418L, 6616755L, 6616312L, 6623655L, 
6623646L, 6623652L), date = structure(c(1173088800, 1173096000, 
1173103260, 1173110400, 1173117600, 1173211200, 1173218400, 1173139200, 
1173088800, 1173096000, 1173103260, 1173110400, 1173117600, 1173211200, 
1173218400, 1173139200, 1173270600, 1173277800, 1173282960), class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), .Names = c("id", "x", "y", "date"
), row.names = c(NA, -19L), class = "data.frame")

狼定位数据:

structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L), .Label = c("HF7572", 
"Htest", "UM1347"), class = "factor"), x = c(1480610L, 1480640L, 
1480613L, 1480613L, 1480555L, 1480567L, 1480627L, 1480532L, 1480593L, 
1484394L, 1484394L, 1483940L, 1483933L, 1483935L, 1483930L, 1483855L, 
1483793L, 1483802L, 1484392L, 1483855L), y = c(6619853L, 6619739L, 
6619759L, 6619862L, 6619838L, 6619772L, 6619902L, 6619899L, 6619887L, 
6619589L, 6619602L, 6619899L, 6619907L, 6619905L, 6619896L, 6619834L, 
6619702L, 6619672L, 6619558L, 6619834L), date = structure(c(1173088800, 
1173096060, 1173103440, 1173111600, 1173117780, 1173213600, 1173218400, 
1173141120, 1173266100, 1173095940, 1173099600, 1173103200, 1173106920, 
1173110400, 1173208800, 1173211200, 1173222000, 1173266100, 1173362100, 
1173211200), class = c("POSIXct", "POSIXt"), tzone = "UTC")), .Names = c("id", 
"x", "y", "date"), row.names = c(NA, -20L), class = "data.frame")

到目前为止,这是我的脚本:

# Read the moose and wolf relocation tables. The code below uses the columns
# id, x, y and date of these files.
mloc <- read.csv("moose.csv", header = TRUE)
wloc <- read.csv("wolf.csv", header = TRUE)

# Parse the timestamps directly as UTC instead of going through a local-zone
# strptime() result that is re-labelled afterwards.
mloc$date <- as.POSIXct(mloc$date, format = "%Y-%m-%d %H:%M", tz = "UTC")
wloc$date <- as.POSIXct(wloc$date, format = "%Y-%m-%d %H:%M", tz = "UTC")

# Sort both tables chronologically, then take the timestamps as plain numbers
# (seconds since the epoch) so they can be compared arithmetically.
Sortmoose <- mloc[order(mloc$date), ]
Sortwolf <- wloc[order(wloc$date), ]
m <- as.numeric(Sortmoose$date)
w <- as.numeric(Sortwolf$date)

# For every moose fix, find the pair of consecutive wolf fixes that brackets
# it in time; all.inside = TRUE keeps the index within 1..(length(w) - 1) so
# that w[id + 1] always exists.
id <- findInterval(m, w, all.inside = TRUE)

# Keep whichever of the two bracketing wolf fixes is closer in time
# (a tie goes to the later one).
id_min <- ifelse(abs(m - w[id]) < abs(m - w[id + 1]), id, id + 1)

# NOTE: this attaches exactly ONE wolf fix (the nearest in time, whichever
# wolf it belongs to) to each moose fix; it does not pair a moose fix with
# every wolf.
Sortmoose$wolfID <- Sortwolf$id[id_min]
Sortmoose$wolfdate <- Sortwolf$date[id_min]
Sortmoose$wolfx <- Sortwolf$x[id_min]
Sortmoose$wolfy <- Sortwolf$y[id_min]

# Euclidean distance between the moose fix and the matched wolf fix, in the
# units of the x/y coordinates.
Sortmoose$dist <- sqrt((Sortmoose$wolfx - Sortmoose$x)^2 +
                         (Sortmoose$wolfy - Sortmoose$y)^2)

只要位置是在“同一”时间记录的,我想计算每对驼鹿/狼对之间的距离。我希望输出包含驼鹿信息和相关的狼信息以及这两点之间的距离(以米为单位)。我也想要时差,所以我可以过滤掉那些> 45分钟或类似的东西,但这是我认为我以后可以做的事情。基本上类似于:mooseID mooseDate mooseX mooseY wolfID wolfDate wolfX wolfY Distance(m) TimeDiff (min)

4

2 回答 2

2

新的解决方案。下面是实现您所需功能(近似匹配)的代码。关键思想是创建一个带有新列 date1 的新数据表:对于原始数据中每一条 date = 05:17:13 的记录,新表中会有 date1 = 04:00:00、05:00:00 和 06:00:00 三行(其余各列重复),然后按这个新列进行合并。这样可以保证原始数据中彼此相隔一小时以内的任意两个事件都会被合并到一起。

之后我们只计算距离和时间差。

请注意,使用 data.table 对速度至关重要,因为您的数据量太大,使用常规的 data.frame 会太慢。

library(data.table)
library(lubridate)

# Turn both relocation tables into data.tables.
mloc <- as.data.table(mloc)
wloc <- as.data.table(wloc)

# Expands `dt` so that every row appears once per hour offset in `range`.
#
# Args:
#   dt:    a data.table with at least the columns `id` and `date`
#          (`date` must be a date-time that lubridate::floor_date accepts).
#   range: whole-hour offsets added to the hour-floored `date`
#          (default -1:1: previous, current and next full hour).
#
# Returns:
#   A new data.table keyed on `date1` with length(range) rows for each input
#   row: all original columns duplicated, plus
#   date1 = floor_date(date, "hour") + offset.
#   Example: a row with date = '2013-01-15 05:17:23' and the default range
#   yields rows with date1 = '2013-01-15 04:00:00', '2013-01-15 05:00:00'
#   and '2013-01-15 06:00:00'.
#
# The input table is not modified (no key is set on it).
AddTimeBoundaries <- function(dt, range = -1:1) {
  stopifnot(
    is.data.table(dt),
    all(c("id", "date") %in% names(dt))
  )

  n <- nrow(dt)

  # Repeat the whole table once per offset. Building a separate lookup table
  # and self-joining on (id, date) would multiply rows whenever an (id, date)
  # pair occurs more than once, and keying `dt` for that join would re-sort
  # the caller's table by reference.
  result <- dt[rep(seq_len(n), times = length(range))]
  offsets <- hours(rep(range, each = n))

  # set() with an explicit value avoids any name clash between local
  # variables and columns of `dt`.
  set(result, j = "date1",
      value = floor_date(result[["date"]], "hour") + offsets)

  # Order rows by (id, date) first so that ties on `date1` come out in a
  # deterministic order, then key on the column used for merging.
  setorderv(result, c("id", "date"))
  setkeyv(result, "date1")
  result
}

# Expand both tables to the neighbouring full hours so that fixes recorded
# close together in time share at least one `date1` value.
mloc.1 <- AddTimeBoundaries(mloc)
wloc.1 <- AddTimeBoundaries(wloc)

# Inner-join every wolf row to all moose rows with the same `date1`.
# `on = "date1"` makes the join column explicit instead of relying on keys;
# nomatch = 0L drops wolf rows without any moose partner.
x <- mloc.1[wloc.1, on = "date1", allow.cartesian = TRUE, nomatch = 0L]

# In the join result, wolf columns whose names clash with moose columns carry
# an `i.` prefix (current data.table). They are renamed to the `.1` names here
# so the output layout stays id, date, x, y, id.1, date.1, x.1, y.1, ...
# A moose/wolf pair can share more than one `date1`, hence unique().
# The time difference is given in seconds explicitly rather than in
# automatically chosen units.
result <- unique(x[, list(
  id, date, x, y,
  id.1 = i.id, date.1 = i.date, x.1 = i.x, y.1 = i.y,
  distance = sqrt((x - i.x)^2 + (y - i.y)^2),
  time.diff = difftime(date, i.date, units = "secs")
)])

结果包含相隔 1 小时以内的所有事件对(有时相隔会达到 2 小时,但您可以轻松过滤掉这些)。

> head(result, 10)
        id                date       x       y   id.1              date.1     x.1     y.1  distance  time.diff
1: F07001 2007-03-05 10:00:00 1482445 6621768 HF7572 2007-03-05 10:00:00 1480610 6619853 2652.2538     0 secs
2: M07012 2007-03-05 10:00:00 1482448 6616403 HF7572 2007-03-05 10:00:00 1480610 6619853 3909.0592     0 secs
3: F07001 2007-03-05 10:00:00 1482445 6621768 UM1347 2007-03-05 11:59:00 1484394 6619589 2923.4640 -7140 secs
4: M07012 2007-03-05 10:00:00 1482448 6616403 UM1347 2007-03-05 11:59:00 1484394 6619589 3733.2977 -7140 secs
5: F07001 2007-03-05 12:00:00 1481274 6619628 HF7572 2007-03-05 10:00:00 1480610 6619853  701.0856  7200 secs
6: M07012 2007-03-05 12:00:00 1482494 6616408 HF7572 2007-03-05 10:00:00 1480610 6619853 3926.5100  7200 secs
7: F07001 2007-03-05 10:00:00 1482445 6621768 HF7572 2007-03-05 12:01:00 1480640 6619739 2715.6705 -7260 secs
8: F07001 2007-03-05 12:00:00 1481274 6619628 HF7572 2007-03-05 12:01:00 1480640 6619739  643.6435   -60 secs
9: M07012 2007-03-05 10:00:00 1482448 6616403 HF7572 2007-03-05 12:01:00 1480640 6619739 3794.4380 -7260 secs
10: M07012 2007-03-05 12:00:00 1482494 6616408 HF7572 2007-03-05 12:01:00 1480640 6619739 3812.2011   -60 secs

旧解决方案。它行不通,因为 OP 需要的是日期的近似匹配(1 小时内),而不是精确匹配。

假设我正确理解了您的问题,下面是使用 data.table 包的解决方案。我把您测试数据中的第一个结构称为 mloc,第二个结构称为 wloc。

步骤 1. 将两个数据帧都转换为 data.table,并把 date 设为键:

library(data.table)

# Convert both tables to data.table and key each one on its timestamp.
mloc <- as.data.table(mloc)
wloc <- as.data.table(wloc)
setkeyv(mloc, "date")
setkeyv(wloc, "date")

步骤 2. 按键 date 合并两个表,创建“笛卡尔积”并计算距离:

# Inner-join each wolf fix to every moose fix with exactly the same
# timestamp. `on = "date"` names the join column explicitly; nomatch = 0L
# drops wolf fixes that have no moose fix at that time.
x <- mloc[wloc, on = "date", allow.cartesian = TRUE, nomatch = 0L]

# Current data.table prefixes the clashing wolf columns with `i.`; rename
# them to the `.1` names used in the printed output.
setnames(x, c("i.id", "i.x", "i.y"), c("id.1", "x.1", "y.1"))

# Euclidean distance between the moose and the wolf position, added by
# reference.
x[, distance := sqrt((x - x.1)^2 + (y - y.1)^2)]

> x
                   date     id       x       y   id.1     x.1     y.1  distance
 1: 2007-03-05 10:00:00 F07001 1482445 6621768 HF7572 1480610 6619853 2652.2538
 2: 2007-03-05 10:00:00 M07012 1482448 6616403 HF7572 1480610 6619853 3909.0592
 3: 2007-03-05 16:00:00 F07001 1481271 6619700 UM1347 1483935 6619905 2671.8759
 4: 2007-03-05 16:00:00 M07012 1482534 6616408 UM1347 1483935 6619905 3767.2019
 5: 2007-03-06 20:00:00 F07001 1480881 6620427 UM1347 1483855 6619834 3032.5443
 6: 2007-03-06 20:00:00 M07012 1482555 6616418 UM1347 1483855 6619834 3655.0042
 7: 2007-03-06 20:00:00 F07001 1480881 6620427  Htest 1483855 6619834 3032.5443
 8: 2007-03-06 20:00:00 M07012 1482555 6616418  Htest 1483855 6619834 3655.0042
 9: 2007-03-06 22:00:00 F07001 1480883 6620438 HF7572 1480627 6619902  593.9966
10: 2007-03-06 22:00:00 M07012 1482414 6616755 HF7572 1480627 6619902 3618.9747
于 2013-03-26T20:22:26.980 回答
1

我想我有一个部分解决方案,它允许您根据需要调整判定“接近”的时间窗口。

# Work on data.table versions of both relocation tables.
mloc <- as.data.table(mloc)
wloc <- as.data.table(wloc)

# Prefix every moose column with "m" and every wolf column with "w" so the
# two sets of columns stay distinguishable after joining.
setnames(mloc, names(mloc), paste0("m", names(mloc)))
setnames(wloc, names(wloc), paste0("w", names(wloc)))

# Adjustable rounding window, in minutes.
window_min <- 45
r <- window_min / 60 / 24 # the same window expressed in days
window_secs <- window_min * 60

# Add the rounded date column to the two tables: each timestamp is rounded to
# the nearest multiple of the window. The arithmetic is done on epoch seconds
# (as.numeric() of a POSIXct), so it does not depend on the automatically
# chosen units of a difftime and the rounded values are whole seconds.
mloc[, rdate := as.POSIXct(
  round(as.numeric(mdate) / window_secs) * window_secs,
  origin = "1970-01-01", tz = "GMT"
)]
wloc[, rdate := as.POSIXct(
  round(as.numeric(wdate) / window_secs) * window_secs,
  origin = "1970-01-01", tz = "GMT"
)]

# Key both tables on the rounded timestamp so the join below matches on it.
setkey(mloc, rdate)
setkey(wloc, rdate)

# Join the wolf and moose tables on the rounded date. nomatch = 0L keeps only
# moose rows that have at least one wolf row with the same rounded date;
# allow.cartesian = TRUE permits several wolf rows per moose row.
wloc[mloc, allow.cartesian = TRUE, nomatch = 0L]

##                  rdate    wid      wx      wy               wdate    mid      mx      my               mdate
## 1: 2007-03-05 09:45:00 HF7572 1480610 6619853 2007-03-05 10:00:00 F07001 1482445 6621768 2007-03-05 10:00:00
## 2: 2007-03-05 09:45:00 HF7572 1480610 6619853 2007-03-05 10:00:00 M07012 1482448 6616403 2007-03-05 10:00:00
## 3: 2007-03-05 12:00:00 UM1347 1484394 6619589 2007-03-05 11:59:00 F07001 1481274 6619628 2007-03-05 12:00:00
## 4: 2007-03-05 12:00:00 HF7572 1480640 6619739 2007-03-05 12:01:00 F07001 1481274 6619628 2007-03-05 12:00:00
## 5: 2007-03-05 12:00:00 UM1347 1484394 6619589 2007-03-05 11:59:00 M07012 1482494 6616408 2007-03-05 12:00:00
## 6: 2007-03-05 12:00:00 HF7572 1480640 6619739 2007-03-05 12:01:00 M07012 1482494 6616408 2007-03-05 12:00:00
## 7: 2007-03-05 14:15:00 UM1347 1483940 6619899 2007-03-05 14:00:00 F07001 1481279 6619630 2007-03-05 14:01:00
## 8: 2007-03-05 14:15:00 HF7572 1480613 6619759 2007-03-05 14:04:00 F07001 1481279 6619630 2007-03-05 14:01:00
## 9: 2007-03-05 14:15:00 UM1347 1483940 6619899 2007-03-05 14:00:00 M07012 1482534 6616395 2007-03-05 14:01:00
##10: 2007-03-05 14:15:00 HF7572 1480613 6619759 2007-03-05 14:04:00 M07012 1482534 6616395 2007-03-05 14:01:00
##11: 2007-03-05 15:45:00 UM1347 1483935 6619905 2007-03-05 16:00:00 F07001 1481271 6619700 2007-03-05 16:00:00
##12: 2007-03-05 15:45:00 UM1347 1483935 6619905 2007-03-05 16:00:00 M07012 1482534 6616408 2007-03-05 16:00:00
##13: 2007-03-05 18:00:00 HF7572 1480555 6619838 2007-03-05 18:03:00 F07001 1480849 6620321 2007-03-05 18:00:00
##14: 2007-03-05 18:00:00 HF7572 1480555 6619838 2007-03-05 18:03:00 M07012 1482553 6616406 2007-03-05 18:00:00
##15: 2007-03-06 20:15:00 UM1347 1483855 6619834 2007-03-06 20:00:00 F07001 1480881 6620427 2007-03-06 20:00:00
##16: 2007-03-06 20:15:00  Htest 1483855 6619834 2007-03-06 20:00:00 F07001 1480881 6620427 2007-03-06 20:00:00
##17: 2007-03-06 20:15:00 UM1347 1483855 6619834 2007-03-06 20:00:00 M07012 1482555 6616418 2007-03-06 20:00:00
##18: 2007-03-06 20:15:00  Htest 1483855 6619834 2007-03-06 20:00:00 M07012 1482555 6616418 2007-03-06 20:00:00
##19: 2007-03-06 21:45:00 HF7572 1480627 6619902 2007-03-06 22:00:00 F07001 1480883 6620438 2007-03-06 22:00:00
##20: 2007-03-06 21:45:00 HF7572 1480627 6619902 2007-03-06 22:00:00 M07012 1482414 6616755 2007-03-06 22:00:00

我说这是部分解决方案,因为当一个值向上取整而另一个向下取整时,它会漏掉时间上接近的匹配。例如,wdate 为 2007-03-05 16:20:00 时向上舍入为 2007-03-05 16:30:00,而 mdate 为 2007-03-05 16:00:00 时向下舍入为 2007-03-05 15:45:00,因此即使这两个事件仅相隔 20 分钟且窗口为 45 分钟,连接结果中也不会有这对匹配。

我还有另一个不做四舍五入的 data.table 部分解决方案,它改用 roll=-45*60 和 roll=45*60(然后用 rbindlist 把两个结果合并在一起)。它能匹配到上面这个示例记录,但看起来还有一些其他问题需要我调查……

于 2013-03-27T12:56:51.083 回答