通常来说,这个任务可以用更高效的方式完成,以便充分利用内存和处理器并减少开销。但是,如果您只是想做一次性的操作,那么下面的方法应该就足够了(在目前的笔记本电脑上大约需要 5 分钟)。
辅助函数
# Vectorised geodesic distance between element-wise paired coordinates.
# More info: https://github.com/RomanAbashin/distGeo_v
#
# Calls the compiled "_inversegeodesic" routine of the 'geosphere' package
# directly, using an equatorial radius of 6378137 and a flattening of
# 1/298.257223563 (the WGS84 ellipsoid constants).
#
# Args:
#   x, y:   Coordinates of the first points. Callers in this file pass
#           longitude as `x` and latitude as `y`.
#   xx, yy: Coordinates of the second points, paired with `x`/`y`.
#           NOTE(review): all four vectors are presumably expected to have
#           the same length -- no recycling is done here; confirm.
#
# Returns:
#   The first column of the 3-column result matrix, one value per pair --
#   presumably the distance in metres, as in geosphere::distGeo (confirm).
distGeo_v <- function(x, y, xx, yy) {
  # requireNamespace() tests for the package by name. Matching against the
  # whole installed.packages() matrix is slow and compares every cell, not
  # just the package-name column.
  if (!requireNamespace("geosphere", quietly = TRUE)) {
    stop(
      "The 'geosphere' package needs to be installed for this function to work.",
      call. = FALSE
    )
  }
  geodesic <- .Call(
    "_inversegeodesic",
    as.double(x), as.double(y), as.double(xx), as.double(yy),
    as.double(6378137), 1 / 298.257223563,
    PACKAGE = "geosphere"
  )
  # The routine returns a flat vector with three values per input pair.
  matrix(geodesic, ncol = 3, byrow = TRUE)[, 1]
}
数据
library(geosphere)
library(tidyverse)
# Reproducible example data: fix the RNG seed before any draws are made.
set.seed(1702)

# 10000 users with normally distributed coordinates (`x`, `y`).
users <- tibble(
  userid = seq_len(10000),
  x = rnorm(n = 10000, mean = 16.3738, sd = 5),
  y = rnorm(n = 10000, mean = 48.2082, sd = 5)
)

# 35000 towers with more widely spread coordinates and a uniformly
# distributed `range` between 50 and 10000.
towers <- tibble(
  lon = rnorm(n = 35000, mean = 16.3738, sd = 10),
  lat = rnorm(n = 35000, mean = 48.2082, sd = 10),
  range = runif(n = 35000, min = 50, max = 10000)
)
代码
# For every user, determine whether at least one tower lies within that
# tower's own `range` of the user's position.
#
# The flags are computed with vapply() and the result tibble is built once,
# rather than growing it with bind_rows() on every iteration. seq_len() also
# keeps the loop empty when `users` has zero rows (1:0 would run twice).
n_towers <- nrow(towers)
is_match <- vapply(
  seq_len(nrow(users)),
  function(i) {
    # Repeat the user's coordinates so they pair element-wise with every
    # tower; distGeo_v() is given equal-length vectors.
    tower_dist <- distGeo_v(
      rep(users$x[i], n_towers), rep(users$y[i], n_towers),
      towers$lon, towers$lat
    )
    # na.rm = TRUE mirrors filter(), which drops rows whose condition is NA.
    any(tower_dist <= towers$range, na.rm = TRUE)
  },
  logical(1)
)
result <- tibble(userid = users$userid, match = is_match)
结果
> head(result)
# A tibble: 6 x 2
userid match
<int> <lgl>
1 1 TRUE
2 2 FALSE
3 3 FALSE
4 4 TRUE
5 5 FALSE
6 6 FALSE
现在您可以使用 left_join() 将结果合并回您的原始数据。