4

我正在尝试将 R 与tidyverse包一起使用,并且无法将函数应用于我的数据。我的数据包括纬度/经度坐标,我想计算从每个位置(我的数据框行)到参考位置的距离。我正在尝试使用geosphere::distm函数。

library(tidyverse)
library(geosphere)

# Reference location that every row of `data` is measured against.
my_long <- 172
my_lat <- -43

# Distance from the reference point to each row's (long, lat) pair, computed
# one row at a time. The reference must be spelled `my_long` / `my_lat` to
# match the variables defined above; `myLong` / `myLat` are not defined in
# this snippet.
data <- data %>%
  rowwise() %>%
  mutate(
    dist = distm(c(my_long, my_lat), c(long, lat), fun = distHaversine) # this works
  )

如上所述,我使用 rowwise() 函数可以得到结果,但该函数已被弃用,所以我想知道如何用现代 tidyverse 的方式(例如 dplyr 或 purrr)来实现。我目前最接近的尝试是使用 map2:

#' Haversine distance between two longitude/latitude points
#'
#' @param long1,lat1 Longitude and latitude of the first point.
#' @param long2,lat2 Longitude and latitude of the second point.
#' @return The value returned by `distm()` for the two points.
my_distm <- function(long1, lat1, long2, lat2) {
  first_point <- c(long1, lat1)
  second_point <- c(long2, lat2)
  distm(first_point, second_point, fun = distHaversine)
}

# Attempt to replace rowwise(): call my_distm() once per (long, lat) pair,
# passing my_long and my_lat as the extra trailing arguments.
# NOTE(review): map2() returns a list, so `dist` presumably ends up as a
# list-column rather than a numeric column - confirm; map2_dbl() would be the
# numeric-returning variant.
data <- data %>%  mutate(
  dist = map2(long, lat, my_distm, my_long, my_lat) # this doesn't
)

到目前为止,我失败了。

4

4 回答 4

7

您可以在 mutate 中使用 mapply:

library(tidyverse)
library(geosphere)

# Two sample locations to measure.
df <- data.frame(long = c(170, 180), lat = c(-43, 43))

# Reference point the samples are compared with.
my_long <- 172
my_lat <- -43

# Baseline: the rowwise() approach from the question.
df %>%
  rowwise() %>%
  mutate(
    dist = distm(
      c(my_long, my_lat),
      c(long, lat),
      fun = distHaversine
    ) # this works
  )

#Source: local data frame [2 x 3]
#Groups: <by row>

# A tibble: 2 x 3
#   long   lat    dist
#  <dbl> <dbl>   <dbl>
#1   170   -43  162824
#2   180    43 9606752

# Same calculation without rowwise(): mapply() walks `long` and `lat` in
# parallel and calls the anonymous function once per pair.
df %>%
  mutate(
    dist = mapply(
      function(lg, lt) {
        distm(c(my_long, my_lat), c(lg, lt), fun = distHaversine)
      },
      long,
      lat
    )
  )

#  long lat    dist
#1  170 -43  162824
#2  180  43 9606752

更新:使用 map2

# map2() always returns a list, which would make `dist` a list-column.
# map2_dbl() returns a plain double vector instead, so `dist` has the same
# type as in the rowwise() version; as.numeric() strips the matrix dimensions
# from distm()'s result before it is collected.
df %>%
  mutate(
    dist = map2_dbl(
      long,
      lat,
      ~ as.numeric(distm(c(my_long, my_lat), c(.x, .y), fun = distHaversine))
    )
  )
# here .x stands for a value from long column, and .y stands for a value from lat column
#  long lat    dist
#1  170 -43  162824
#2  180  43 9606752

如果要使用 my_distm:

#' Haversine distance from point 1 to point 2
#'
#' @param long1,lat1 Longitude and latitude of the first point.
#' @param long2,lat2 Longitude and latitude of the second point.
#' @return The value returned by `distm()` for the two points.
my_distm <- function(long1, lat1, long2, lat2) {
  from <- c(long1, lat1)
  to <- c(long2, lat2)
  distm(from, to, fun = distHaversine)
}

# Use map2_dbl() rather than map2() so `dist` is a numeric column instead of
# a list-column; as.numeric() strips the matrix dimensions of the result.
df %>%
  mutate(
    dist = map2_dbl(long, lat, ~ as.numeric(my_distm(my_long, my_lat, .x, .y)))
  )
#  long lat    dist
#1  170 -43  162824
#2  180  43 9606752
于 2017-08-22T00:29:52.033 回答
7

您可以使用 distHaversine 代替 distm,并配合 cbind:

# A single distHaversine() call on whole columns: cbind() pairs the reference
# columns and the location columns, giving one distance per row.
data %>%
  mutate(
    dist = distHaversine(
      cbind(myLong, myLat),
      cbind(long, lat)
    )
  )

示例数据:

# Sample data: the reference point is stored as constant columns next to the
# three locations being measured.
myLong <- 172
myLat <- -43
long <- c(180, 179, 179)
lat <- c(-40, -41, -40)
data <- data.frame(myLong, myLat, long, lat)

结果是:

  myLong myLat long lat     dist
1    172   -43  180 -40 745481.0
2    172   -43  179 -41 620164.8
3    172   -43  179 -40 672076.2
于 2017-08-22T00:37:34.357 回答
2

我也很喜欢rowwise,但是因为您正在寻找其他解决方案

Psidom的数据

# Sample locations (same values as the first answer).
df <- data.frame(long = c(170, 180), lat = c(-43, 43))

# Reference point, also bundled as one c(longitude, latitude) vector.
my_long <- 172
my_lat <- -43
myval <- c(my_long, my_lat)

purrr 解决方案

这里是purrr::map

library(purrr)
# One distm() call per row index, each comparing `myval` with that row of
# `df`. seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(.) would count 1, 0 and index rows that do not exist.
df1 <- df %>%
  mutate(
    dist = map(
      seq_len(nrow(.)),
      ~ distm(myval, df[.x, ], fun = distHaversine)
    )
  )

#   long lat    dist
# 1  170 -43  162824
# 2  180  43 9606752

您也可以使用 map2,但需要先将 myval 重复多次,整理成两列 data.frame 的形式;直接以向量形式传入是不行的。

OP的请求

要从更大的数据框中选出 long 和 lat 并与 distm 一起使用,请在 map 语句中使用 select:

# A wider data frame with an extra column that distm() must not see.
garbage <- data.frame(long = c(170, 180), lat = c(-43, 43), junk = c(0, 0))

# select() keeps only `long` and `lat` from each row before it reaches
# distm(). seq_len() is used instead of 1:nrow(.) so that a zero-row data
# frame produces no iterations rather than the indices 1 and 0.
df1 <- garbage %>%
  mutate(
    dist = map(
      seq_len(nrow(.)),
      ~ distm(myval, select(garbage[.x, ], long, lat), fun = distHaversine)
    )
  )

#   long lat junk    dist
# 1  170 -43    0  162824
# 2  180  43    0 9606752

使用迭代器的 sapply 解决方案

我也喜欢iterators用于按行操作

library(iterators)
# iter(df, by = "row") hands sapply() one row of `df` at a time; each row is
# compared with `myval`.
df2 <- df %>%
  mutate(
    dist = sapply(
      iter(df, by = "row"),
      function(row) {
        distm(myval, row, fun = distHaversine)
      }
    )
  )

#   long lat    dist
# 1  170 -43  162824
# 2  180  43 9606752
于 2017-08-22T00:52:06.520 回答
2

你可以使用pmap()

#' Distance between a start point and an end point
#'
#' @param StartLong,StartLat Longitude and latitude of the start point.
#' @param EndLong,EndLat Longitude and latitude of the end point.
#' @return The value returned by `distm()` for the two points.
f <- function(StartLong, StartLat, EndLong, EndLat) {
  start_point <- c(StartLong, StartLat)
  end_point <- c(EndLong, EndLat)
  distm(start_point, end_point)
}

# `.` is the piped data frame: pmap_dbl() calls f() once per row with that
# row's column values as the arguments.
df %>%
  mutate(
    dist = pmap_dbl(., f)
  )

或者用 Vectorize() 将函数向量化,然后直接在 mutate() 中使用:

# Vectorize() wraps f() so it can be called directly on whole columns.
g <- Vectorize(f)

df %>%
  mutate(
    dist = g(StartLong, StartLat, EndLong, EndLat)
  )

结果为:

#  StartLong StartLat EndLong EndLat    dist
#1       170      -43     172    -43  162824
#2       180       43     172    -43 9606752

另一个思路是使用 purrrlyr 包中的 by_row():

library(purrrlyr)

# by_row() calls the function on each row of `df`; the result is stored in a
# column named by `.to`.
by_row(
  df,
  function(row) {
    start_point <- c(row$StartLong, row$StartLat)
    end_point <- c(row$EndLong, row$EndLat)
    distm(start_point, end_point)
  },
  .collate = "rows",
  .to = "dist"
)

结果为:

## tibble [2 x 5]
#  StartLong StartLat EndLong EndLat    dist
#      <dbl>    <dbl>   <dbl>  <dbl>   <dbl>
#1       170      -43     172    -43  162824
#2       180       43     172    -43 9606752

数据

# Example input: two start points, each paired with the same end point.
df <- data.frame(
  StartLong = c(170, 180),
  StartLat = c(-43, 43),
  EndLong = c(172, 172),
  EndLat = c(-43, -43)
)
于 2017-08-22T00:58:18.633 回答