2

我想计算起点位置和终点位置之间的距离。那么,我应该使用哪些包和函数?

This is what the sample looks like: 

# A tibble: 10 x 5
   trip_id start_location     end_location       start_time end_time
     <int> <chr>              <chr>              <chr>      <chr>   
 1       1 13.6753,100.63453  13.65828,100.71631 00:05:24   00:41:14
 2       2 13.66348,100.71868 13.65258,100.71571 03:49:54   03:57:52
 3       3 13.63345,100.71102 13.63349,100.71096 04:14:52   04:53:52
 4       4 13.59653,100.70172 13.63433,100.71101 05:01:52   05:36:52
 5       5 13.57542,100.79453 13.59612,100.74922 05:57:11   06:15:52
 6       6 13.60123,100.71091 13.63241,100.71297 06:21:52   06:33:52
 7       7 13.60388,100.70617 13.60567,100.71292 06:43:32   06:58:52
 8   43456 13.94582,100.735   13.95905,100.62037 19:28:51   20:28:30
 9   43457 14.01229,100.66908 13.98712,100.61631 20:58:30   21:23:30
10   43458 13.79245,100.70217 13.90366,100.66788 22:09:30   22:40:30
# Sample trips: each location is a single "latitude,longitude" string and the
# times are "HH:MM:SS" strings. The object carries tibble classes, but it is
# built with base R only.
my.df <- structure(
  list(
    trip_id = c(1:7, 43456:43458),
    start_location = c(
      "13.6753,100.63453", "13.66348,100.71868", "13.63345,100.71102",
      "13.59653,100.70172", "13.57542,100.79453", "13.60123,100.71091",
      "13.60388,100.70617", "13.94582,100.735", "14.01229,100.66908",
      "13.79245,100.70217"
    ),
    end_location = c(
      "13.65828,100.71631", "13.65258,100.71571", "13.63349,100.71096",
      "13.63433,100.71101", "13.59612,100.74922", "13.63241,100.71297",
      "13.60567,100.71292", "13.95905,100.62037", "13.98712,100.61631",
      "13.90366,100.66788"
    ),
    start_time = c(
      "00:05:24", "03:49:54", "04:14:52", "05:01:52", "05:57:11",
      "06:21:52", "06:43:32", "19:28:51", "20:58:30", "22:09:30"
    ),
    end_time = c(
      "00:41:14", "03:57:52", "04:53:52", "05:36:52", "06:15:52",
      "06:33:52", "06:58:52", "20:28:30", "21:23:30", "22:40:30"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

我的问题是如何计算 start_location 和 end_location 两列之间的距离,这两列都是经纬度坐标。

我的预期输出是;

   trip_id start_location     end_location       start_time end_time   length (in meters) 
 1       1 13.6753,100.63453  13.65828,100.71631 00:05:24   00:41:14    120 
 2       2 13.66348,100.71868 13.65258,100.71571 03:49:54   03:57:52    500
 3       3 13.63345,100.71102 13.63349,100.71096 04:14:52   04:53:52    480
 4       4 13.59653,100.70172 13.63433,100.71101 05:01:52   05:36:52    7000
 5       5 13.57542,100.79453 13.59612,100.74922 05:57:11   06:15:52    1563
 6       6 13.60123,100.71091 13.63241,100.71297 06:21:52   06:33:52    7892
 7       7 13.60388,100.70617 13.60567,100.71292 06:43:32   06:58:52    200
 8   43456 13.94582,100.735   13.95905,100.62037 19:28:51   20:28:30    5863
 9   43457 14.01229,100.66908 13.98712,100.61631 20:58:30   21:23:30    1478
10   43458 13.79245,100.70217 13.90366,100.66788 22:09:30   22:40:30    2348

那么,有什么包可以使用吗?

先感谢您。

4

2 回答 2

3

geosphere 包可以根据经度和纬度计算距离。

library(tidyr)
library(geosphere)

# Split each "lat,long" string into separate latitude and longitude columns.
# The new columns are character vectors.
my.df <- separate(my.df, start_location, into = c("start_lat", "start_long"), sep = "[,]")
my.df <- separate(my.df, end_location, into = c("end_lat", "end_long"), sep = "[,]")

# distGeo() is documented to take numeric points in (longitude, latitude)
# order, so build explicit numeric matrices rather than passing character
# columns and relying on implicit coercion inside geosphere.
start_pts <- cbind(as.numeric(my.df[["start_long"]]), as.numeric(my.df[["start_lat"]]))
end_pts <- cbind(as.numeric(my.df[["end_long"]]), as.numeric(my.df[["end_lat"]]))

# Geodesic distance in meters between each start/end pair, row by row.
my.df$length <- distGeo(start_pts, end_pts)

my.df

# A tibble: 10 x 8
   trip_id start_lat start_long end_lat  end_long  start_time end_time   length
     <int> <chr>     <chr>      <chr>    <chr>     <chr>      <chr>       <dbl>
1        1 13.6753   100.63453  13.65828 100.71631 00:05:24   00:41:14  9046.  
2        2 13.66348  100.71868  13.65258 100.71571 03:49:54   03:57:52  1248.  
3        3 13.63345  100.71102  13.63349 100.71096 04:14:52   04:53:52     7.86
4        4 13.59653  100.70172  13.63433 100.71101 05:01:52   05:36:52  4301.  
5        5 13.57542  100.79453  13.59612 100.74922 05:57:11   06:15:52  5412.  
6        6 13.60123  100.71091  13.63241 100.71297 06:21:52   06:33:52  3457.  
7        7 13.60388  100.70617  13.60567 100.71292 06:43:32   06:58:52   757.  
8    43456 13.94582  100.735    13.95905 100.62037 19:28:51   20:28:30 12473.  
9    43457 14.01229  100.66908  13.98712 100.61631 20:58:30   21:23:30  6345.  
10   43458 13.79245  100.70217  13.90366 100.66788 22:09:30   22:40:30 12850.  
于 2021-04-10T16:13:02.007 回答
1

可以这样做。实际上,在计算地理距离之前应先把坐标向量的顺序反转,因为 geosphere::distGeo() 要求向量按(经度, 纬度)的顺序给出。为简单起见,我把两个坐标列改成了列表列(list column)。

library(tidyverse)
library(geosphere)
# Replace each "lat,long" string column with a list column of numeric
# c(long, lat) vectors, then compute the geodesic distance for every row.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        # Split on the comma and swap to (long, lat), the order distGeo() expects.
        map(str_split(loc, ","), function(parts) as.numeric(parts[c(2, 1)]))
      }
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, distGeo))

# A tibble: 10 x 6
   trip_id start_location end_location start_time end_time   length
     <int> <list>         <list>       <chr>      <chr>       <dbl>
 1       1 <dbl [2]>      <dbl [2]>    00:05:24   00:41:14  9046.  
 2       2 <dbl [2]>      <dbl [2]>    03:49:54   03:57:52  1248.  
 3       3 <dbl [2]>      <dbl [2]>    04:14:52   04:53:52     7.86
 4       4 <dbl [2]>      <dbl [2]>    05:01:52   05:36:52  4301.  
 5       5 <dbl [2]>      <dbl [2]>    05:57:11   06:15:52  5412.  
 6       6 <dbl [2]>      <dbl [2]>    06:21:52   06:33:52  3457.  
 7       7 <dbl [2]>      <dbl [2]>    06:43:32   06:58:52   757.  
 8   43456 <dbl [2]>      <dbl [2]>    19:28:51   20:28:30 12473.  
 9   43457 <dbl [2]>      <dbl [2]>    20:58:30   21:23:30  6345.  
10   43458 <dbl [2]>      <dbl [2]>    22:09:30   22:40:30 12850.

根据下面的评论,以下语法不会强制更改原始纬度/经度列

# Same distance computation, but the parsed coordinates go into temporary
# "*_1" list columns so the original location strings are left untouched.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        # Split on the comma and swap to (long, lat), the order distGeo() expects.
        map(str_split(loc, ","), function(parts) as.numeric(parts[c(2, 1)]))
      },
      .names = "{.col}_1"
    )
  ) %>%
  mutate(length = map2_dbl(start_location_1, end_location_1, distGeo)) %>%
  # Drop the temporary helper columns again.
  select(!ends_with("_1"))

或者,

# Parse each location into a named numeric vector (Long, Lat), compute the
# distance, then spread the named vectors into separate numeric columns.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        map(
          str_split(loc, ","),
          # Swap to (long, lat) and label the elements for unnest_wider().
          function(parts) setNames(as.numeric(parts[c(2, 1)]), c("Long", "Lat"))
        )
      }
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, distGeo)) %>%
  unnest_wider(start_location, names_sep = "_") %>%
  unnest_wider(end_location, names_sep = "_")

# A tibble: 10 x 8
   trip_id start_location_Long start_location_Lat end_location_Long end_location_Lat start_time end_time   length
     <int>               <dbl>              <dbl>             <dbl>            <dbl> <chr>      <chr>       <dbl>
 1       1                101.               13.7              101.             13.7 00:05:24   00:41:14  9046.  
 2       2                101.               13.7              101.             13.7 03:49:54   03:57:52  1248.  
 3       3                101.               13.6              101.             13.6 04:14:52   04:53:52     7.86
 4       4                101.               13.6              101.             13.6 05:01:52   05:36:52  4301.  
 5       5                101.               13.6              101.             13.6 05:57:11   06:15:52  5412.  
 6       6                101.               13.6              101.             13.6 06:21:52   06:33:52  3457.  
 7       7                101.               13.6              101.             13.6 06:43:32   06:58:52   757.  
 8   43456                101.               13.9              101.             14.0 19:28:51   20:28:30 12473.  
 9   43457                101.               14.0              101.             14.0 20:58:30   21:23:30  6345.  
10   43458                101.               13.8              101.             13.9 22:09:30   22:40:30 12850. 

您还可以根据需要将列表列转换回字符类型(注意:转换后的字符串顺序为"经度, 纬度")。

# Parse the locations into named numeric (Long, Lat) vectors, compute the
# distance, then collapse each vector back into a "long, lat" string.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        map(
          str_split(loc, ","),
          # Swap to (long, lat), the order distGeo() expects.
          function(parts) setNames(as.numeric(parts[c(2, 1)]), c("Long", "Lat"))
        )
      }
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, distGeo)) %>%
  mutate(
    across(
      ends_with("location"),
      function(pts) map_chr(pts, function(pt) paste(pt[1], pt[2], sep = ", "))
    )
  )

# A tibble: 10 x 6
   trip_id start_location      end_location        start_time end_time   length
     <int> <chr>               <chr>               <chr>      <chr>       <dbl>
 1       1 100.63453, 13.6753  100.71631, 13.65828 00:05:24   00:41:14  9046.  
 2       2 100.71868, 13.66348 100.71571, 13.65258 03:49:54   03:57:52  1248.  
 3       3 100.71102, 13.63345 100.71096, 13.63349 04:14:52   04:53:52     7.86
 4       4 100.70172, 13.59653 100.71101, 13.63433 05:01:52   05:36:52  4301.  
 5       5 100.79453, 13.57542 100.74922, 13.59612 05:57:11   06:15:52  5412.  
 6       6 100.71091, 13.60123 100.71297, 13.63241 06:21:52   06:33:52  3457.  
 7       7 100.70617, 13.60388 100.71292, 13.60567 06:43:32   06:58:52   757.  
 8   43456 100.735, 13.94582   100.62037, 13.95905 19:28:51   20:28:30 12473.  
 9   43457 100.66908, 14.01229 100.61631, 13.98712 20:58:30   21:23:30  6345.  
10   43458 100.70217, 13.79245 100.66788, 13.90366 22:09:30   22:40:30 12850.
于 2021-04-10T16:24:05.497 回答