9

有没有办法将地图上抖动的点保持在该地图的边界内?在下面的示例中,康涅狄格州西南部的抖动位置最终在水中或处于相邻状态,有没有办法让 R 抖动位置点但不超过地图边界?

或者,是否有其他一些技术,例如在每个城市附近创建一个表 grob 以列出公司的名称?

# create a data frame called "ct" of geolocations in two cities near the border of a US state (Connecticut).  Each firm has the same lat and longitude of one of the two cities

> dput(ct)
structure(list(city = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L), .Label = c("Greenwich", "Stamford"), class = "factor"), 
    firm = structure(c(1L, 12L, 21L, 22L, 23L, 24L, 25L, 26L, 
    27L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 
    15L, 16L, 17L, 18L, 19L, 20L), .Label = c("A1", "A10", "A11", 
    "A12", "A13", "A14", "A15", "A16", "A17", "A18", "A19", "A2", 
    "A20", "A21", "A22", "A23", "A24", "A25", "A26", "A27", "A3", 
    "A4", "A5", "A6", "A7", "A8", "A9"), class = "factor"), long = c(-73.63, 
    -73.63, -73.63, -73.63, -73.63, -73.55, -73.55, -73.55, -73.55, 
    -73.55, -73.55, -73.55, -73.55, -73.55, -73.55, -73.55, -73.55, 
    -73.55, -73.55, -73.55, -73.55, -73.55, -73.55, -73.55, -73.55, 
    -73.55, -73.55), lat = c(41.06, 41.06, 41.06, 41.06, 41.06, 
    41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 
    41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 41.09, 
    41.09, 41.09, 41.09, 41.09)), .Names = c("city", "firm", 
"long", "lat"), row.names = c(NA, -27L), class = "data.frame")


library(ggplot2)
# load the map of the United States
all_states <- map_data("state")
# choose to map the borders only of the state of Connecticut
st.map <- subset(all_states, region == "connecticut")

# plot the points for the firms with minimal jitter that still distinguishes each point
ggplot(ct, aes(long, lat)) + 
  geom_polygon(data=st.map, aes(x=long, y=lat, group = group), colour="grey70", fill="white") +
  coord_map() + 
  geom_point(position=position_jitter(width=.1, height=.1), size=2)

在此处输入图像描述

像在这个问题中一样,稍微改变每个经度或纬度是行不通的,因为点太多了,我希望有一个算法解决方案,因为我有很多情况可能会出现这种拥挤和过境。 https://stackoverflow.com/questions/22943110/jitter-coordinates

感谢您的任何建议或答案。

4

2 回答 2

8

您可以制作自己的jitter函数来抖动数据。然后使用函数pnt.in.polyfromSDMTools检查该点是否在多边形内。否则你只是再次抖动原始点。请参阅下面的示例:

require(SDMTools)
bounded_jitter <- function(mapping, data, bounds, width, height, ...){
  # data2 is the jittered data
  data2 <- data
  data2[, paste(mapping$x)] <- rnorm(nrow(data), data[, paste(mapping$x)], width/1.96)
  data2[, paste(mapping$y)] <- rnorm(nrow(data), data[, paste(mapping$y)], height/1.96)
  # is it inside the polygon?
  idx <- as.logical(pnt.in.poly(pnts = data2[, c(paste(mapping$x), paste(mapping$y))],  
                                poly.pnts = bounds)[, 'pip'])
  while(!all(idx)) { # redo for points outside polygon
    data2[!idx, paste(mapping$x)] <- rnorm(sum(!idx), data[!idx, paste(mapping$x)], width/1.96)
    data2[!idx, paste(mapping$y)] <- rnorm(sum(!idx), data[!idx, paste(mapping$y)], height/1.96)
    idx <- as.logical(pnt.in.poly(pnts = data2[, c(paste(mapping$x), paste(mapping$y))],  
                                  poly.pnts = bounds)[, 'pip'])
  }
  # the point
  geom_point(data = data2, mapping, ...)
}
# plot the points for the firms with minimal jitter that still distinguishes each point
ggplot(ct, aes(long, lat)) + 
  geom_polygon(data=st.map, aes(x=long, y=lat, group = group), colour="grey70", fill="white") +
  coord_map() + 
  geom_point(size=2) + 
  bounded_jitter(mapping = aes(x=long, y=lat), 
                 data = ct, 
                 bounds = st.map[, c('long', 'lat')], 
                 width = .1, 
                 height = .1)

结果图:康涅狄格州内部有抖动点

于 2014-12-23T15:52:09.903 回答
1

自首次发布以来的 7 年中出现了一些工具,即与 tidyverse 包配合得非常好的sfggplot2::geom_sf包,以及相应的. 我会将所有东西都作为sf对象而不是多边形来处理,以便访问空间操作、下载州和城镇边界(tigris从人口普查局下载 shapefile 并返回sf对象)并转换坐标。

library(dplyr)
library(sf)
library(ggplot2)
sf_use_s2(FALSE)
#> Spherical geometry (s2) switched off

state_sf <- tigris::states(cb = TRUE) %>%
  filter(STUSPS == "CT")
town_sf <- tigris::county_subdivisions("CT", cb = TRUE)
pts_sf <- ct_pts %>%
  mutate(geometry = purrr::map2(long, lat, ~st_point(x = c(.x, .y)))) %>%
  st_as_sf(crs = st_crs(state_sf))

版本 1仅在每个不同点周围使用一个循环缓冲区(因为我注意到您的原始数据集重复了看起来是城镇中心的内容),然后将其屏蔽以适应州边界。

circle_buff <- pts_sf %>%
  distinct(city, geometry) %>%
  st_buffer(dist = 0.1) %>%
  st_intersection(state_sf)

ggplot() +
  geom_sf(data = state_sf, fill = "white") +
  geom_sf(aes(fill = city), data = circle_buff, color = NA, alpha = 0.4)

然后,您可以通过在这些多边形内采样来创建抖动点,每个城镇的点数与原始数据集中的观测值相同。

set.seed(10)
jitter1 <- ct_pts %>%
  select(city) %>%
  inner_join(circle_buff, by = "city") %>%
  group_by(city) %>%
  summarise(geometry = suppressMessages(st_sample(geometry, size = n()))) %>%
  ungroup() %>%
  st_as_sf()

ggplot() +
  geom_sf(data = state_sf, fill = "white") +
  geom_sf(aes(color = city), data = jitter1, size = 0.8, alpha = 0.8)

但是请注意,由于缓冲区通过城镇边界之外并重叠,因此斯坦福点和格林威治点可以在该重叠区域中占据一些相同的空间。版本 2按城镇边界而不是仅按州屏蔽缓冲区,因此两个城镇可用于采样的区域不再重叠。对于这个例子,我稍微缩小了缓冲区距离,只是为了说明缓冲区边界在城镇边界的内部和外部都结束——也就是说,每个城镇可用于采样的空间都在城镇内部缓冲区半径范围内。

town_buff <- pts_sf %>%
  distinct(city, geometry) %>%
  st_buffer(dist = 0.07) %>%
  split(.$city) %>%
  purrr::imap_dfr(~st_intersection(.x, town_sf %>% filter(NAME == .y)))

jitter2 <- ct_pts %>%
  select(city) %>%
  inner_join(town_buff, by = "city") %>%
  group_by(city) %>%
  summarise(geometry = suppressMessages(st_sample(geometry, size = n()))) %>%
  ungroup() %>%
  st_as_sf()

ggplot() +
  geom_sf(data = state_sf, fill = "white") +
  geom_sf(aes(color = city), data = jitter2, size = 0.8, alpha = 0.8)

于 2021-12-25T00:21:12.900 回答