这段代码可以作为一个起点。它不会在圆圈内再绘制圆圈:ggplot 虽然可以把不同的变量映射到同一个图形属性(aesthetic,此处为大小),但实现起来比较困难。因此,这里用点的大小表示总人数,用点的颜色表示患病人数。针对您的完整数据集,还需要相应地调整大小比例尺(size scale)的范围。
下面的代码先获取各城市的地理坐标,再把它们合并回数据框;随后对数据进行汇总,得到包含所需计数的数据框。地图的边界由各城市经度(lon)和纬度(lat)的最大值与最小值确定。最后一步是在地图上绘制各城市及其计数。
# load libraries
library(ggplot2)
library(maps)
library(ggmap)
library(grid)
library(plyr)
# Sample data: one row per subject, with the subject's city and disease status.
# Locations are single-quoted because they contain spaces and commas.
df <- read.table(
  text = "
subjectid location disease
12 'Atlanta, GA' yes
15 'Boston, MA' no
13 'True Blue, Grenada' yes
85 'True Blue, Grenada' yes
46 'Atlanta, GA' yes
569 'Boston, MA' yes
825 'True Blue, Grenada' yes
685 'Atlanta, GA' no
54 'True Blue, Grenada' no
214 'Atlanta, GA' no
685 'Boston, MA' no
125 'True Blue, Grenada' yes
569 'Boston, MA' no",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Look up coordinates once per distinct city, then attach them to every
# subject row by joining on the city name
geoloc <- geocode(unique(df$location))
pos <- data.frame(
  location = unique(df$location),
  geoloc,
  stringsAsFactors = FALSE
)
df <- merge(x = df, y = pos, by = "location", all = TRUE)
# Summarise the data file: collapse to one row per city (location, lon, lat)
# holding the number of diseased subjects and the total number of subjects
df <- ddply(df, .(location, lon, lat), summarise,
  # Summing a logical vector counts its TRUE values, so no ifelse() is needed
  countDisease = sum(disease == "yes"),
  countTotal = length(location))
# Plot the map
# World polygons as a data frame. map_data() is ggplot2's helper for turning a
# maps database into plottable form; using it avoids an unqualified call to
# `map`, which would break if another attached package masked that name.
mp1 <- map_data("world")
# Bounding box around the cities, padded by a few degrees. na.rm = TRUE keeps
# the limits usable in case geocoding returned NA coordinates for a city.
xmin <- min(df$lon, na.rm = TRUE) - 5
xmax <- max(df$lon, na.rm = TRUE) + 7
ymin <- min(df$lat, na.rm = TRUE) - 5
ymax <- max(df$lat, na.rm = TRUE) + 5
# coord_cartesian() zooms the view without dropping polygon vertices that fall
# outside the limits, so country outlines are not distorted at the edges.
Amap <- ggplot() +
  geom_polygon(aes(x = long, y = lat, group = group), data = mp1, fill = "grey", colour = "grey") +
  coord_cartesian(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  theme_bw()
# Overlay the cities: point size encodes the total count, point colour the
# number of diseased subjects
Amap <- Amap +
  geom_point(
    mapping = aes(x = lon, y = lat, size = countTotal, colour = countDisease),
    data = df
  )
# Label each point with the city name only (everything from the first comma
# onwards is stripped), nudged to the right of the point
Amap <- Amap +
  geom_text(
    mapping = aes(x = lon, y = lat, label = gsub(",.*$", "", location)),
    data = df,
    size = 2.5,
    hjust = -0.3
  )
# Scales: point size range and a blue-to-red colour gradient
Amap <- Amap +
  scale_size(range = c(3, 10)) +
  scale_colour_continuous(low = "blue", high = "red", space = "Lab")
