5

我想在R中创建人口金字塔。我知道 StackOverflow 上有很多示例,但我想创建一个还包括人口预测的示例,即每个年龄组按性别划分的条形图和按性别和年龄组划分的行数用于预测。

你可以在这里看到一个例子:http: //geographyblog.eu/wp/the-worlds-population-pyramid-is-changing-shape/

在此处输入图像描述

如果有一些关于如何更好地说明这一点的建议(例如用平滑线),他们也很受欢迎,但我想指出当前情况和预测。示例数据可在联合国网站上找到:http: //esa.un.org/wpp/population-pyramids/population-pyramids_absolute.htm

任何帮助将不胜感激。

4

2 回答 2

5

也许少一点的临时方法使用ggplot2andgeom_bargeom_step

可以从wpp2015包中提取数据(或者wpp2012wpp2010或者wpp2008如果您更喜欢旧版本)。

library("dplyr")
library("tidyr")
library("wpp2015")

#load data in wpp2015
data(popF)
data(popM)
data(popFprojMed)
data(popMprojMed)

#combine past and future female population
df0 <- popF %>% 
  left_join(popFprojMed) %>%
  mutate(gender = "female")

#combine past and future male population, add on female population
df1 <- popM %>% 
  left_join(popMprojMed) %>%
  mutate(gender = "male") %>%
  bind_rows(df0) %>%
  mutate(age = factor(age, levels = unique(age)))

#stack data for ggplot, filter World population and required years
df2 <- df1 %>%
  gather(key = year, value = pop, -country, -country_code, -age, -gender) %>%
  mutate(pop = pop/1e03) %>%
  filter(country == "World", year %in% c(1950, 2000, 2050, 2100))

#add extra rows and numeric age variable for geom_step used for 2100
df2 <- df2 %>% 
  mutate(ageno = as.numeric(age) - 0.5)

df2 <- df2 %>%
  bind_rows(df2 %>% filter(year==2100, age=="100+") %>% mutate(ageno = ageno + 1)) 

df2 
# Source: local data frame [170 x 7]
# 
#    country country_code    age gender  year       pop ageno
#     (fctr)        (int) (fctr)  (chr) (chr)     (dbl) (dbl)
# 1    World          900    0-4   male  1950 171.85124   0.5
# 2    World          900    5-9   male  1950 137.99242   1.5
# 3    World          900  10-14   male  1950 133.27428   2.5
# 4    World          900  15-19   male  1950 121.69274   3.5
# 5    World          900  20-24   male  1950 112.39438   4.5
# 6    World          900  25-29   male  1950  96.59408   5.5
# 7    World          900  30-34   male  1950  83.38595   6.5
# 8    World          900  35-39   male  1950  80.59671   7.5
# 9    World          900  40-44   male  1950  73.08263   8.5
# 10   World          900  45-49   male  1950  63.13648   9.5
# ..     ...          ...    ...    ...   ...       ...   ...

使用标准ggplot功能,您可以获得类似的东西,从这里的答案改编:

在此处输入图像描述

library("ggplot2")
ggplot(data = df2, aes(x = age, y = pop, fill = year)) +
  #bars for all but 2100
  geom_bar(data = df2 %>% filter(gender == "female", year != 2100) %>% arrange(rev(year)),
           stat = "identity",
           position = "identity") +
  geom_bar(data = df2 %>% filter(gender == "male", year != 2100) %>% arrange(rev(year)),
           stat = "identity",
           position = "identity",
           mapping = aes(y = -pop)) +
  #steps for 2100
  geom_step(data =  df2 %>% filter(gender == "female", year == 2100), 
            aes(x = ageno)) +
  geom_step(data =  df2 %>% filter(gender == "male", year == 2100), 
            aes(x = ageno, y = -pop)) +
  coord_flip() +
  scale_y_continuous(labels = abs)

注意:您需要这样做arrange(rev(year)),因为条形图是叠加层。

使用该ggthemes软件包,您可以非常接近最初的《经济学人》情节。

在此处输入图像描述

library("ggthemes") 
ggplot(data = df2, aes(x = age, y = pop, fill = year)) +
  #bars for all but 2100
  geom_bar(data = df2 %>% filter(gender == "female", year != 2100) %>% arrange(rev(year)),
           stat = "identity",
           position = "identity") +
  geom_bar(data = df2 %>% filter(gender == "male", year != 2100) %>% arrange(rev(year)),
           stat = "identity",
           position = "identity",
           mapping = aes(y = -pop)) +
  #steps for 2100
  geom_step(data =  df2 %>% filter(gender == "female", year == 2100), 
        aes(x = ageno), size = 1) +
  geom_step(data =  df2 %>% filter(gender == "male", year == 2100), 
        aes(x = ageno, y = -pop), size = 1) +
  coord_flip() +
  #extra style shazzaz
  scale_y_continuous(labels = abs, limits = c(-400, 400), breaks = seq(-400, 400, 100)) +
  geom_hline(yintercept = 0) +
  theme_economist(horizontal = FALSE) +
  scale_fill_economist() +
  labs(fill = "", x = "", y = "")

(我相信你可以走得更近,但我现在已经停在这里了)。

于 2016-05-09T10:55:52.617 回答
2

您可以使用此问题的答案轻松制作一些东西(这里我使用了@timriffle 的答案以及我的答案)。
首先是一些数据(来自您提供的链接):

wp <- structure(list(M.1990 = c(325814, 295272, 269351, 265163, 249651, 220027, 196523, 178295, 141789, 115097, 106579, 91763, 77150, 56845, 38053, 25716, 19442), M.2000 = c(319675, 317296, 317072, 290827, 262992, 256378, 241401, 212924, 188905, 169133, 131813, 103162, 90921, 72231, 53449, 32707, 25868), M.2010 = c(328759, 315119, 311456, 312831, 311077, 284258, 255596, 248575, 232217, 202633, 176241, 153494, 114194, 83129, 65266, 43761, 39223), F.1990 = c(308121, 281322, 257432, 254065, 238856, 211943, 188433, 170937, 138358, 112931, 106510, 93425, 82667, 67057, 47679, 37435, 36724), F.2000 = c(298455, 297012, 299757, 277706, 252924, 248127, 233583, 207518, 183646, 165444, 132307, 105429, 96681, 80227, 64956, 45832, 46413), F.2010 = c(307079, 293664, 290598, 293313, 295739, 273379, 247383, 241938, 226914, 201142, 176440, 156283, 121200, 92071, 77990, 56895, 66029)), .Names = c("M.1990", "M.2000", "M.2010", "F.1990", "F.2000", "F.2010"), row.names = c("0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+"), class = "data.frame")

wp
      M.1990 M.2000 M.2010 F.1990 F.2000 F.2010
0-4   325814 319675 328759 308121 298455 307079
5-9   295272 317296 315119 281322 297012 293664
10-14 269351 317072 311456 257432 299757 290598
15-19 265163 290827 312831 254065 277706 293313
20-24 249651 262992 311077 238856 252924 295739
25-29 220027 256378 284258 211943 248127 273379
30-34 196523 241401 255596 188433 233583 247383
35-39 178295 212924 248575 170937 207518 241938
40-44 141789 188905 232217 138358 183646 226914
45-49 115097 169133 202633 112931 165444 201142
50-54 106579 131813 176241 106510 132307 176440
55-59  91763 103162 153494  93425 105429 156283
60-64  77150  90921 114194  82667  96681 121200
65-69  56845  72231  83129  67057  80227  92071
70-74  38053  53449  65266  47679  64956  77990
75-79  25716  32707  43761  37435  45832  56895
80+    19442  25868  39223  36724  46413  66029

xrange <- range(c(0,wp))
yrange <- range(c(0,nrow(wp)))

然后是绘图部分(在两个面板中):

par(mfcol=c(1,2))
par(mar=c(5,4,4,0))
plot(NA,type="n", main="Men", xlab="", ylab="", xaxs="i", 
     xlim=rev(xrange), ylim=yrange, axes=FALSE, yaxs="i")
rect(xrange[1],yrange[1],xrange[2],yrange[2], col="cadetblue")
abline(v=seq(0,xrange[2],by=1e5), col="white")
# All years with bars you want to represent filled 
# should be entered in reverse order
polygon(c(0,rep(wp$M.2000,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)),
        col="lightblue",border="lightblue")
polygon(c(0,rep(wp$M.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
        col="darkblue",border="darkblue")
# And those you want with just a border, afterwards:
polygon(c(0,rep(wp$M.2010,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
        col=NA,border="darkred",lwd=2)
axis(1, at=c(0,1e5,2e5,3e5), labels=format(c(0,1e5,2e5,3e5),scientific=FALSE))
axis(2, at=1:nrow(wp)-0.5,labels=row.names(wp),las=2)
box()

par(mar=c(5,0,4,4))
plot(NA,type="n", main="Women", xlab="", ylab="", xaxs="i", 
     xlim=xrange, ylim=yrange, axes=FALSE, yaxs="i")
rect(xrange[1],yrange[1],xrange[2],yrange[2], col="cadetblue")
abline(v=seq(0,xrange[2],by=1e5), col="white")
polygon(c(0,rep(wp$F.2000,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)),
        col="lightblue",border="lightblue")
polygon(c(0,rep(wp$F.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
        col="darkblue",border="darkblue")
polygon(c(0,rep(wp$F.2010,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
        col=NA,border="darkred",lwd=2)
axis(1, at=c(0,1e5,2e5,3e5), labels=format(c(0,1e5,2e5,3e5),scientific=FALSE))
axis(4, at=1:nrow(wp)-0.5,labels=row.names(wp),las=2)
box()

在此处输入图像描述

为了规避@Spacedman 在他的评论中强调的问题,您可以在某些年份使用 alpha。

library(scales)
[...]
polygon(c(0,rep(wp$M.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
        col=alpha("darkblue",0.4),border="darkblue")
[...]
polygon(c(0,rep(wp$F.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
        col=alpha("darkblue",0.4),border="darkblue")
[...]

在此处输入图像描述

于 2013-04-05T07:21:11.683 回答