2

根据答案编辑

我有美国 50 个州的结婚率数据。我想为每个州分别绘制箱线图,并在 R 中把这些图叠加到美国各州的地图上。如果因为地图过于拥挤而无法做到这一点,我想知道如何只在地图上标出每个州的最小值或最大值。如有兴趣,可查看数据链接。

我在 R 中以两种方式列出了我的数据,我认为第一种方式在绘图方面会更好。

 # Read the wide table: one row per state, one column per year.
 # check.names = FALSE keeps the year column names ("2017", ...) unchanged.
 # na.strings turns the "---" placeholders found in the data into NA so that
 # the affected year columns can still be parsed as numeric.
 marriage <- read.csv(
   file = "~/Desktop/masters.csv",
   header = TRUE,
   sep = ",",
   check.names = FALSE,
   na.strings = c("NA", "---")
 )

 # Reshape to long format: one row per (State, year) holding the rate.
 marriagefine <- marriage %>%
   pivot_longer(
     cols = `2017`:`1990`,
     names_to = "year",
     values_to = "rate"
   ) %>%
   # Column names arrive as text; convert them to numeric years.
   mutate(year = as.numeric(year))

这样 R 读入并整理后的表格如下:

> marriagefine
# A tibble: 1,071 x 3
  State    year  rate
  <fct>   <dbl> <dbl>
1 Alabama  2017   7  
2 Alabama  2016   7.1
3 Alabama  2015   7.4
4 Alabama  2014   7.8
5 Alabama  2013   7.8
6 Alabama  2012   8.2
7 Alabama  2011   8.4
8 Alabama  2010   8.2
9 Alabama  2009   8.3
10 Alabama  2008   8.6
# … with 1,061 more rows

另一种读取方式(宽格式):

                  State 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1995 1990
1               Alabama  7.0  7.1  7.4  7.8  7.8  8.2  8.4  8.2  8.3  8.6  8.9  9.2  9.2  9.4  9.6  9.9  9.4 10.1 10.8  9.8 10.6
2                Alaska  6.9  7.1  7.4  7.5  7.3  7.2  7.8  8.0  7.8  8.4  8.5  8.2  8.2  8.5  8.1  8.3  8.1  8.9  8.6  9.0 10.2
3               Arizona  5.8  5.9  5.9  5.8  5.4  5.6  5.7  5.9  5.6  6.0  6.4  6.5  6.6  6.7  6.5  6.7  7.6  7.5  8.2  8.8 10.0
4              Arkansas  9.5  9.9 10.0 10.1  9.8 10.9 10.4 10.8 10.7 10.6 12.0 12.4 12.9 13.4 13.4 14.3 14.3 15.4 14.8 14.4 15.3
5           California   6.3  6.5  6.2  6.4  6.5  6.0  5.8  5.8  5.8  6.7  6.2  6.3  6.4  6.4  6.1  6.2  6.5  5.8  6.4  6.3  7.9
6              Colorado  7.3  7.4  6.8  7.1  6.5  6.8  7.0  6.9  6.9  7.4  7.1  7.2  7.6  7.4  7.8    8  8.2  8.3  8.2  9.0  9.8
7           Connecticut  5.6  5.6  5.3  5.4    5  5.2  5.5  5.6  5.9  5.4  5.5  5.5  5.8  5.8  5.5  5.7  5.4  5.7  5.8  6.6  7.9
8              Delaware  5.5  5.6  5.7    6  6.6  5.8  5.2  5.2  5.4  5.5  5.7  5.9  5.9  6.1    6  6.4  6.5  6.5  6.7  7.3  8.4
9  District of Columbia  8.2  8.1  8.2 11.8 10.8  8.4  8.7  7.6  4.7  4.1  4.2    4  4.1  5.2  5.1  5.1  6.2  4.9  6.6  6.1  8.2
10              Florida  7.8  8.1  8.2  7.3    7  7.2  7.4  7.3  7.5  8.0  8.5  8.6  8.9  9.0    9  9.4  9.3  8.9  8.7  9.9 10.9
11              Georgia  6.9  6.8  6.2  ---  ---  6.5  6.6  7.3  6.6  6.0  6.8  7.3  7.0  7.9    7  6.5  6.1  6.8  7.8  8.4 10.3
12               Hawaii 15.3 15.6 15.9 17.7 16.3 17.5 17.6 17.6 17.2 19.1 20.8 21.9 22.6 22.6   22 20.8 19.6 20.6 18.9 15.7 16.4
13                Idaho  7.8  8.1  8.2  8.4  8.2  8.2  8.6  8.8  8.9  9.5 10.0 10.1 10.5 10.8 10.9   11 11.2 10.8 12.1 13.1 13.9

我的箱线图命令基于下面列出的答案

# Draw one box per state from the long-format table; the palette holds
# one rainbow colour for every distinct state.
boxplot(
  rate ~ State,
  data = marriagefine,
  col = rainbow(length(unique(marriagefine[["State"]]))),
  xlab = "States",
  ylab = "Rates",
  main = "Box Plot for Marriage Rates by State"
)

我该如何把每个州的箱线图和/或每个州的最小值/最大值叠加到美国地图上?我知道下面是绘制地图的基本函数用法。

# Attach usmap; plot_usmap() below is called from it.
library(usmap)
# Generic call outline with every argument spelled out. `data` is an empty
# data frame here, so no marriage rates are passed to the map yet, and
# labels = FALSE requests no labels.
plot_usmap(regions = c("states", "state", "counties", "county"),
include = c(), exclude = c(), data = data.frame(),
values = "values", labels = FALSE,
label_color = "black")
4

2 回答 2

2

出错的原因很简单:您的全局环境中不存在这样的对象。具体来说,并没有一个名为 State、且包含名为 rate 的元素的独立对象,因此无法调用 State$rate。相反,State 和 rate 是名为 marriagefine 的数据框中的两个字段,您可以分别这样引用:marriagefine$State 和 marriagefine$rate。

不过,boxplot 支持公式写法,公式中的变量取自通过 data 参数传入的数据框。(以下仅使用帖子正文中贴出的数据)

# BY YEAR
# One box per year, showing the spread of rates across states.
# The x axis is labelled after the grouping variable (year), and the number
# of colours is taken from the years actually present in the data: the table
# has no columns for 1991-1994 or 1996-1998, so 2017:1990 would overcount.
boxplot(rate ~ year, data = marriagefine,
        main = "Stats for Marriage Rates, 1990-2017",
        xlab = "Year", ylab = "Rates",
        col = rainbow(length(unique(marriagefine$year))))

箱线图(按年份)

# BY STATE
# Spread of each state's rate across the years, one box per state,
# coloured with as many rainbow colours as there are distinct states.
boxplot(
  rate ~ State,
  data = marriagefine,
  ylab = "Rates",
  xlab = "States",
  main = "Stats for Marriage Rates, 1990-2017",
  col = rainbow(length(unique(marriagefine[["State"]])))
)

按州的箱线图

Online Demo

于 2020-02-01T03:44:43.403 回答
1

这需要一个 Shiny 解决方案:

# Attach every package the app needs. library() stops with an error when a
# package is missing, whereas require() only returns FALSE and lets the
# script fail later with a less obvious message.
# NOTE: map_data("state") used further down also needs the "maps" package
# to be installed.
invisible(lapply(
    c("shiny", "data.table", "ggplot2", "RColorBrewer", "ggrepel"),
    library,
    character.only = TRUE
))

# mangle data
# Read the wide table. "---" marks a missing rate in the source data, so it
# is declared as NA to keep the year columns numeric.
marriage <- fread("masters.csv", header = TRUE, na.strings = c("NA", "---"))
# Wide -> long: one row per (State, year column).
marriage <- melt(marriage, id.vars = "State")
# melt() returns the former column names as a factor; convert via character
# to get the year numbers rather than the factor codes.
marriage$variable <- as.numeric(as.character(marriage$variable))
setnames(marriage, c("State", "year", "rate"))
# Lower-case names so they can be matched against the map regions.
marriage$State <- tolower(marriage$State)
states_map <- map_data("state")
# Attach label coordinates (state.center) and abbreviations. This is an
# inner merge, so only rows matching one of the entries in state.name remain.
marriage <- merge(data.table(data.frame(state.center),
    state.abb, State = tolower(state.name)), marriage, by = "State")

# pick fixed color palette
# The fill limits are fixed to the overall range of rates so a colour means
# the same value in every year. na.rm = TRUE is required: a single missing
# rate would otherwise turn both limits into NA and the scale would no
# longer be fixed.
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_fill_gradientn(colours = myPalette(100),
    limits = range(marriage$rate, na.rm = TRUE))

# Define UI
# Sidebar: a slider over the years found in `marriage`, starting at the
# earliest one. Main panel: the 800px-high plot rendered as output "box".
ui <- fluidPage(
    titlePanel("Marriage"),
    sidebarLayout(
        sidebarPanel = sidebarPanel(
            sliderInput(
                inputId = "year",
                label = "Year",
                min = min(marriage$year),
                max = max(marriage$year),
                value = min(marriage$year),
                step = 1
            )
        ),
        mainPanel = mainPanel(
            plotOutput(outputId = "box", height = "800px")
        )
    )
)

# Define server function
# Renders output "box": a map of the states filled by marriage rate for the
# year selected on the slider, with each rate printed as a repelled label.
server <- function(input, output) {
    output$box <- renderPlot({
        req(input$year)
        DT <- marriage[year == input$year]
        # The slider moves in steps of one year, but the data has gaps
        # (e.g. nothing between 1990 and 1995). Show a message instead of
        # building a plot from an empty table.
        validate(need(nrow(DT) > 0,
            paste("No marriage rate data for", input$year)))
        ggplot(DT, aes(map_id = State)) +
            geom_map(aes(fill = rate), map = states_map) +
            # geom_map has no x/y aesthetics; size the axes from the outline
            expand_limits(x = states_map$long, y = states_map$lat) +
            sc +
            geom_text_repel(data = DT, aes(x = x, y = y, label = rate),
                size = 10)
    })
}

# Build the Shiny app object from the UI definition and the server function
shinyApp(ui, server)

应要求补充:一个静态版本,包含两幅并排的图,分别显示每个州的最大值和最小值:

# Load packages
# library() fails immediately when a package is missing; require() would
# only return FALSE and postpone the failure to the first missing function.
# NOTE: map_data("state") used below also needs the "maps" package installed.
invisible(lapply(
    c("data.table", "ggplot2", "RColorBrewer", "ggrepel", "cowplot"),
    library,
    character.only = TRUE
))

# mangle data
# "---" is the source file's marker for a missing rate; reading it as NA
# keeps every year column numeric.
marriage <- fread("masters.csv", header = TRUE, na.strings = c("NA", "---"))
# Reshape wide -> long, one row per (State, year column).
marriage <- melt(marriage, id.vars = "State")
# The melted column names are a factor; as.character() first, so the result
# is the year itself and not the factor code.
marriage$variable <- as.numeric(as.character(marriage$variable))
setnames(marriage, c("State", "year", "rate"))
# Lower-case state names for matching against the map regions.
marriage$State <- tolower(marriage$State)
states_map <- map_data("state")
# Add label coordinates and abbreviations. The inner merge drops rows whose
# State is not among the names in state.name.
marriage <- merge(data.table(data.frame(state.center),
    state.abb, State = tolower(state.name)), marriage, by = "State")

# pick fixed color palette
# Both maps share one fill scale whose limits span all observed rates.
# na.rm = TRUE keeps the limits defined when some rates are missing; without
# it range() returns NA for both ends.
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_fill_gradientn(colours = myPalette(100),
    limits = range(marriage$rate, na.rm = TRUE))

# sort by State and rate
setkeyv(marriage, c("State", "rate"))

# pick year with largest and smallest rate (could be one of several)
# Rows with a missing rate are excluded first: keyed sorting places NA
# before every number, so head() would otherwise report NA as the minimum
# of any state that has a missing year.
DT.max <- marriage[!is.na(rate), tail(.SD, 1), by = State]
DT.min <- marriage[!is.na(rate), head(.SD, 1), by = State]

theme_set(theme_void())

# Build one map from `dt` (one row per State): states are filled by rate and
# labelled "rate (year)" at the x/y coordinates stored in `dt`. Uses the
# states_map outline and the shared fill scale sc defined earlier.
#   dt    - table with columns State, rate, year, x, y
#   title - plot title, centred above the map
plot_rate_map <- function(dt, title) {
    ggplot(dt, aes(map_id = State)) +
        geom_map(aes(fill = rate), map = states_map) +
        # geom_map has no x/y aesthetics; size the axes from the outline
        expand_limits(x = states_map$long, y = states_map$lat) +
        sc +
        geom_text_repel(data = dt, aes(x = x, y = y,
            label = paste0(rate, "\n(", year, ")")), size = 3.5) +
        ggtitle(title) +
        theme(plot.title = element_text(hjust = 0.5))
}

# generate plot of maximum and minimum rates by State
p1 <- plot_rate_map(DT.max,
    "Maximum marriage rate 1990-2017 \nby State (year measured)")

p2 <- plot_rate_map(DT.min,
    "Minimum marriage rate 1990-2017 \nby State (year measured)")

# Arrange the maximum and minimum maps side by side in a two-column grid
cowplot::plot_grid(plotlist = list(p1, p2), ncol = 2)

于 2020-02-01T04:53:14.693 回答