1

我刚开始使用 `xtable`、`stargazer` 之类的包。下面是一个样本数据集,原始数据集要大得多。

# Reproducible sample data: 10 rows with a year, a treatment group and three
# uniform(0, 1) measurements. Columns are named directly in data.frame(), the
# no-op rep() wrapper is dropped, and TRUE replaces the reassignable T.
# The random draws happen in the same order as before, so the values match.
set.seed(1)
df <- data.frame(
  Year = sample(c(2012, 2016), 10, replace = TRUE),
  Group = sample(c("Treat", "Control"), 10, replace = TRUE),
  V1 = runif(10, 0, 1),
  V2 = runif(10, 0, 1),
  V3 = runif(10, 0, 1)
)

我想生成格式良好的表格,显示上述数据集的描述性统计信息。

但是,有没有一种方法可以让 `xtable` 直接生成一个表格,让我既能看到整个数据集的统计数据,又能看到按组(Treat 和 Control)和年份(2012 年、2016 年)分开的统计数据?甚至按组和年份的组合来分开?

或者我应该根据这些条件过滤原始 df,然后对每个子集分别运行 `xtable`?

我想要的另一件事是显示变量的中位数,而不是显示平均值,以及其他统计数据。有可能,还是我必须使用 R 手动计算?

任何使用 `stargazer` 的解决方案也同样欢迎!

谢谢!

4

1 回答 1

5

下面是一些您可以在 rmarkdown 文档中执行的操作的示例,不过在表格格式化方面,您可以比我在这里做的走得更远。我给出了几个使用 `xtable` 的示例,以及几个使用 `kableExtra` 包的示例;除非您已经熟练掌握 LaTeX,否则 `kableExtra` 可能比 `xtable` 更容易使用。有关更多信息,请参阅 `xtable` 和 `kableExtra` 的小插图(vignette)。

为了汇总和重塑数据,我使用了 `dplyr` 和 `tidyr` 包中的函数(它们是 `tidyverse` 包套件的一部分)。您也可以使用基本 R 函数(例如 `aggregate` 和 `reshape`)或 `data.table` 包进行汇总和重塑。

rmarkdown文档

---
title: "Tables"
author: "eipi10"
date: "7/7/2017"
output: pdf_document
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: do not echo source code and do not
# show warnings in the rendered output.
knitr::opts_chunk$set(echo=FALSE, warning=FALSE)
library(xtable)
# xtable print defaults: omit the generated-by comment line and the row-name
# column from every table printed below.
options(xtable.comment=FALSE, xtable.include.rownames=FALSE)
# Attaches the tidyverse packages; the chunks below use dplyr and tidyr verbs
# and the %>% pipe.
library(tidyverse)
```

```{r}
# Simulated example data: 10 rows with a year, a treatment group and three
# uniform(0, 1) measurements. The seed keeps the document reproducible.
# TRUE replaces the reassignable T and the no-op rep() wrapper is dropped;
# the random draws happen in the same order, so the values are unchanged.
set.seed(1)
df <- data.frame(
  Year = sample(c(2012, 2016), 10, replace = TRUE),
  Group = sample(c("Treat", "Control"), 10, replace = TRUE),
  V1 = runif(10, 0, 1),
  V2 = runif(10, 0, 1),
  V3 = runif(10, 0, 1)
)
```

```{r, results="asis"}
# Mean of V1-V3 for every Year x Group combination.
# A bare function replaces the deprecated funs(); with a single unnamed
# function the output columns keep their input names (V1, V2, V3).
# ungroup() drops the leftover Year grouping that summarise leaves behind.
summary.table <- df %>%
  group_by(Year, Group) %>%
  summarise_all(mean) %>%
  ungroup()

# `digits` needs one entry per column plus a leading entry for the row names:
# no decimals for Year/Group, two decimals for the three means.
print(xtable(summary.table,
             caption = "Mean by Year and Group",
             digits = c(1, 0, 0, 2, 2, 2)))
```

```{r, results="asis"}
# Median of V1-V3 for every Year x Group combination.
# A bare function replaces the deprecated funs(); with a single unnamed
# function the output columns keep their input names (V1, V2, V3).
# ungroup() drops the leftover Year grouping that summarise leaves behind.
summary.table <- df %>%
  group_by(Year, Group) %>%
  summarise_all(median) %>%
  ungroup()

# `digits` needs one entry per column plus a leading entry for the row names:
# no decimals for Year/Group, two decimals for the three medians.
print(xtable(summary.table,
             caption = "Median by Year and Group",
             digits = c(1, 0, 0, 2, 2, 2)))
```

```{r, results="asis"}
# Mean and median of V1-V3 for every Year x Group combination.
# A named list of functions replaces the deprecated funs(); the list names
# become the column suffixes, giving V1_mean, V2_mean, V3_mean, V1_median, ...
summary.table <- df %>%
  group_by(Year, Group) %>%
  summarise_all(list(mean = mean, median = median)) %>%
  ungroup()

# `digits` needs one entry per column plus a leading entry for the row names:
# no decimals for Year/Group, two decimals for the six statistics.
print(xtable(summary.table,
             caption = "Mean and Median by Year and Group",
             digits = c(1, 0, 0, rep(2, 6))))
```

```{r}
# Build an xtable of summary statistics by Year and Group.
#
# Arguments:
#   data    Data frame with `Year` and `Group` columns; every other column is
#           summarised.
#   caption Caption passed on to xtable().
#   ...     One or more summary functions (e.g. mean, median, sum). Unnamed
#           functions are labelled with the expression used in the call;
#           named ones (e.g. avg = mean) use the supplied name.
#
# Returns an xtable object; print it from a chunk with results="asis".
xtab_fnc <- function(data, caption, ...) {

  fns <- list(...)
  if (length(fns) == 0) {
    stop("Supply at least one summary function.", call. = FALSE)
  }

  # funs() is deprecated (dplyr 0.8.0); summarise_all() takes a list of
  # functions instead. A single unnamed function is left unnamed so the output
  # keeps the input column names. Several functions are named so that the
  # columns become <column>_<function>, exactly as funs() labelled them.
  if (length(fns) > 1) {
    labels <- vapply(
      as.list(substitute(list(...)))[-1],
      function(e) paste(deparse(e), collapse = ""),
      character(1)
    )
    supplied <- names(fns)
    if (is.null(supplied)) {
      supplied <- rep("", length(fns))
    }
    names(fns) <- ifelse(supplied == "", unname(labels), supplied)
  }

  summary.table <- data %>%
    group_by(Year, Group) %>%
    summarise_all(fns) %>%
    ungroup()

  # Blank out repeated years so each year is printed once. The summary is
  # sorted by Year, so every duplicate is a consecutive repeat. Unlike the
  # former nrow:2 index loop, this also works with fewer than two rows.
  summary.table$Year <- replace(
    as.character(summary.table$Year),
    duplicated(summary.table$Year),
    ""
  )

  # `digits` needs one entry per column plus a leading entry for the row names.
  xtable(summary.table,
         caption = caption,
         digits = c(1, 0, 0, rep(2, ncol(summary.table) - 2)))
}
```

```{r, results="asis"}
# Render the mean/median/sum table in a small font so the wide table fits
print(
  x = xtab_fnc(
    data = df,
    caption = "Mean, Median, and Sum by Year and Group",
    mean, median, sum
  ),
  size = "scriptsize"
)
```

```{r, results="asis"}
# Render the means-only table in a larger font
print(
  x = xtab_fnc(
    data = df,
    caption = "Mean by Year and Group",
    mean
  ),
  size = "large"
)
```

```{r}
# You can do more complex formatting in xtable, but it's probably easier with kableExtra
library(knitr)
library(kableExtra)
```

```{r}
# Five summary statistics of V1-V3 for every Year x Group combination.
# A named list replaces the deprecated funs(). The columns come out grouped by
# statistic (V1_mean, V2_mean, V3_mean, V1_sd, ...), which is the order the
# spanning header below relies on.
summary.table <- df %>%
  group_by(Year, Group) %>%
  summarise_all(list(mean = mean, sd = sd, min = min,
                     median = median, max = max)) %>%
  ungroup()

# Blank out repeated years so each year is printed once. The summary is sorted
# by Year, so every duplicate is a consecutive repeat.
summary.table$Year <- replace(
  as.character(summary.table$Year),
  duplicated(summary.table$Year),
  ""
)

# Strip the "_<statistic>" suffixes; the statistic is shown in the spanning
# header instead, leaving V1/V2/V3 under each header cell.
names(summary.table) <- gsub("_.*", "", names(summary.table))

# LaTeX table: one digits entry per column (Year, Group, then 5 x 3 statistics)
kable(summary.table, format = "latex",
      booktabs = TRUE, caption = "kableExtra to format spanning columns",
      digits = c(0, 0, rep(3, 15))) %>%
  kable_styling(latex_options = c("striped", "hold_position", "scale_down"),
                full_width = FALSE) %>%
  # Each header cell spans the three variable columns of one statistic;
  # "[note]" marks where the footnote reference goes.
  add_header_above(c("", "", "Mean" = 3, "SD" = 3, "Min" = 3,
                     "Median[note]" = 3, "Max" = 3)) %>%
  add_footnote(c("Note, means and medians are often the same with this data."))
```

```{r}
# Reshape the summary so V1-V3 become rows and each statistic x group
# combination becomes a column.
# A named list replaces the deprecated funs(). gather()/spread() are kept on
# purpose: spread() orders the new columns alphabetically (max_*, mean_*,
# median_*, min_*, sd_*), which the spanning header below relies on.
summary.table <- df %>%
  group_by(Year, Group) %>%
  summarise_all(list(mean = mean, sd = sd, min = min,
                     median = median, max = max)) %>%
  ungroup() %>%
  gather(key, value, -Year, -Group) %>%          # long: one row per statistic
  separate(key, into = c("var", "stat")) %>%     # "V1_mean" -> "V1", "mean"
  unite(stat_Group, stat, Group) %>%             # "mean", "Treat" -> "mean_Treat"
  spread(stat_Group, value)

# Blank out repeated years so each year is printed once. The rows are sorted
# by Year, so every duplicate is a consecutive repeat.
summary.table$Year <- replace(
  as.character(summary.table$Year),
  duplicated(summary.table$Year),
  ""
)

# Keep only the group part of "<stat>_<Group>"; the statistic is shown in the
# spanning header instead.
names(summary.table) <- gsub(".*_", "", names(summary.table))

# LaTeX table: one digits entry per column (Year, var, then 5 x 2 statistics)
kable(summary.table, format = "latex",
      booktabs = TRUE, caption = "kableExtra to format spanning columns",
      digits = c(0, 0, rep(3, 10))) %>%
  kable_styling(latex_options = c("striped", "hold_position", "scale_down"),
                full_width = FALSE) %>%
  add_header_above(c("", "", "Max" = 2, "Mean" = 2, "Median" = 2,
                     "Min" = 2, "SD" = 2))
```

```{r}
# Reshape the summary so V1-V3 become rows; also add an "All" column that
# summarises every subject within a year regardless of group.
# A named list replaces the deprecated funs() and is defined once so the
# per-group and the "All" summaries cannot drift apart.
stat_fns <- list(mean = mean, sd = sd, min = min, median = median, max = max)

summary.table <- df %>%
  group_by(Year, Group) %>%
  summarise_all(stat_fns) %>%
  bind_rows(df %>%                        # bind_rows block adds the "All" rows,
              mutate(Group = "All") %>%   # which spread() turns into columns
              group_by(Year, Group) %>%
              summarise_all(stat_fns)) %>%
  ungroup() %>%
  gather(key, value, -Year, -Group) %>%          # long: one row per statistic
  separate(key, into = c("var", "stat")) %>%     # "V1_mean" -> "V1", "mean"
  unite(stat_Group, stat, Group) %>%             # "mean", "All" -> "mean_All"
  spread(stat_Group, value)                      # columns sorted alphabetically

# Blank out repeated years so each year is printed once. The rows are sorted
# by Year, so every duplicate is a consecutive repeat.
summary.table$Year <- replace(
  as.character(summary.table$Year),
  duplicated(summary.table$Year),
  ""
)

# Keep only the group part of "<stat>_<Group>"; the statistic is shown in the
# spanning header instead.
names(summary.table) <- gsub(".*_", "", names(summary.table))

# LaTeX table: one digits entry per column (Year, var, then 5 x 3 statistics)
kable(summary.table, format = "latex",
      booktabs = TRUE, caption = "kableExtra to format spanning columns; also add an 'All' column",
      digits = c(0, 0, rep(3, 15))) %>%
  kable_styling(latex_options = c("striped", "hold_position", "scale_down"),
                full_width = FALSE) %>%
  add_header_above(c("", "", "Max" = 3, "Mean" = 3, "Median" = 3,
                     "Min" = 3, "SD" = 3))
```

PDF 输出文件

在此处输入图像描述 在此处输入图像描述

于 2017-07-08T06:48:27.030 回答