1

我经常需要生成包含平均值、标准差以及简单统计检验结果的表格。为了获得可重现、可共享的工作流程,我尝试使用 `tables::tabular`。(关于如何把检验作为函数包含进去,请参阅此处。)

这有效:

# Summary table: one row per level of `sampling`; for each treatment the
# columns show the mean and sd of var1.
nicetable <- tabular(
  sampling ~ treatment * var1 * (mean + sd),
  data = tab
)

但是,我没能定义出一个函数(例如配对 Wilcoxon 符号秩检验)来在多个抽样批次中比较各处理:似乎我没有把正确的参数或数据传递给该函数。

有比我聪明的人能帮忙吗?

如果需要,这里有一些可用于重现问题的数据(下面是 `dput` 的输出,使用时请将其赋值给 `tab`,例如 `tab <- structure(...)`):

structure(list(plot = structure(c(6L, 9L, 6L, 9L, 6L, 9L, 6L, 
9L, 12L, 15L, 12L, 15L, 12L, 15L, 12L, 15L, 5L, 16L, 5L, 16L, 
5L, 16L, 5L, 16L, 8L, 17L, 8L, 17L, 8L, 17L, 8L, 17L, 4L, 10L, 
4L, 10L, 4L, 10L, 4L, 10L, 2L, 11L, 2L, 11L, 2L, 11L, 2L, 11L, 
3L, 13L, 3L, 13L, 3L, 13L, 3L, 13L, 1L, 14L, 1L, 14L, 1L, 14L, 
1L, 14L, 24L, 19L, 24L, 19L, 24L, 19L, 24L, 19L, 22L, 23L, 22L, 
23L, 22L, 23L, 22L, 23L, 20L, 21L, 20L, 21L, 20L, 21L, 20L, 21L, 
7L, 18L, 7L, 18L, 7L, 18L, 7L, 18L), .Label = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", "22", "23", "24"), class = "factor"), 
    sampling = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 
    1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 
    1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 
    4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 
    4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 
    3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 
    3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L), .Label = c("1", 
    "2", "3", "4"), class = "factor"), pairs = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 
    8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
    11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("pair_1", 
    "pair_10", "pair_11", "pair_12", "pair_2", "pair_3", "pair_4", 
    "pair_5", "pair_6", "pair_7", "pair_8", "pair_9"), class = "factor"), 
    treatment = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("A", 
    "B"), class = "factor"), var1 = c(4, 6, 21, 11, 6, 11, 21, 
    16, 2, 5, 18, 18, 8, 5, 26, 24, 0, 4, 28, 26, 7, 11, 20, 
    29, 1, 4, 17, 28, 20, 11, 20, 24, 11, 8, 19, 15, 11, 10, 
    16, 17, 7, 4, 18, 21, 6, 6, 18, 16, 7, 2, 17, 15, 3, 12, 
    18, 26, 8, 5, 23, 17, 9, 8, 24, 20, 7, 7, 1, 17, 9, 10, 7, 
    0, 0, 5, 18, 0, 8, 10, 15, 17, 5, 7, 24, 19, 13, 8, 20, 17, 
    1, 2, 22, 19, 6, 8, 17, 13), var2 = c(0.531857406453951, 
    0.99147016935005, 0.978386084625517, 0.547701177005542, 0.557590884845267, 
    0.986951076487171, 0.562417675727868, 0.986951076487171, 
    0.483984835736487, 0.726909676798849, 0.388579012270421, 
    0.745553604701919, 0.465094116634877, 0.726909676798849, 
    0.488003757207879, 0.557184406817338, 0.701676487711027, 
    0.869080260649975, 0.720173845681177, 0.750917673793786, 
    0.755303408639525, 0.506987878760014, 0.686245881868453, 
    0.60763119427203, 0.548453587721443, 0.703832816328718, 0.412731402996848, 
    0.717973047643672, 0.550210159561483, 0.671791216125084, 
    0.361548563337832, 0.606668062640702, 0.518806412571116, 
    0.742554357381421, 0.507677339941509, 0.923200219631054, 
    0.341071242549443, 0.681636160803754, 0.384435345144425, 
    0.61998338971563, 0.557812388143911, 0.632317782224629, 0.603677751166685, 
    0.632317782224629, 0.624604514381939, 0.623183042284434, 
    0.589665731283708, 0.338738325837909, 0.448751068565499, 
    0.620695986589587, 0.412147458001507, 0.354008373981433, 
    0.444023865279733, 0.366742726110414, 0.368307839974067, 
    0.338054566392881, 0.492950438718815, 0.722825772176568, 
    0.529502336899605, 0.834207208644564, 0.523569852219379, 
    0.834207208644564, 0.591655754114154, 0.725359004030846, 
    0.604856790039767, 0.787389376103932, 0.491331714116263, 
    0.828838159960298, 0.506594233666576, 0.75537998521935, 0.477785779781003, 
    0.925304881641062, 0.425400499022199, 0.537980402016095, 
    0.443113792876767, 0.991210220561304, 0.366372451776005, 
    0.585051630458758, 0.363869227771921, 0.67007984346546, 0.37054162796269, 
    0.574771389575503, 0.446535654066238, 0.700306153200489, 
    0.358793598876081, 0.309159322200134, 0.372983177758783, 
    0.353384010493424, 0.492456412584678, 0.359873708654463, 
    0.436447650900556, 0.591291884661869, 0.436447650900556, 
    0.603360031882414, 0.453002902987777, 0.370462648444931)), .Names = c("plot", 
"sampling", "pairs", "treatment", "var1", "var2"), row.names = c(NA, 
96L), class = "data.frame")
4

1 回答 1

2

如果您能展示希望使用的语法以及期望的输出样式,可能会更有帮助。`tabular` 目前工作方式的问题在于:它只把单个向量(即数据列中与表格当前单元格对应的那部分子集)传递给汇总函数,而不传递任何其他参数。但是 `wilcox.test` 和其他检验函数需要多个参数,而 `tabular` 目前无法传递这些参数。

`tabular` 已经增加了计算百分比的功能(在公式中使用 `Percent`),因此向作者/维护者提出功能请求,将来也许会为检验提供类似的支持;但这会复杂得多,因为需要把各种可能的参数都传递进去。

您可以使用硬编码的额外参数创建自己的函数,如上一个答案中那样,但这可能会导致尝试维护可重现和可共享的工作流时遇到困难。

另一种选择是预先计算 p 值和其他任何感兴趣的汇总量,并将它们附加到数据框中,然后在公式中加入一项,用来取出这些计算列的第一个元素。下面是一个示例:先使用 `ddply` 进行计算并返回扩充后的数据框,然后调用 `tabular` 进行显示:

library(plyr)

# Augment `tab` with per-sampling paired-test results so tabular() can show
# them: every row of a sampling group receives the same P.value and diff.
tab2 <- ddply(tab, .(sampling), function(df) {
  is_a <- df$treatment == "A"
  is_b <- df$treatment == "B"

  # A paired test is only meaningful when both treatments cover the same
  # pairs; fail loudly instead of silently mis-pairing observations.
  stopifnot(identical(
    sort(as.character(df$pairs[is_a])),
    sort(as.character(df$pairs[is_b]))
  ))

  # Align the two treatments by pair id so the result does not depend on the
  # incoming row order.
  a <- df$var1[is_a][order(df$pairs[is_a])]
  b <- df$var1[is_b][order(df$pairs[is_b])]

  df$P.value <- wilcox.test(a, b, paired = TRUE)$p.value

  # Store a single summary value per group (mean paired difference, A - B).
  # Assigning the raw vector of per-pair differences would be silently
  # recycled across the rows, and the table, which shows only the first
  # element of the column, would then report one arbitrary pair's difference.
  df$diff <- mean(a - b)
  df
})

# Summary function whose name is a single space: returns the first element of
# its input. Presumably named this way so tabular() prints a blank heading
# for it -- confirm against the rendered table.
` ` <- function(x) {
  x[1L]
}

# Columns: mean and sd of var1 for each treatment, followed by the
# precomputed diff and P.value columns (the latter headed "Wilcoxon").
# The blank-named function ` ` picks the first element of each cell's data.
tabular(
  sampling ~ treatment * var1 * (mean + sd) +
    ` ` * diff +
    (Wilcoxon = ` ` * P.value),
  data = tab2
)

这不如直接在 `tabular` 的公式中添加一项那么方便,但您可以精确控制要计算的内容,然后再把它显示出来。

于 2014-07-22T16:27:05.430 回答