2

在我上一个问题中,有人指出,作为可重现示例的一部分,数据少一些会更易于阅读和理解。在准备重新提问时,我试图用 dput(head(data)) 来缩短数据,但得到的结果和 dput(data) 完全一样;换成 dput(data[1:6, ]) 或 dput(data)[1:6, ] 也是如此(在最后一种情况下,除了整个数据的 dput 输出之外,我还额外得到了数据的前 6 行)。

有没有简单的方法可以做到这一点?我在 dput 的选项中没有找到相关内容,但应该有某种办法,可以避免手动删除我不想显示的部分。

这是整个输入数据:

>dput(data)
structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 71L, 72L, 76L, 77L, 78L, 
83L, 87L, 88L, 89L, 93L, 96L, 97L, 101L, 103L, 104L, 105L, 106L, 
109L, 111L, 113L, 114L, 116L), .Label = c("GO:0000746", "GO:0000910", 
"GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399", "GO:0006412", 
"GO:0006457", "GO:0006464", "GO:0006468", "GO:0006486", "GO:0006520", 
"GO:0006725", "GO:0006766", "GO:0006810", "GO:0006811", "GO:0006839", 
"GO:0006897", "GO:0006950", "GO:0006970", "GO:0006974", "GO:0006979", 
"GO:0006986", "GO:0006997", "GO:0007005", "GO:0007010", "GO:0007029", 
"GO:0007031", "GO:0007033", "GO:0007034", "GO:0007049", "GO:0007059", 
"GO:0007114", "GO:0007124", "GO:0007126", "GO:0007165", "GO:0009408", 
"GO:0009409", "GO:0015031", "GO:0016044", "GO:0016050", "GO:0016070", 
"GO:0016071", "GO:0016072", "GO:0016192", "GO:0016567", "GO:0016568", 
"GO:0016570", "GO:0019725", "GO:0030435", "GO:0031505", "GO:0032196", 
"GO:0032989", "GO:0042221", "GO:0042254", "GO:0042594", "GO:0043543", 
"GO:0044255", "GO:0044257", "GO:0044262", "GO:0045333", "GO:0046483", 
"GO:0048193", "GO:0051169", "GO:0051186", "GO:0051276", "GO:0070271", 
"GO:0000278", "GO:0000902", "GO:0002181", "GO:0005975", "GO:0006325", 
"GO:0006353", "GO:0006360", "GO:0006366", "GO:0006383", "GO:0006397", 
"GO:0006401", "GO:0006414", "GO:0006418", "GO:0006470", "GO:0006605", 
"GO:0006629", "GO:0006865", "GO:0006869", "GO:0006873", "GO:0006887", 
"GO:0006914", "GO:0008033", "GO:0008213", "GO:0008643", "GO:0009311", 
"GO:0009451", "GO:0015931", "GO:0016197", "GO:0023052", "GO:0031399", 
"GO:0032543", "GO:0042255", "GO:0042273", "GO:0042274", "GO:0043144", 
"GO:0043934", "GO:0045454", "GO:0051052", "GO:0051321", "GO:0051603", 
"GO:0051604", "GO:0051726", "GO:0055086", "GO:0070647", "GO:0000054", 
"GO:0001403", "GO:0006352", "GO:0006354", "GO:0006364", "GO:0006413", 
"GO:0006417", "GO:0006497", "GO:0008380", "GO:0009072", "GO:0051049", 
"GO:0061025", "GO:0071554"), class = "factor"), GOdesc = structure(c(16L, 
17L, 23L, 19L, 58L, 62L, 59L, 37L, 39L, 40L, 38L, 3L, 4L, 67L, 
60L, 27L, 30L, 20L, 51L, 48L, 46L, 49L, 52L, 33L, 29L, 18L, 21L, 
34L, 64L, 63L, 2L, 14L, 1L, 43L, 28L, 56L, 47L, 45L, 41L, 9L, 
65L, 54L, 31L, 55L, 66L, 42L, 12L, 26L, 7L, 57L, 22L, 61L, 6L, 
44L, 53L, 50L, 35L, 8L, 10L, 5L, 11L, 25L, 24L, 32L, 15L, 13L, 
36L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), .Label = c("cell budding", "cell cycle", 
"cellular amino acid and metabolic process", "cellular aromatic compound metabolic process", 
"cellular carbohydrate metabolic process", "cellular component morphogenesis", 
"cellular homeostasis", "cellular lipid metabolic process", "cellular membrane organization", 
"cellular protein catabolic process", "cellular respiration", 
"chromatin modification", "chromosome organization and biogenesis", 
"chromosome segregation", "cofactor metabolic process", "conjugation", 
"cytokinesis", "cytoskeleton organization and biogenesis", "DNA metabolic process", 
"endocytosis", "ER organization and biogenesis", "fungal-type cell wall organization", 
"generation of precursor metabolites and energy", "golgi vesicle transport", 
"heterocycle metabolic process", "histone modification", "ion transport", 
"meiosis", "mitchondrion organization", "mitochondrial transport", 
"mRNA metabolic process", "nuclear transport", "nucleus organization", 
"peroxisome organization", "protein acylation", "protein complex biogenesis", 
"protein folding", "protein glycosylation", "protein modification process", 
"protein phosphorylation", "protein transport", "protein ubiquitination", 
"pseudohyphal growth", "response to chemical stimulus", "response to cold", 
"response to DNA damage stimulus", "response to heat", "response to osmotic stress", 
"response to oxidative stress", "response to starvation", "response to stress", 
"response to unfolded protein", "ribosome biogenesis", "RNA metabolic process", 
"rRNA metabolic process", "signal transduction", "sporulation resulting in formation of a cellular spore", 
"transcription", "translation", "transport", "transposition", 
"tRNA metabolic process", "vacuolar transport", "vacuole organizations", 
"vesicle organization", "vesicle-mediated transport", "vitamin metabolic process"
), class = "factor"), GSA_p33_SC = c(NA, -1, NA, NA, NA, NA, 
NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, 
-1, -1, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA, NA, NA, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA), GSA_p38_SC = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA), GSA_p38_X33 = c(NA, 
1, NA, NA, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 1, 
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, -1, NA, NA, 1, NA, NA), GSA_p52_SC = c(NA, NA, NA, NA, 
NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 
-1, -1, NA, NA, NA), GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, 
NA, NA, NA), GSA_p64_SC = c(NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, 1, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
1, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, -1, NA, -1, -1, 
NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, -1, 1, 
-1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA
), GSA_p64_X33 = c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, 
NA, NA, NA, NA, -1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, -1, -1), GSA_SC_X33 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, 1, -1, NA, -1, NA, NA, NA, -1, 1, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
-89L), class = "data.frame")

一个缩短的版本可能是这样的:

structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L),
.Label = c("GO:0000746", "GO:0000910", "GO:0006091", "GO:0006259",
 "GO:0006351", "GO:0006399"), class = "factor"),
 GOdesc = structure(c(16L,17L, 23L, 19L, 58L, 62L),
.Label = c("cell budding", "cell cycle", 
    "cellular amino acid and metabolic process", "cellular aromatic compound
 metabolic process", "cellular carbohydrate metabolic process", "cellular
component morphogenesis"), class = "factor"),
GSA_p33_SC = c(NA, -1, NA, NA, NA, NA),
GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
GSA_p38_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), 
GSA_p52_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA),
GSA_p64_SC = c(NA, NA, NA, NA, NA, NA),
GSA_p64_X33 = c(1, NA, NA, NA, NA, NA),
GSA_SC_X33 = c(NA, NA, NA, NA, NA, NA)),
.Names = c("GOterm", "GOdesc", 
    "GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
    "GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
    -6L), class = "data.frame"))
4

2 回答 2

3

所有这些多余的内容都来自你的因子(factor)水平(levels)。如果你确定在丢弃这些未使用的水平之后,你的问题仍然可以重现,那么可以考虑使用(没错,就是它)droplevels:

> dput(droplevels(head(data)))
structure(list(GOterm = structure(1:6, .Label = c("GO:0000746", 
"GO:0000910", "GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399"
), class = "factor"), GOdesc = structure(c(1L, 2L, 4L, 3L, 5L, 
6L), .Label = c("conjugation", "cytokinesis", "DNA metabolic process", 
"generation of precursor metabolites and energy", "transcription", 
"tRNA metabolic process"), class = "factor"), GSA_p33_SC = c(NA, 
-1, NA, NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
    GSA_p38_SC = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), GSA_p52_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p52_X33 = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_X33 = c(1, 
    NA, NA, NA, NA, NA), GSA_SC_X33 = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
6L), class = "data.frame") 

用下面这个例子更容易说明这一点:

x <- factor("A", levels = LETTERS)
x
# [1] A
# Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
dput(x)
# structure(1L, .Label = c("A", "B", "C", "D", "E", "F", "G", "H", 
# "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", 
# "V", "W", "X", "Y", "Z"), class = "factor")
dput(droplevels(x))
# structure(1L, .Label = "A", class = "factor")
于 2013-11-04T12:08:53.583 回答
0

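另一种缩短它的方法是在 dput 之前把各列转换为 character。之后可以用 as.data.frame 把数据读回,并保留因子水平。(注意:自 R 4.0 起 stringsAsFactors 默认为 FALSE,需要写成 as.data.frame(d, stringsAsFactors = TRUE) 才会把字符列转回因子;原来的数值列读回后也不再是数值,需要时请自行转换。)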

首先取一个子集

> data2 <- data[sample(nrow(data), 4), ]

然后把各列转换为字符后再 dput

> d <- dput(lapply(data2, as.character))
structure(list(GOterm = c("GO:0000746", "GO:0070647", "GO:0006914", 
"GO:0007010"), GOdesc = c("conjugation", NA, NA, "cytoskeleton organization and biogenesis"
), GSA_p33_SC = c(NA_character_, NA_character_, NA_character_, 
NA_character_), GSA_p33_X33 = c(NA, NA, "1", "1"), GSA_p38_SC = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p38_X33 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p52_SC = c(NA, 
"-1", NA, NA), GSA_p52_X33 = c(NA, NA, NA, "1"), GSA_p64_SC = c(NA, 
NA, NA, "1"), GSA_p64_X33 = c("1", NA, NA, NA), GSA_SC_X33 = c(NA, 
NA, NA, "1")), .Names = c("GOterm", "GOdesc", "GSA_p33_SC", "GSA_p33_X33", 
"GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", "GSA_p52_X33", "GSA_p64_SC", 
"GSA_p64_X33", "GSA_SC_X33"))

最后再把它读回

> as.data.frame(d)
于 2014-08-02T05:53:09.130 回答