0

我有一个包含大约 38 个子列表的列表。其中只有少数几个含有数据,需要被选出来(其余的没有值,即 NULL)。我想把这些子列表整理成一个规整的数据框。

我的清单:

structure(list(NULL, AFT = NULL, `AP-2` = NULL, `AT_hook, ETS` = NULL, 
    `BASIC, HLH` = NULL, BRIGHT = NULL, BRLZ = NULL, `BRLZ, BZIP_1, BZIP_2` = NULL, 
    bZIP = NULL, DWA = NULL, E2F_TDP = NULL, ETS = structure(list(
        MASHvstRap = 8.34818462488622e-05, MASHvsBEEML = 0.000250015234002341, 
        tRapvsBEEML = 8.80480124829088e-06, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), FH = structure(list(
        MASHvstRap = 1.72864219357795e-05, MASHvsBEEML = 0.000840376826415137, 
        tRapvsBEEML = 2.54589884424594e-07, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), GCM = NULL, 
    HLH = structure(list(MASHvstRap = 1.22573775496788e-08, MASHvsBEEML = 0.00119919900578073, 
        tRapvsBEEML = 3.60117573203279e-07, frequency = 13, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), HMG = structure(list(
        MASHvstRap = 6.07022175358029e-30, MASHvsBEEML = 0.0994358268075855, 
        tRapvsBEEML = 5.3728011843321e-09, frequency = 44, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), Homeo = structure(list(
        MASHvstRap = 4.33277656523673e-123, MASHvsBEEML = 0.442020719677047, 
        tRapvsBEEML = 8.44025048683083e-74, frequency = 158, 
        stringsAsFactors = 0), .Names = c("MASHvstRap", "MASHvsBEEML", 
    "tRapvsBEEML", "frequency", "stringsAsFactors"), row.names = c(NA, 
    -1L), class = "data.frame"), `Homeo ` = structure(list(MASHvstRap = 3.36388469632471e-14, 
        MASHvsBEEML = 0.763756578209722, tRapvsBEEML = 3.75944533892572e-07, 
        frequency = 19, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), `Homeo, PAX` = NULL, 
    `Homeo, POU` = structure(list(MASHvstRap = 3.06769943976602e-08, 
        MASHvsBEEML = 0.423594358667165, tRapvsBEEML = 7.51004008659922e-09, 
        frequency = 11, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), `HSF_DNA-bind` = NULL, 
    `HTH APSES-type` = NULL, IRF = structure(list(MASHvstRap = 1.25502843779857e-05, 
        MASHvsBEEML = 0.00094114146973297, tRapvsBEEML = 1.17030570144044e-06, 
        frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), MADS = NULL, 
    Myb = NULL, RFX = NULL, SAND = NULL, SANT = NULL, TBOX = NULL, 
    TBP = NULL, TEA = NULL, unknown = structure(list(MASHvstRap = 4.82890837154273e-32, 
        MASHvsBEEML = 0.0736357072352032, tRapvsBEEML = 7.20783906680568e-26, 
        frequency = 121, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), Zf_C2H2 = NULL, 
    Zf_GATA = NULL, Zn2Cys6 = structure(list(MASHvstRap = 4.71138538453502e-05, 
        MASHvsBEEML = 0.000623286035357452, tRapvsBEEML = 3.93333369828925e-07, 
        frequency = 17, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), ZnF_C2H2 = structure(list(
        MASHvstRap = 1.62205005760679e-17, MASHvsBEEML = 1.46483433509648e-08, 
        tRapvsBEEML = 2.89656372293867e-25, frequency = 54, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), ZnF_C4 = structure(list(
        MASHvstRap = 4.93181852868703e-06, MASHvsBEEML = 0.0467257430288347, 
        tRapvsBEEML = 6.69189512726035e-07, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), ZnF_GATA = NULL), .Names = c("", 
"AFT", "AP-2", "AT_hook, ETS", "BASIC, HLH", "BRIGHT", "BRLZ", 
"BRLZ, BZIP_1, BZIP_2", "bZIP", "DWA", "E2F_TDP", "ETS", "FH", 
"GCM", "HLH", "HMG", "Homeo", "Homeo ", "Homeo, PAX", "Homeo, POU", 
"HSF_DNA-bind", "HTH APSES-type", "IRF", "MADS", "Myb", "RFX", 
"SAND", "SANT", "TBOX", "TBP", "TEA", "unknown", "Zf_C2H2", "Zf_GATA", 
"Zn2Cys6", "ZnF_C2H2", "ZnF_C4", "ZnF_GATA"))

如您所见,一些子列表不包含任何值,因此应该删除。我想要的是一个有 4 列的数据框:family、method、p.value、frequency。在列表中,每个 family(家族)的数据都以如下形式给出:$Zn2Cys6

    MASHvstRap MASHvsBEEML  tRapvsBEEML frequency
1 4.711385e-05 0.000623286 3.933334e-07        17

所以 $Zn2Cys6 是 family(家族)的名称,应该作为一列添加在 MASHvstRap 前面。因此,应该将该列表展开(unlist),并重新转换为具有以下列名的数据框:family、method(例如 'MASHvstRap')、p.value、frequency。我尝试过使用 lapply(rbind),但得到的是一个奇怪的结构。cbind 和 as.data.frame 也没有解决问题。

4

3 回答 3

7

如果将您发布的 structure 命名为 ll,可以执行以下操作:

# Stack all elements of `ll` row-wise; the printed result below contains only
# the elements that hold a data frame, labelled by their list names.
> do.call("rbind",ll)
              MASHvstRap  MASHvsBEEML  tRapvsBEEML frequency
ETS         8.348185e-05 2.500152e-04 8.804801e-06        10
FH          1.728642e-05 8.403768e-04 2.545899e-07        10
HLH         1.225738e-08 1.199199e-03 3.601176e-07        13
HMG         6.070222e-30 9.943583e-02 5.372801e-09        44
Homeo      4.332777e-123 4.420207e-01 8.440250e-74       158
Homeo       3.363885e-14 7.637566e-01 3.759445e-07        19
Homeo, POU  3.067699e-08 4.235944e-01 7.510040e-09        11
IRF         1.255028e-05 9.411415e-04 1.170306e-06        10
unknown     4.828908e-32 7.363571e-02 7.207839e-26       121
Zn2Cys6     4.711385e-05 6.232860e-04 3.933334e-07        17
ZnF_C2H2    1.622050e-17 1.464834e-08 2.896564e-25        54
ZnF_C4      4.931819e-06 4.672574e-02 6.691895e-07        10

第一列是 row.names(即原来各列表项的名称)。

于 2013-06-20T10:27:57.340 回答
2

如果将这个由列表组成的列表命名为“pino”,只需输入:

# Build one row per non-empty family from `pino` (the list of one-row data
# frames, most of whose elements are NULL).
#
# The data frames are stacked with rbind() instead of being flattened with
# unlist() and re-cut into a 5-column matrix, because:
#   * columns are selected by name, so nothing depends on every element having
#     exactly five columns in a fixed order;
#   * each column keeps its own type (unlist() coerces everything to one type);
#   * the list names (the families) survive as the row names of the result.
value_cols <- c("MASHvstRap", "MASHvsBEEML", "tRapvsBEEML", "frequency")
prova <- do.call(rbind, Filter(Negate(is.null), pino))[, value_cols]

不过,可能有一个更通用的解决方案......

于 2013-06-20T10:28:49.990 回答
2

这是一个使用 reshape2 的解决方案:

library(reshape2)

# Stack every element of `dat` row-wise into a single data frame; the row
# names of the result are the list names (the families).
tmp <- do.call(rbind, dat)
# Drop the fifth column (in the posted data this is the stray
# `stringsAsFactors` column).
tmp <- tmp[-5]
# Copy the row names into a regular column so they survive the reshape.
tmp$family <- rownames(tmp)

# Go from wide to long: the first three columns (one per method comparison)
# are collapsed into a `method` label plus its `p.value`; the remaining
# columns (`frequency`, `family`) are kept as identifiers.
melt(
  tmp,
  measure.vars = names(tmp)[seq_len(3)],
  variable.name = "method",
  value.name = "p.value"
)

结果:

   frequency     family      method       p.value
1         10        ETS  MASHvstRap  8.348185e-05
2         10         FH  MASHvstRap  1.728642e-05
3         13        HLH  MASHvstRap  1.225738e-08
4         44        HMG  MASHvstRap  6.070222e-30
5        158      Homeo  MASHvstRap 4.332777e-123
6         19     Homeo   MASHvstRap  3.363885e-14
7         11 Homeo, POU  MASHvstRap  3.067699e-08
8         10        IRF  MASHvstRap  1.255028e-05
9        121    unknown  MASHvstRap  4.828908e-32
10        17    Zn2Cys6  MASHvstRap  4.711385e-05
11        54   ZnF_C2H2  MASHvstRap  1.622050e-17
12        10     ZnF_C4  MASHvstRap  4.931819e-06
13        10        ETS MASHvsBEEML  2.500152e-04
14        10         FH MASHvsBEEML  8.403768e-04
15        13        HLH MASHvsBEEML  1.199199e-03
16        44        HMG MASHvsBEEML  9.943583e-02
17       158      Homeo MASHvsBEEML  4.420207e-01
18        19     Homeo  MASHvsBEEML  7.637566e-01
19        11 Homeo, POU MASHvsBEEML  4.235944e-01
20        10        IRF MASHvsBEEML  9.411415e-04
21       121    unknown MASHvsBEEML  7.363571e-02
22        17    Zn2Cys6 MASHvsBEEML  6.232860e-04
23        54   ZnF_C2H2 MASHvsBEEML  1.464834e-08
24        10     ZnF_C4 MASHvsBEEML  4.672574e-02
25        10        ETS tRapvsBEEML  8.804801e-06
26        10         FH tRapvsBEEML  2.545899e-07
27        13        HLH tRapvsBEEML  3.601176e-07
28        44        HMG tRapvsBEEML  5.372801e-09
29       158      Homeo tRapvsBEEML  8.440250e-74
30        19     Homeo  tRapvsBEEML  3.759445e-07
31        11 Homeo, POU tRapvsBEEML  7.510040e-09
32        10        IRF tRapvsBEEML  1.170306e-06
33       121    unknown tRapvsBEEML  7.207839e-26
34        17    Zn2Cys6 tRapvsBEEML  3.933334e-07
35        54   ZnF_C2H2 tRapvsBEEML  2.896564e-25
36        10     ZnF_C4 tRapvsBEEML  6.691895e-07
于 2013-06-20T10:36:10.610 回答