1

我尝试了几种方法,但都没有成功。我有一个很大的数据框,有 6 列、8000 多行。第一列是两个字母的代码(例如 AA、AB 等)。第二列是其余 4 列共用的索引值(例如,AA 对应 0 到 180,AB 对应 0 到 170,等等)。其余各列是数值。

这是我的 DataFrame 的摘录:

 structure(list(X2code = c("AA", "AA", "AA", "AA", "AA", "AA", 
"AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", 
"AA", "AA", "AA", "AA", "AA", "AA", "AB", "AB", "AB", "AB", "AB", 
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", 
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", 
"AB", "AB", "AB", "AB", "AB", "AB", "AC", "AC", "AC", "AC", "AC", 
"AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC"
), DOY = c(294L, 295L, 296L, 297L, 298L, 299L, 300L, 301L, 302L, 
303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L, 311L, 312L, 313L, 
314L, 315L, 316L, 294L, 295L, 296L, 297L, 298L, 299L, 300L, 301L, 
302L, 303L, 304L, 305L, 306L, 307L, 308L, 309L, 310L, 311L, 312L, 
313L, 314L, 315L, 316L, 317L, 318L, 319L, 320L, 321L, 322L, 323L, 
324L, 325L, 326L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 
176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L), WDrain = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 
0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 
0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 0.13, 
0.13, 0.13, 0.13, 0.13, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 
244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 244.1, 
244.1), CumET = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3.34, 4.75, 5.33, 
6.67, 7.41, 7.84, 8.78, 9.59, 10.47, 10.57, 11.17, 11.91, 12.07, 
12.48, 12.63, 12.88, 13.8, 14.56, 15.11, 15.43, 15.86, 16.66, 
17.27, 17.54, 18.21, 18.64, 18.75, 19.11, 19.2, 19.85, 20.48, 
21.02, 21.32, 222, 226, 233, 241, 250, 258, 265, 269, 271, 276, 
279, 281, 281, 283, 285, 288), SoilAvW = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 246.51, 245.1, 249.52, 248.68, 248.04, 247.44, 246.5, 
245.69, 244.81, 244.71, 244.11, 243.37, 243.3, 242.88, 242.83, 
242.58, 241.66, 241, 243.65, 243.5, 243.36, 242.65, 249.03, 250.74, 
253.05, 266.21, 270.28, 279.71, 287.9, 288.84, 288.69, 288.25, 
295.13, 330.2, 326, 319.5, 311.2, 302.8, 294.4, 287.7, 287.2, 
285, 280.4, 278.6, 276, 282.3, 286.5, 284.1, 281.5), Runoff = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L), Transp = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.01, 0.01, 0.01, 
0.01, 0.01, 0.02, 0.02, 0.02, 0.02, 0.03, 0.03, 0.03, 0.03, 0.04, 
0.04, 0.04, 0.04, 0.04, 0.05, 0.05, 105.7, 109.1, 114.5, 121.5, 
128.9, 136.2, 141.9, 145.4, 147.1, 150.7, 152.9, 154.8, 155, 
155.8, 157.9, 159.9)), .Names = c("X2code", "DOY", "WDrain", 
"CumET", "SoilAvW", "Runoff", "Transp"), class = "data.frame", row.names = c(NA, 
-72L))

我想创建 4 个新数据框,在每个数据框中,把长列按代码拆开,使 AA、AB 等各自成为新的列。在每组数值旁边,我还想保留该两字母代码对应的第 2 列的值。例如,AA 有一列 0 到 180,旁边是它的数值;AB 有一列 0 到 170,旁边是它的数值。此外,我想在这两列的顶部都标上对应的两字母代码。下面是我想要的数据框示例(例如,这是 value1 对应的结果)。

AA  AA  AB  AB  AC  AC

0   2   0   0.5 0   50
1   2.4 1   1.6 1   0.6
2   5   2   4.6 ..  ..
3   6.7 3   2   ..  ..
..  ..  ..  ..      
..  ..  170 70      
180 10          

下面是我的部分代码:

  # Asker's partial attempt: read the table, then loop over the codes and try
  # to write out the rows that belong to each code.
  setwd("C:\\.....")

    my.data <- read.table("my.data.txt", header=T, na.strings = c("na" , "n/a" , "NA" , "") , stringsAsFactors = F)

   # NOTE(review): `my.data$2.code` does not parse in R, because a name after
   # `$` cannot start with a digit unless it is backquoted. The dput() excerpt
   # in this post shows the column named X2code, which is presumably the
   # column meant here.
   # NOTE(review): the loop runs once per element of the code column, so a
   # repeated code is visited repeatedly; presumably unique codes were
   # intended -- confirm.
   for(id in my.data$2.code)
     {

# Keep columns 2 to 3 of the rows whose code equals the current id.

       data.code <- my.data[my.data$2.code == id, 2:3] 

  # This is where the asker is stuck: attempts with DOBY (presumably the doBy
  # package) and other operations did not give the desired layout.
  # NOTE(review): the first argument is an elided placeholder, and every
  # iteration targets the same file name "trials.txt".
       write.table(....., "trials.txt", quote=F, col.names=FALSE, row.names= F)


           }
4

1 回答 1

2

我建议保持您的 data.frame 原样,因为这种格式最便于在 R 中做进一步处理。不过,下面有几种做法,它们不能完全得到您想要的输出(那并不容易实现),但可能仍然有帮助。

# Option 1 (reshape2): cast DF to wide form, with one row per DOY and one
# WDrain column per X2code value.
library(reshape2)
WDrain.wide <- dcast(
  data = DF,
  formula = DOY ~ X2code,
  value.var = "WDrain"
)

# Option 2 (plyr): build a list with one element per X2code value, each
# holding columns 2 and 3 of that group's rows.
library(plyr)
WDrain.strange <- dlply(
  .data = DF,
  .variables = .(X2code),
  .fun = function(group_rows) group_rows[, 2:3]
)

或者按照@SimonO101 的建议:

# Base-R alternative: split the rows of DF (minus its first column) into a
# list with one element per distinct value of the first column.
split(DF[, -1], f = DF[, 1])
于 2013-04-19T15:45:10.317 回答