3

我正在从具有两列的 mysql 加载数据:id,rt

id 在实践中对应于许多 rts(我设计不佳的表):

      id   rt
 1 5129052 2  
 2 5129052 2
 3 5129052 5
 4 5129052 6
 5 7125052 0
 6 7125052 1
 7 7125052 2
 8 7125052 4
 9 7125052 6
10 7125052 7

我想创建一个如下所示的数据透视表。第一列只是行数,没什么特别的。

     5129052 7125052
  1    2       0
  2    2       1
  3    5       2
  4    6       4
  5   NA       6
  6   NA       7

如果可能,还按升序对值进行排序。

谢谢!

4

5 回答 5

5

你的答案必须是一个矩阵吗?因为在这种情况下,矩阵对我来说没有多大意义。似乎列表将是一个更实用的解决方案,允许rt每个id. 例如:

lapply(split(df$rt, df$id), sort)
于 2013-06-18T21:00:00.283 回答
2

使用 Jean V.Adams 提供的内容,您可以使用以下内容创建数据框。

 dat <- read.table(text= "  id   rt
 1 5129052 2  
 2 5129052 2
 3 5129052 5
 4 5129052 6
 5 7125052 0
 6 7125052 1
 7 7125052 2
 8 7125052 4
 9 7125052 6
 10 7125052 7",header=TRUE,sep="")

 tmp <- split(dat$rt,dat$id,sort)
 res <- sapply(tmp,function(x) { c(x,rep(NA,maxl - length(x)))})
于 2013-06-19T08:21:07.700 回答
2

抱歉,但我发现这里的大多数答案都有些矫枉过正。这里还有两个建议。两者都依赖于创建一个辅助“id”,它表示现有 id 存在的值的数量。

## Create a secondary "id"
df$id2 <- ave(as.character(df$id), df$id, FUN = seq_along)
df  ## Your new "df"
        id rt id2
1  5129052  2   1
2  5129052  2   2
3  5129052  5   3
4  5129052  6   4
5  7125052  0   1
6  7125052  1   2
7  7125052  2   3
8  7125052  4   4
9  7125052  6   5
10 7125052  7   6

选项 1:基础 Rreshape

如果需要,只需清理变量名,就可以了。

reshape(df, direction = "wide", idvar = "id2", timevar = "id")
   id2 rt.5129052 rt.7125052
1    1          2          0
2    2          2          1
3    3          5          2
4    4          6          4
9    5         NA          6
10   6         NA          7

选项 2:dcast来自“reshape2”

更清晰的语法和更清晰的输出。

library(reshape2)
dcast(df, id2 ~ id, value.var="rt")
  id2 5129052 7125052
1   1       2       0
2   2       2       1
3   3       5       2
4   4       6       4
5   5      NA       6
6   6      NA       7

至于你的奖金问题?这些解决方案都输出常规data.frame,因此write.csv可以直接在它们上使用。

于 2013-07-10T17:14:01.953 回答
1

您可以 cbind 列表的元素:

# Loading dataset
df <- structure(list(id = c(5129052L, 5129052L, 5129052L, 5129052L, 
7125052L, 7125052L, 7125052L, 7125052L, 7125052L, 7125052L), 
    rt = c(2L, 2L, 5L, 6L, 0L, 1L, 2L, 4L, 6L, 7L)), .Names = c("id", 
"rt"), class = "data.frame", row.names = c(NA, -10L))

# cbind the list output
do.call(cbind,split(df$rt, df$id))

#Output: the values of the first list are recycled instead of NAs 
#      5129052 7125052
#[1,]       2       0
#[2,]       2       1
#[3,]       5       2
#[4,]       6       4
#[5,]       2       6
#[6,]       2       7

# A.N. Spiess wrote a cbind.na function http://rmazing.wordpress.com/2012/06/19/dont-fill-me-up/#comments
    cbind.na <- function (..., deparse.level = 1)
{
 na <- nargs() - (!missing(deparse.level))
 deparse.level <- as.integer(deparse.level)
 stopifnot(0 <= deparse.level, deparse.level <= 2)
 argl <- list(...)
 while (na > 0 && is.null(argl[[na]])) {
 argl <- argl[-na]
 na <- na - 1
 }
 if (na == 0)
 return(NULL)
 if (na == 1) {
 if (isS4(..1))
 return(cbind2(..1))
 else return(matrix(...)) ##.Internal(cbind(deparse.level, ...)))
 }
 if (deparse.level) {
 symarg <- as.list(sys.call()[-1L])[1L:na]
 Nms <- function(i) {
 if (is.null(r <- names(symarg[i])) || r == "") {
 if (is.symbol(r <- symarg[[i]]) || deparse.level ==
 2)
 deparse(r)
 }
 else r
 }
 }
 ## deactivated, otherwise no fill in with two arguments
 if (na == 0) {
 r <- argl[[2]]
 fix.na <- FALSE
 }
 else {
 nrs <- unname(lapply(argl, nrow))
 iV <- sapply(nrs, is.null)
 fix.na <- identical(nrs[(na - 1):na], list(NULL, NULL))
 ## deactivated, otherwise data will be recycled
 #if (fix.na) {
 # nr <- max(if (all(iV)) sapply(argl, length) else unlist(nrs[!iV]))
 # argl[[na]] <- cbind(rep(argl[[na]], length.out = nr),
 # deparse.level = 0)
 #}
 if (deparse.level) {
 if (fix.na)
 fix.na <- !is.null(Nna <- Nms(na))
 if (!is.null(nmi <- names(argl)))
 iV <- iV & (nmi == "")
 ii <- if (fix.na)
 2:(na - 1)
 else 2:na
 if (any(iV[ii])) {
 for (i in ii[iV[ii]]) if (!is.null(nmi <- Nms(i)))
 names(argl)[i] <- nmi
 }
 }

 ## filling with NA's to maximum occuring nrows
 nRow <- as.numeric(sapply(argl, function(x) NROW(x)))
 maxRow <- max(nRow, na.rm = TRUE)
 argl <- lapply(argl, function(x) if (is.null(nrow(x))) c(x, rep(NA, maxRow - length(x)))
 else rbind.na(x, matrix(, maxRow - nrow(x), ncol(x))))
 r <- do.call(cbind, c(argl[-1L], list(deparse.level = deparse.level)))
 }
 d2 <- dim(r)
 r <- cbind2(argl[[1]], r)
 if (deparse.level == 0)
 return(r)
 ism1 <- !is.null(d1 <- dim(..1)) && length(d1) == 2L
 ism2 <- !is.null(d2) && length(d2) == 2L && !fix.na
 if (ism1 && ism2)
 return(r)
 Ncol <- function(x) {
 d <- dim(x)
 if (length(d) == 2L)
 d[2L]
 else as.integer(length(x) > 0L)
 }
 nn1 <- !is.null(N1 <- if ((l1 <- Ncol(..1)) && !ism1) Nms(1))
 nn2 <- !is.null(N2 <- if (na == 2 && Ncol(..2) && !ism2) Nms(2))
 if (nn1 || nn2 || fix.na) {
 if (is.null(colnames(r)))
 colnames(r) <- rep.int("", ncol(r))
 setN <- function(i, nams) colnames(r)[i] <<- if (is.null(nams))
 ""
 else nams
 if (nn1)
 setN(1, N1)
 if (nn2)
 setN(1 + l1, N2)
 if (fix.na)
 setN(ncol(r), Nna)
 }
 r
}

# Which can be used to produce the desired output
do.call(cbind.na,split(df$rt, df$id))

#     5129052 7125052
#[1,]       2       0
#[2,]       2       1
#[3,]       5       2
#[4,]       6       4
#[5,]      NA       6
#[6,]      NA       7
于 2013-06-19T06:18:57.550 回答
0

非常丑陋但可行的解决方案:

> dput(df)
structure(list(id = c(5129052L, 5129052L, 5129052L, 5129052L, 
7125052L, 7125052L, 7125052L, 7125052L, 7125052L, 7125052L), 
    rt = c(2L, 2L, 5L, 6L, 0L, 1L, 2L, 4L, 6L, 7L)), .Names = c("id", 
"rt"), class = "data.frame", row.names = c("1", "2", "3", "4", 
"5", "6", "7", "8", "9", "10"))

> df
        id rt
1  5129052  2
2  5129052  2
3  5129052  5
4  5129052  6
5  7125052  0
6  7125052  1
7  7125052  2
8  7125052  4
9  7125052  6
10 7125052  7

计算条目:

t1 = table(df$id)
> t1

5129052 7125052 
      4       6

初始化矩阵:

   foo = matrix(NA,max(t1),length(t1))

填充矩阵:

for (x in names(t1)){foo[1:t1[x],x] = sort(df$rt[df$id==x])}

> foo
     5129052 7125052
[1,]       2       0
[2,]       2       1
[3,]       5       2
[4,]       6       4
[5,]      NA       6
[6,]      NA       7
于 2013-06-18T20:51:42.580 回答