2

我有以下代码。

completemodel <- function(model, colnum)
{
  modlst = c()
  tuplenum = length(model)
  if(tuplenum != 0)
    for(i in 1:tuplenum)
      modlst = c(modlst, model[[i]])
  index = seq(0, colnum-1)
  inddiff = setdiff(index, modlst)
  inddifflen = length(inddiff)
  for(i in seq(length.out=inddifflen))
    model = append(model, inddiff[i])
  return(model)
}

##   Calculate number of parameters in model.
numparam <- function(mod, colnum)
  {
    library(RJSONIO)
    mod = fromJSON(mod)
    mod = completemodel(mod, colnum)
    totnum = 0
    for(tup in mod)
      totnum = totnum +(4**length(tup))
    return(totnum)
  }

x = cbind.data.frame(rownum=c(100, 100), colnum=c(10, 20), modeltrue=c("[]", "[]"), modelresult=c("[[1,2]]","[[1,3]]"), stringsAsFactors=FALSE)

> x
  rows colnum modeltrue modelresult
1  100     10        []     [[1,2]]
2  100     20        []     [[1,3]]

我该如何操作x才能给我一个如下所示的数据框?numparam("[]", 10)当然,这里我的意思是当我写的时候 eg 的值numparam("[]", 10)

  rownum   colnum    numparam_modeltrue   numparam_modelresult
  100        10      numparam("[]", 10)   numparam("[[1,2]]", 10)
  100        20      numparam("[]", 20)   numparam("[[1,3]]", 20)

某些版本的 apply 函数可能会起作用,但我在找到合适的公式时遇到了问题。

更新:似乎如果rownnum,colnum元组不是唯一的,那么可以执行以下操作。

x = cbind.data.frame(id=c(1, 2, 3), rownum=c(100, 100, 100), colnum=c(10, 20, 20), modeltrue=c("[]", "[]", "[]"),
  modelresult=c("[[1,2]]","[[1,3]]","[[1,3, 4]]"), stringsAsFactors = FALSE)

##Then, create a data.table and set the key

library(data.table)
xDT <- as.data.table(x)
setkeyv(xDT, c("id", "rownum", "colnum")

那是正确的方法吗?

4

3 回答 3

3

如果你对它持开放态度,你可以使用这个data.table包。

首先,创建一个data.table,添加一个唯一标识符列id并将其设置为键

library(data.table)
xDT <- as.data.table(x)
xDT[, id := seq_len(nrow(xDT))]
setkey(xDT, "id")

然后,使用do.call,您可以numparam在适当的列上运行您的函数:

res1 <- xDT[, list(numparam_modeltrue = do.call(numparam, unname(.SD))),
  .SDcols = c(3, 2), by = key(xDT)]
res2 <- xDT[, list(numparam_modelresult = do.call(numparam, unname(.SD))),
  .SDcols = c(4, 2), by = key(xDT)]

然后将结果组合成一个data.table

xDT[res1][res2][, c("modeltrue", "modelresult") := NULL, with = FALSE]
   id rownum colnum numparam_modeltrue numparam_modelresult
1:  1    100     10                 40                   48
2:  2    100     20                 80                   88

编辑:

正如 Matthew Dowle 所建议的,您可以通过以下方式在最后没有 mrege 的情况下达到相同的结果:

xDT[,numparam_modeltrue := do.call(numparam, unname(.SD)),
  .SDcols = c(3, 2), by = key(xDT)]
xDT[,numparam_modelresult := do.call(numparam, unname(.SD)),
  .SDcols = c(4, 2), by = key(xDT)]

如果你想摆脱列modeltruemodelresult

xDT[,c("modeltrue", "modelresult") := NULL, with = FALSE]
# NOTE that with = FALSE shouldn't be necessary with data.table 1.8.3
# But I'm still with 1.8.2
于 2012-10-09T10:46:31.653 回答
1

下面的代码排序的作品。不过,它不是很漂亮。欢迎提出改进建议。特别是,不必转置矩阵并添加列名会很好,而且由于它返回一个矩阵,因此仍然存在整数转换为字符串的烦人问题。感谢flodel 提供的关于他对“将参数从矩阵的每一行传递给函数”的回答的提示。

completemodel <- function(model, colnum)
{
  modlst = c()
  tuplenum = length(model)
  if(tuplenum != 0)
    for(i in 1:tuplenum)
      modlst = c(modlst, model[[i]])
  index = seq(0, colnum-1)
  inddiff = setdiff(index, modlst)
  inddifflen = length(inddiff)
  for(i in seq(length.out=inddifflen))
    model = append(model, inddiff[i])
  return(model)
}

##   Calculate number of parameters in model.
numparam <- function(mod, colnum)
  {
    library(RJSONIO)
    mod = fromJSON(mod)
    print(paste("mod", mod))
    mod = completemodel(mod, colnum)
    totnum = 0
    for(tup in mod)
      totnum = totnum +(4**length(tup))
    return(totnum)
  }

numparamvec <- function(rownum, colnum, modeltrue, modelresult)
  {
    totnum1 = numparam(modeltrue, as.integer(colnum))
    totnum2 = numparam(modelresult, as.integer(colnum))
    return(c(rownum, colnum, totnum1, totnum2))
  }

x = cbind.data.frame(rownum=c(100, 100), colnum=c(10, 20), modeltrue=c("[]", "[]"), modelresult=c("[[1,2]]","[[1,3]]"), stringsAsFactors=FALSE)
val = t(apply(x, 1, function(x)do.call(numparamvec, as.list(x))))
colnames(val) = c("rownum", "colnum", "numparam_modeltrue", "numparam_modelresult")
于 2012-10-10T09:24:43.530 回答
1

使用 sapply 的替代方法:

numparamvec <- function(rownum, colnum, modeltrue, modelresult)
  {
    totnum1 = numparam(modeltrue, as.integer(colnum))
    totnum2 = numparam(modelresult, as.integer(colnum))
    return(c(rownum = rownum, colnum = colnum,
      numparam_modeltrue = totnum1, numparam_modelresult = totnum2))
  }

val <- sapply(seq_len(nrow(x)),
  function(y) do.call(numparamvec, x[y, ]))

> as.data.frame(t(val))
  rownum colnum numparam_modeltrue numparam_modelresult
1    100     10                 40                   48
2    100     20                 80                   88

替代方法使用vapply

val <- t(vapply(seq_len(nrow(x)), function(y) do.call(numparamvec, x[y, ]),
  c(rownum = 0, colnum = 0, numparam_modeltrue = 0, numparam_modelresult = 0)))

> val
     rownum colnum numparam_modeltrue numparam_modelresult
[1,]    100     10                 40                   48
[2,]    100     20                 80                   88
于 2012-10-11T12:09:07.703 回答