11

有没有办法让数据框引用自己?

我发现自己花了很多时间写类似 y$Category1[is.na(y$Category1)] <- NULL 这样的代码,既难以阅读,又感觉是在缓慢地重复打字。我想知道是否有类似下面这样的写法:

y$Category1[is.na(self)] <- NULL 可以让我用来代替。

谢谢

4

1 回答 1

7

多么棒的问题。不幸的是,正如@user295691 在评论中指出的那样,问题在于两次引用向量:一次作为被索引的对象,一次作为条件的主题。避免双重引用似乎是不可能的。

numericVector[cond(numericVector)] <- newVal

我认为我们能做的,是用一个简洁的函数来代替原来的写法:

 # this  
 y$Category1[is.na(y$Category1)] <- list(NULL)

 # we can have this: 
 NAtoNULL(y$Category1)

例如,以下函数都是对 selfAssign()(见下文)的封装:

NAtoNULL(obj)      # Replaces NA values in obj with NULL.
NAtoVal(obj, val)  # Replaces NA values in obj with val.
selfReplace(obj, toReplace, val)  # Replaces toReplace values in obj with val

# and selfAssign can be called directly, but I'm not sure there would be a good reason to
selfAssign(obj, ind, val)  # equivalent to obj[ind] <- val

例子:

# sample data frame: 7 rows, with NAs scattered across all three columns
df <- data.frame(
  subj = c("A", NA, "C", "D", "E", NA, "G"),
  temp = c(111L, 112L, NA, 114L, 115L, 116L, NA),
  size = c(0.7133, NA, 0.7457, NA, 0.0487, NA, 0.8481),
  stringsAsFactors = FALSE  # keep subj as character, not factor
)

df
  subj temp   size
1    A  111 0.7133
2 <NA>  112     NA
3    C   NA 0.7457
4    D  114     NA
5    E  115 0.0487
6 <NA>  116     NA
7    G   NA 0.8481

# Make some replacements (each call modifies df in place)
NAtoNULL(df$size)    # Replace all NAs in df$size with NULLs
NAtoVal(df$temp, 0)  # Replace all NAs in df$temp with 0s
NAtoVal(df$subj, c("B", "E"))   # Replace all NAs in df$subj with alternating "B" and "E" 

# the modified df is now:  
df

  subj temp   size
1    A  111 0.7133
2    B  112   NULL
3    C    0 0.7457
4    D  114   NULL
5    E  115 0.0487
6    E  116   NULL
7    G    0 0.8481


# replace the 0s in temp with NA
selfReplace(df$temp, 0, NA)

# replace the NULLs in size with 1s
selfReplace(df$size, NULL, 1)

# replace every "E" in subj, alternating between "E" and "F"
selfReplace(df$subj, c("E"), c("E", "F"))

df

  subj temp   size
1    A  111 0.7133
2    B  112      1
3    C   NA 0.7457
4    D  114      1
5    E  115 0.0487
6    F  116      1
7    G   NA 0.8481

目前这对向量有效,但在 *apply 中使用时会失败。我很希望它能完全正常工作,尤其是与 plyr 配合使用时。关键在于修改 n 的取值以及 match.call() 中的 sys.parent(.)(详见下文)。


函数

函数的代码如下。

重要提示:这(目前还!)不适用于 *apply / plyr。
我相信可以通过修改 n 的值并调整 match.call() 中的 sys.parent(.) 来实现,但仍需要一些调试。欢迎任何建议或修改。

#' Assign `val` to `self[[ind]]` in a calling environment
#'
#' Builds one `[[<-` assignment statement per selected index as text and
#' evaluates the statements in `parent.frame(n)`, so the object named by the
#' caller is modified in place.
#'
#' The target expression is NOT read from the value of `self`. It is recovered
#' by re-matching `sys.call(sys.parent(1))` (the call of the function that
#' invoked `selfAssign()`) against this function's formals and taking the
#' first argument. The target must therefore be the first, positionally
#' supplied argument of the calling wrapper.
#'
#' @param self Target vector expression, e.g. `x` or `df$name`. Matrices and
#'   data frames are not supported.
#' @param ind Logical mask or numeric positions of the elements to overwrite.
#' @param val Replacement value(s); recycled against `ind` by `paste0()`.
#'   `NULL` is written as `list(NULL)`. Must not be a zero-length vector.
#' @param n Number of frames to go up from this function to reach the
#'   environment in which the assignment is evaluated.
#' @param silent If `TRUE`, do not warn when no index is selected.
#' @return `NULL` (invisibly) when no index is selected; otherwise the result
#'   of evaluating the generated assignments.
selfAssign <- function(self, ind, val, n=1, silent=FALSE) {

  ## GRAB THE CORRECT MATCH CALL
  #--------------------------------------
      # Re-match the caller's call against selfAssign's formals so that the
      # caller's first argument is picked up as `self`.  This must stay in the
      # body of selfAssign itself: sys.parent() is frame-sensitive.
      # (The former `class(match.call()) == "call"` test was always TRUE, so
      # its else branch was dead code and has been removed.)
      mc <- match.call(call = sys.call(sys.parent(1)))

      # needed in case self is complex (ie df$name)
      mc2 <- paste(as.expression(mc[[2]]))


  ## CLEAN UP ARGUMENT VALUES
  #--------------------------------------
      # replace logical indices with numeric indices
      if (is.logical(ind)) {
        ind <- which(ind)
      }

      # if no indices will be selected, stop here.  length() also catches
      # numeric(0), which the former identical(ind, integer(0)) test missed
      # and which then produced an invalid `x[[]] <- val` statement.
      if (length(ind) == 0) {
        if (!silent) warning("No indecies selected")
        return(invisible(NULL))
      }

      # a zero-length replacement cannot be turned into a valid statement
      if (!is.null(val) && length(val) == 0) {
        stop("'val' must have length >= 1 (or be NULL)", call. = FALSE)
      }

      # strings must become R string literals.  deparse() escapes embedded
      # quotes and backslashes and keeps NA_character_ as a real NA rather
      # than turning it into the string "NA".
      if (is.character(val)) {
        val <- vapply(val, deparse, character(1), USE.NAMES = FALSE)
      }

      # val cannot directly be NULL, must be list(NULL)
      if (is.null(val)) {
        val <- "list(NULL)"
      }


  ## CREATE EXPRESSIONS AND EVAL THEM
  #--------------------------------------
     # one statement per index; paste0 recycles ind and val against each other
     ret <- paste0("'[['(", mc2, ", ", ind, ") <- ", val)

     # evaluate in parent.frame(n)
     eval(parse(text = ret), envir = parent.frame(n))
}


#' Replace the NA entries of `obj` with NULL, in place
#'
#' Thin wrapper around `selfAssign()`. `obj` must be the first, positionally
#' supplied argument, because `selfAssign()` recovers the target expression
#' from this wrapper's call.
#'
#' @param obj Vector expression to modify, e.g. `x` or `df$name`.
#' @param n Number of frames between this wrapper and the environment that
#'   holds `obj`; one more level is added for the wrapper itself.
NAtoNULL <- function(obj, n=1) {
  frames_up <- n + 1
  selfAssign(match.call()[[2]], ind = is.na(obj), val = NULL, n = frames_up)
}

#' Replace the NA entries of `obj` with `val`, in place
#'
#' Thin wrapper around `selfAssign()`. `obj` must be the first, positionally
#' supplied argument, because `selfAssign()` recovers the target expression
#' from this wrapper's call.
#'
#' @param obj Vector expression to modify, e.g. `x` or `df$name`.
#' @param val Replacement value(s), passed through to `selfAssign()`.
#' @param n Number of frames between this wrapper and the environment that
#'   holds `obj`; one more level is added for the wrapper itself.
NAtoVal <- function(obj, val, n=1) {
  frames_up <- n + 1
  selfAssign(match.call()[[2]], ind = is.na(obj), val = val, n = frames_up)
}

#' Replace occurrences of `toReplace` within `obj` with `val`, in place
#'
#' Works out which elements of `obj` match `toReplace` and hands the mask to
#' `selfAssign()`, which performs the assignment in the caller's environment.
#' `obj` must be the first, positionally supplied argument, because
#' `selfAssign()` recovers the target expression from this wrapper's call.
#'
#' @param obj Vector expression to modify, e.g. `x` or `df$name`.
#' @param toReplace Value(s) to look for. `NULL` selects entries whose first
#'   component is `NULL`; a single `NA` selects missing entries; anything else
#'   is matched with `%in%`.
#' @param val Replacement value(s), passed through to `selfAssign()`.
#' @param n Number of frames between this wrapper and the environment that
#'   holds `obj`; one more level is added for the wrapper itself.
selfReplace <- function(obj, toReplace, val, n=1) {
## replaces occurrences of toReplace within obj with val

  # determine ind based on value & length of toReplace
  # TODO:  this will not work properly for data frames, but neither will selfAssign, yet.
  if (is.null(toReplace)) {
    # vapply (not sapply) so that an empty obj yields logical(0), not list()
    ind <- vapply(obj, function(x) is.null(x[[1]]), logical(1), USE.NAMES = FALSE)
  } else if (length(toReplace) == 1 && is.na(toReplace)) {
    # guard on length: `if` needs a scalar condition, and is.na() of a
    # multi-valued toReplace is a vector (an error in current R)
    ind <- is.na(obj)
  } else {
    # %in% for every length of obj: it never yields NA and never produces a
    # mask longer than obj, unlike `obj == toReplace` with a longer toReplace
    ind <- obj %in% toReplace
  }

  selfAssign(match.call()[[2]], ind, val, n = n + 1)
}



  ## Unfinished sketch: this should go inside NAtoNULL, NAtoVal etc.
  ## NOTE(review): `x1` is not defined anywhere in this snippet; presumably it
  ## is meant to hold the wrapper's matched call -- confirm before using.

  # TODO: modify for use with *apply
  if(substr(paste(as.expression(x1)), 1, 10) == "FUN(obj = ") {
      # Intentionally empty for now.  The check compares the first 10 characters
      # of the deparsed call with "FUN(obj = ", which is meant to identify a
      # call coming from *apply.  In that case n would need to be increased by 1
      # for apply & lapply, and by 2 for sapply.
      # The increase required for plyr functions is not yet known.
  }
于 2012-11-29T07:14:50.890 回答