有没有办法让数据框引用自己?
我发现自己花了很多时间去写类似 y$Category1[is.na(y$Category1)] <- NULL 这样
难以阅读的代码,而且感觉像是在不断重复输入同样的内容。我想知道是否有类似下面这样的写法:
y$Category1[is.na(self)] <- NULL
我可以改用。
谢谢
多么棒的问题。不幸的是,正如@user295691 在评论中指出的那样,问题在于两次引用向量:一次作为被索引的对象,一次作为条件的主题。避免双重引用似乎是不可能的。
numericVector[cond(numericVector)] <- newVal
我认为我们能做的是提供一个简洁漂亮的函数,用它来代替下面的第一种写法:
# this
y$Category1[is.na(y$Category1)] <- list(NULL)
# we can have this:
NAtoNULL(y$Category1)
例如,以下函数都是对 selfAssign()(定义见下文)的封装:
NAtoNULL(obj) # Replaces NA values in obj with NULL.
NAtoVal(obj, val) # Replaces NA values in obj with val.
selfReplace(obj, toReplace, val) # Replaces toReplace values in obj with val
# and selfAssign can be called directly, but I'm not sure there would be a good reason to
selfAssign(obj, ind, val) # equivalent to obj[ind] <- val
例子:
# sample df
df <- structure(list(subj=c("A",NA,"C","D","E",NA,"G"),temp=c(111L,112L,NA,114L,115L,116L,NA),size=c(0.7133,NA,0.7457,NA,0.0487,NA,0.8481)),.Names=c("subj","temp","size"),row.names=c(NA,-7L),class="data.frame")
df
subj temp size
1 A 111 0.7133
2 <NA> 112 NA
3 C NA 0.7457
4 D 114 NA
5 E 115 0.0487
6 <NA> 116 NA
7 G NA 0.8481
# Make some replacements
NAtoNULL(df$size) # Replace all NA's in df$size with NULL's
NAtoVal(df$temp, 0) # Replace all NA's in df$temp with 0's
NAtoVal(df$subj, c("B", "E")) # Replace all NA's in df$subj with alternating "B" and "E"
# the modified df is now:
df
subj temp size
1 A 111 0.7133
2 B 112 NULL
3 C 0 0.7457
4 D 114 NULL
5 E 115 0.0487
6 E 116 NULL
7 G 0 0.8481
# replace the 0's in temp for NA
selfReplace(df$temp, 0, NA)
# replace NULL's in size for 1's
selfReplace(df$size, NULL, 1)
# replace all "E"'s in subj with alternate c("E", "F")
selfReplace(df$subj, c("E"), c("E", "F"))
df
subj temp size
1 A 111 0.7133
2 B 112 1
3 C NA 0.7457
4 D 114 1
5 E 115 0.0487
6 F 116 1
7 G NA 0.8481
现在这适用于向量,但在 *apply 中使用时会失败。我很想让它完全可用,尤其是与 plyr 配合使用时。关键在于修改 selfAssign()。
各函数的代码如下。
重要提示:这(目前还!)不适用于 *apply / plyr。
我相信可以通过修改 n 的值并调整 match.call() 中的 sys.parent(.) 来实现,但它仍然需要一些调试。欢迎提出任何建议或修改,不胜感激。
#' Assign `val` to `self[[ind]]` in a calling frame.
#'
#' Lets a caller write `selfAssign(x, ind, val)` (or a thin wrapper around it)
#' instead of repeating the target expression as in `x[ind] <- val`.
#'
#' The expression that is assigned to is NOT taken from the value of `self`.
#' It is recovered by matching the call found at `sys.parent(1)` against this
#' function's formals and reading its `self` argument, so wrappers must take
#' the target as their first positional argument. One `target[[i]] <- value`
#' assignment per index is then evaluated in `parent.frame(n)`.
#'
#' The assignments are built as call objects rather than pasted text, so
#' replacement values are stored exactly: strings containing quotes or
#' backslashes, `NA_character_`, and full-precision doubles all survive.
#'
#' @param self The object to modify, written as the expression to assign to
#'   (e.g. `x` or `df$name`). Should be a vector; matrices and whole data
#'   frames are not supported.
#' @param ind Logical mask (converted with `which()`) or positions/names of
#'   the elements to replace.
#' @param val Replacement value(s). `ind` and `val` are recycled against each
#'   other to the longer of the two lengths. `NULL` is stored as `list(NULL)`.
#' @param n How many frames above this function the assignment is evaluated
#'   in. Wrappers pass their own `n + 1`.
#' @param silent If `TRUE`, do not warn when `ind` selects nothing.
#' @return Invisibly, the last value assigned, or `NULL` when `ind` selects
#'   nothing.
selfAssign <- function(self, ind, val, n=1, silent=FALSE) {

  ## RECOVER THE TARGET EXPRESSION
  #--------------------------------------
  # Match the caller's call (not our own) against our formals; its `self`
  # argument is the expression to assign to. It is kept as a language object
  # so that complex targets such as df$name need no deparse/parse round trip.
  mc <- match.call(call = sys.call(sys.parent(1)))
  target <- mc[["self"]]
  if (is.null(target)) {
    stop("could not determine the expression to assign to from the calling call",
         call. = FALSE)
  }

  ## CLEAN UP ARGUMENT VALUES
  #--------------------------------------
  # replace a logical mask with the positions it selects
  if (is.logical(ind)) {
    ind <- which(ind)
  }

  # nothing selected (NULL, or zero-length of any type): stop here
  if (length(ind) == 0L) {
    if (!silent) warning("No indecies selected")
    return(invisible(NULL))
  }

  # One replacement per list element. NULL cannot be stored directly in a
  # vector element, so it is represented as list(NULL).
  values <- if (is.null(val)) list(list(NULL)) else as.list(val)
  if (length(values) == 0L) {
    stop("'val' must have length >= 1 (use NULL to assign list(NULL))",
         call. = FALSE)
  }

  # recycle indices and values against each other
  n_ops <- max(length(ind), length(values))
  ind <- rep(ind, length.out = n_ops)
  values <- rep(values, length.out = n_ops)

  ## BUILD THE ASSIGNMENTS AND EVALUATE THEM
  #--------------------------------------
  # Resolved here, directly in this function's frame, so that `n` counts
  # frames relative to selfAssign() itself.
  target_env <- parent.frame(n)

  last <- NULL
  for (k in seq_len(n_ops)) {
    i <- ind[[k]]
    v <- values[[k]]
    assignment <- bquote(.(target)[[.(i)]] <- .(v))
    last <- eval(assignment, envir = target_env)
  }
  invisible(last)
}
#' Replace the `NA` entries of `obj` with `NULL`, modifying `obj` in place.
#'
#' Thin wrapper that forwards to `selfAssign()` with the `NA` mask of `obj`
#' as the index and `NULL` as the replacement value.
#'
#' @param obj The object to modify, written as the expression to assign to
#'   (e.g. `df$size`).
#' @param n Frame depth relative to this wrapper; `n + 1` is forwarded to
#'   `selfAssign()` to account for the extra call level.
#' @return Whatever `selfAssign()` returns.
NAtoNULL <- function(obj, n=1) {
  selfAssign(
    self = match.call()[[2]],  # the unevaluated expression supplied as `obj`
    ind = is.na(obj),
    val = NULL,
    n = n + 1
  )
}
#' Replace the `NA` entries of `obj` with `val`, modifying `obj` in place.
#'
#' Thin wrapper that forwards to `selfAssign()` with the `NA` mask of `obj`
#' as the index.
#'
#' @param obj The object to modify, written as the expression to assign to
#'   (e.g. `df$temp`).
#' @param val Replacement value(s), passed through to `selfAssign()`.
#' @param n Frame depth relative to this wrapper; `n + 1` is forwarded to
#'   `selfAssign()` to account for the extra call level.
#' @return Whatever `selfAssign()` returns.
NAtoVal <- function(obj, val, n=1) {
  selfAssign(
    self = match.call()[[2]],  # the unevaluated expression supplied as `obj`
    ind = is.na(obj),
    val = val,
    n = n + 1
  )
}
#' Replace occurrences of `toReplace` within `obj` by `val`, in place.
#'
#' Works out which elements of `obj` match `toReplace` and forwards that mask
#' to `selfAssign()`, which performs the assignment in the calling frame.
#'
#' Matching rules:
#' * `toReplace = NULL` selects elements that are `NULL` or whose first
#'   component is `NULL` (i.e. entries stored as `list(NULL)`).
#' * a single `NA` selects the `NA` entries of `obj`.
#' * anything else selects the elements of `obj` found in `toReplace`.
#'
#' @param obj The object to modify, written as the expression to assign to
#'   (e.g. `df$subj`). Whole data frames are not supported.
#' @param toReplace Value(s) to look for; may have any length.
#' @param val Replacement value(s), passed through to `selfAssign()`.
#' @param n Frame depth relative to this function; `n + 1` is forwarded to
#'   `selfAssign()` to account for the extra call level.
#' @return Whatever `selfAssign()` returns.
selfReplace <- function(obj, toReplace, val, n=1) {
  # TODO: this will not work properly for data frames, but neither will selfAssign, yet.
  if (is.null(toReplace)) {
    # vapply() keeps the result logical even for empty `obj`; the length
    # guard avoids a subscript error on zero-length elements.
    ind <- vapply(
      obj,
      function(x) is.null(x) || (length(x) > 0L && is.null(x[[1L]])),
      logical(1)
    )
  } else if (length(toReplace) == 1L && is.na(toReplace)) {
    # Only a scalar NA means "replace the missing values"; checking the length
    # first keeps the `if` condition scalar for vector or empty `toReplace`.
    ind <- is.na(obj)
  } else {
    # %in% never yields NA and is correct for any lengths of `obj` and
    # `toReplace`, so no separate `==` branch is needed for length-1 `obj`.
    ind <- obj %in% toReplace
  }

  # Must be called directly from this frame: selfAssign() inspects its
  # caller's call to find the target expression.
  selfAssign(match.call()[[2]], ind, val, n=n+1)
}
## Sketch only: this check is meant to be moved inside NAtoNULL, NAtoVal etc.
# TODO: adapt for use with the *apply family
# NOTE(review): `x1` is not defined in this file; presumably it is the
# captured call of the wrapper -- confirm before moving this into place.
if (startsWith(paste(as.expression(x1)), "FUN(obj = ")) {
  # Deliberately empty. A call text beginning with "FUN(obj = " is meant to
  # signal that the wrapper was invoked through *apply.
  # In that case n has to grow by 1 for apply & lapply, and by 2 for sapply.
  # The increase needed for plyr functions is not known yet.
}