所以,我正在尝试在 ctree (partykit 的一部分)包中进行修改。具体来说,我想删除全局环境中的一个对象并运行 gc() 以帮助节省内存(R 在使用 Windows 页面文件时运行速度非常慢)。我使用 fixInNamespace 做到了:
fixInNamespace(ctree,"partykit")
我注意到我的更改不起作用,因此我什至将其用作替换代码:
function(formula, data, weights, subset, na.action = na.pass,
control = ctree_control(...), ytrafo = NULL,
scores = NULL, ...) {
return("foo")
}
我也试过用这个:
tmpfun <- get("ctree", envir = asNamespace("partykit"))
environment(ctree) <- environment(tmpfun)
attributes(ctree) <- attributes(tmpfun) # don't know if this is really needed
assignInNamespace("ctree", ctree, ns="partykit")
无论我似乎在做什么,我都坚持使用 ctree 的库版本。顺便说一句,我在 Windows 8.1 上使用 RStudio 0.98.507 和 R 3.1.1。
这是否与 .ctree_fit 调用中的外部 C 代码有关?
此外,在我们走“R 仅在写入时复制……”的道路之前,我已经验证了我们最终得到了数据集的多个副本。看:
> d2<-iris
> tracemem(iris)
[1] "<0x0000000019c7f5f8>"
> tracemem(d2)
[1] "<0x0000000019c7f5f8>"
> cttest<-ctree(Species~.,data=d2)
> tracemem(cttest$data)
[1] "<0x0000000008af8e30>"
感谢到目前为止的帖子,但是当我尝试我正在尝试的内容时,我收到以下错误:
> cttest<-ctree(Species~.,data=d2)
Error in environment(partykit) : object 'partykit' not found
这是一个更长的代码片段,显示了我想要实现的目标:
require(partykit)
ctree(Species~.,data=iris)
package_name<-"partykit"
function_name<-"ctree"
#
# Borrowed: https://github.com/robertzk/testthatsomemore/blob/master/R/stub.R
#
namespaces <-
list(as.environment(paste0('package:', package_name)),
getNamespace(package_name))
if (!exists(function_name, envir = namespaces[[1]], inherits = FALSE))
namespaces <- namespaces[-1]
if (!exists(function_name, envir = tail(namespaces,1)[[1]], inherits = FALSE))
stop(gettextf("Cannot stub %s::%s because it must exist in the package",
package_name, function_name))
lapply(namespaces, unlockBinding, sym = function_name)
# Clean up our stubbing on exit
previous_object <- get(function_name, envir = tail(namespaces,1)[[1]])
on.exit({
lapply(namespaces, function(ns) {
tryCatch(error = function(.) NULL, assign(function_name, previous_object, envir = ns))
lockBinding(function_name, ns)
})
})
lapply(namespaces, function(ns)
assign(function_name,
#
# Modified ctree - kill original data variable prior to running longer-running algorithm
#
function(formula, data, weights, subset, na.action = na.pass,
control = ctree_control(...), ytrafo = NULL,
scores = NULL, ...) {
if (missing(data))
data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action"),
names(mf), 0)
mf <- mf[c(1, m)]
### only necessary for extended model formulae
### e.g. multivariate responses
formula <- Formula::Formula(formula)
mf$formula <- formula
mf$drop.unused.levels <- FALSE
mf$na.action <- na.action
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response <- names(Formula::model.part(formula, mf, lhs = 1))
weights <- model.weights(mf)
dat <- mf[, colnames(mf) != "(weights)"]
if (!is.null(scores)) {
for (n in names(scores)) {
sc <- scores[[n]]
if (is.ordered(dat[[n]]) &&
nlevels(dat[[n]]) == length(sc)) {
attr(dat[[n]], "scores") <- as.numeric(sc)
} else {
warning("scores for variable ", sQuote(n), " ignored")
}
}
}
if (is.null(weights))
weights <- rep(1, nrow(mf))
storage.mode(weights) <- "integer"
nvar <- sum(!(colnames(dat) %in% response))
control$cfun <- function(...) {
if (control$teststat == "quad")
p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
if (control$teststat == "max")
p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
names(p) <- c("statistic", "p.value")
if (control$testtype == "Bonferroni")
p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
crit <- p["statistic"]
if (control$testtype != "Teststatistic")
crit <- p["p.value"]
c(crit, p)
}
#require(partykit)
environment(partykit)
if (!is.null(get("delvar",envir=globalenv()))) {
eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
}
tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
ytrafo = ytrafo)
fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
"(weights)" = weights,
check.names = FALSE)
fitted[[3]] <- dat[, response, drop = length(response) == 1]
names(fitted)[3] <- "(response)"
ret <- party(tree, data = dat, fitted = fitted)
class(ret) <- c("constparty", class(ret))
### doesn't work for Surv objects
# ret$terms <- terms(formula, data = mf)
ret$terms <- terms(mf)
### need to adjust print and plot methods
### for multivariate responses
### if (length(response) > 1) class(ret) <- "party"
return(ret)
}
, envir = ns))
#
# End Borrowed
#
d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)
更新:我找到了一个可能的解决方案,但我希望有人有更清洁的方法来做到这一点。我下载了partykit包的源代码并编写了一个脚本将所有内容导入到全局环境中(除了从CRAN安装partykit包时安装的已编译C函数)
这基本上是我结束的地方:
files<-c("as.party.R",
"ctree.R",
"glmtree.R",
"lmtree.R",
"mob-plot.R",
"mob-pvalue.R",
"modelparty.R",
"node.R",
"party.R",
"plot.R",
"pmmlTreeModel.R",
"print.R",
"simpleparty.R",
"split.R",
"utils.R")
for ( i in 1:length(files)) {
source(paste("c:\\cygwin64\\home\\Mike\\partykit\\R\\",files[i],sep=""))
}
ctree <- function(formula, data, weights, subset, na.action = na.pass,
control = ctree_control(...), ytrafo = NULL,
scores = NULL, ...) {
if (missing(data))
data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action"),
names(mf), 0)
mf <- mf[c(1, m)]
### only necessary for extended model formulae
### e.g. multivariate responses
formula <- Formula::Formula(formula)
mf$formula <- formula
mf$drop.unused.levels <- FALSE
mf$na.action <- na.action
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response <- names(Formula::model.part(formula, mf, lhs = 1))
weights <- model.weights(mf)
dat <- mf[, colnames(mf) != "(weights)"]
if (!is.null(scores)) {
for (n in names(scores)) {
sc <- scores[[n]]
if (is.ordered(dat[[n]]) &&
nlevels(dat[[n]]) == length(sc)) {
attr(dat[[n]], "scores") <- as.numeric(sc)
} else {
warning("scores for variable ", sQuote(n), " ignored")
}
}
}
if (is.null(weights))
weights <- rep(1, nrow(mf))
storage.mode(weights) <- "integer"
nvar <- sum(!(colnames(dat) %in% response))
control$cfun <- function(...) {
if (control$teststat == "quad")
p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
if (control$teststat == "max")
p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
names(p) <- c("statistic", "p.value")
if (control$testtype == "Bonferroni")
p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
crit <- p["statistic"]
if (control$testtype != "Teststatistic")
crit <- p["p.value"]
c(crit, p)
}
#require(partykit)
#environment(partykit)
if (!is.null(get("delvar",envir=globalenv()))) {
eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
}
tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
ytrafo = ytrafo)
fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
"(weights)" = weights,
check.names = FALSE)
fitted[[3]] <- dat[, response, drop = length(response) == 1]
names(fitted)[3] <- "(response)"
ret <- party(tree, data = dat, fitted = fitted)
class(ret) <- c("constparty", class(ret))
### doesn't work for Surv objects
# ret$terms <- terms(formula, data = mf)
ret$terms <- terms(mf)
### need to adjust print and plot methods
### for multivariate responses
### if (length(response) > 1) class(ret) <- "party"
return(ret)
}
d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)
cttest