1

所以,我正在尝试在 ctree (partykit 的一部分)包中进行修改。具体来说,我想删除全局环境中的一个对象并运行 gc() 以帮助节省内存(R 在使用 Windows 页面文件时运行速度非常慢)。我使用 fixInNamespace 做到了:

fixInNamespace(ctree,"partykit")

我注意到我的更改不起作用,因此我什至将其用作替换代码:

function(formula, data, weights, subset, na.action = na.pass,
                  control = ctree_control(...), ytrafo = NULL,
                  scores = NULL, ...) {

    return("foo")
}

我也试过用这个:

tmpfun <- get("ctree", envir = asNamespace("partykit"))
environment(ctree) <- environment(tmpfun)
attributes(ctree) <- attributes(tmpfun)  # don't know if this is really needed
assignInNamespace("ctree", ctree, ns="partykit")

无论我似乎在做什么,我都坚持使用 ctree 的库版本。顺便说一句,我在 Windows 8.1 上使用 RStudio 0.98.507 和 R 3.1.1。

这是否与 .ctree_fit 调用中的外部 C 代码有关?

此外,在我们走“R 仅在写入时复制……”的道路之前,我已经验证了我们最终得到了数据集的多个副本。看:

> d2<-iris
> tracemem(iris)
[1] "<0x0000000019c7f5f8>"
> tracemem(d2)
[1] "<0x0000000019c7f5f8>"
> cttest<-ctree(Species~.,data=d2)
> tracemem(cttest$data)
[1] "<0x0000000008af8e30>"

感谢到目前为止的帖子,但是当我尝试我正在尝试的内容时,我收到以下错误:

> cttest<-ctree(Species~.,data=d2)
Error in environment(partykit) : object 'partykit' not found

这是一个更长的代码片段,显示了我想要实现的目标:

require(partykit)

ctree(Species~.,data=iris)

package_name<-"partykit"
function_name<-"ctree"


#
# Borrowed: https://github.com/robertzk/testthatsomemore/blob/master/R/stub.R
#

namespaces <-
  list(as.environment(paste0('package:', package_name)),
       getNamespace(package_name))
if (!exists(function_name, envir = namespaces[[1]], inherits = FALSE))
  namespaces <- namespaces[-1]
if (!exists(function_name, envir = tail(namespaces,1)[[1]], inherits = FALSE))
  stop(gettextf("Cannot stub %s::%s because it must exist in the package",
                package_name, function_name))
lapply(namespaces, unlockBinding, sym = function_name)
# Clean up our stubbing on exit
previous_object <- get(function_name, envir = tail(namespaces,1)[[1]])
on.exit({
  lapply(namespaces, function(ns) {
    tryCatch(error = function(.) NULL, assign(function_name, previous_object, envir = ns))
    lockBinding(function_name, ns)
  })
})
lapply(namespaces, function(ns)
  assign(function_name, 
         #
         # Modified ctree - kill original data variable prior to running longer-running algorithm
         #

         function(formula, data, weights, subset, na.action = na.pass,
                                 control = ctree_control(...), ytrafo = NULL,
                                 scores = NULL, ...) {



    if (missing(data))
      data <- environment(formula)
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "weights", "na.action"),
               names(mf), 0)
    mf <- mf[c(1, m)]

    ### only necessary for extended model formulae 
    ### e.g. multivariate responses
    formula <- Formula::Formula(formula)
    mf$formula <- formula
    mf$drop.unused.levels <- FALSE
    mf$na.action <- na.action
    mf[[1]] <- as.name("model.frame")
    mf <- eval(mf, parent.frame())

    response <- names(Formula::model.part(formula, mf, lhs = 1))
    weights <- model.weights(mf)
    dat <- mf[, colnames(mf) != "(weights)"]
    if (!is.null(scores)) {
      for (n in names(scores)) {
        sc <- scores[[n]]
        if (is.ordered(dat[[n]]) &&
              nlevels(dat[[n]]) == length(sc)) {
          attr(dat[[n]], "scores") <- as.numeric(sc)
        } else {
          warning("scores for variable ", sQuote(n), " ignored")
        }
      }
    }

    if (is.null(weights))
      weights <- rep(1, nrow(mf))
    storage.mode(weights) <- "integer"

    nvar <- sum(!(colnames(dat) %in% response))

    control$cfun <- function(...) {
      if (control$teststat == "quad")
        p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
      if (control$teststat == "max")
        p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
      names(p) <- c("statistic", "p.value")

      if (control$testtype == "Bonferroni")
        p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
      crit <-  p["statistic"]
      if (control$testtype != "Teststatistic")
        crit <- p["p.value"]
      c(crit, p)
    }

    #require(partykit)
    environment(partykit)

    if (!is.null(get("delvar",envir=globalenv()))) {
      eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
    }


    tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
                       ytrafo = ytrafo)

    fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
                         "(weights)" = weights,
                         check.names = FALSE)
    fitted[[3]] <- dat[, response, drop = length(response) == 1]
    names(fitted)[3] <- "(response)"
    ret <- party(tree, data = dat, fitted = fitted)
    class(ret) <- c("constparty", class(ret))

    ### doesn't work for Surv objects
    # ret$terms <- terms(formula, data = mf)
    ret$terms <- terms(mf)
    ### need to adjust print and plot methods
    ### for multivariate responses
    ### if (length(response) > 1) class(ret) <- "party"
    return(ret)
  }
  , envir = ns))

#
# End Borrowed
#


d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)

更新:我找到了一个可能的解决方案,但我希望有人有更清洁的方法来做到这一点。我下载了partykit包的源代码并编写了一个脚本将所有内容导入到全局环境中(除了从CRAN安装partykit包时安装的已编译C函数)

这基本上是我结束的地方:

files<-c("as.party.R",
         "ctree.R",
         "glmtree.R",
         "lmtree.R",
         "mob-plot.R",
         "mob-pvalue.R",
         "modelparty.R",
         "node.R",
         "party.R",
         "plot.R",
         "pmmlTreeModel.R",
         "print.R",
         "simpleparty.R",
         "split.R",
         "utils.R")

for ( i in 1:length(files)) {
    source(paste("c:\\cygwin64\\home\\Mike\\partykit\\R\\",files[i],sep=""))

}

ctree <- function(formula, data, weights, subset, na.action = na.pass,
                  control = ctree_control(...), ytrafo = NULL,
                  scores = NULL, ...) {



  if (missing(data))
    data <- environment(formula)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action"),
             names(mf), 0)
  mf <- mf[c(1, m)]

  ### only necessary for extended model formulae 
  ### e.g. multivariate responses
  formula <- Formula::Formula(formula)
  mf$formula <- formula
  mf$drop.unused.levels <- FALSE
  mf$na.action <- na.action
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())

  response <- names(Formula::model.part(formula, mf, lhs = 1))
  weights <- model.weights(mf)
  dat <- mf[, colnames(mf) != "(weights)"]
  if (!is.null(scores)) {
    for (n in names(scores)) {
      sc <- scores[[n]]
      if (is.ordered(dat[[n]]) &&
            nlevels(dat[[n]]) == length(sc)) {
        attr(dat[[n]], "scores") <- as.numeric(sc)
      } else {
        warning("scores for variable ", sQuote(n), " ignored")
      }
    }
  }

  if (is.null(weights))
    weights <- rep(1, nrow(mf))
  storage.mode(weights) <- "integer"

  nvar <- sum(!(colnames(dat) %in% response))

  control$cfun <- function(...) {
    if (control$teststat == "quad")
      p <- .pX2(..., pval = (control$testtype != "Teststatistic"))
    if (control$teststat == "max")
      p <- .pmaxT(..., pval = (control$testtype != "Teststatistic"))
    names(p) <- c("statistic", "p.value")

    if (control$testtype == "Bonferroni")
      p["p.value"] <- p["p.value"] * min(nvar, control$mtry)
    crit <-  p["statistic"]
    if (control$testtype != "Teststatistic")
      crit <- p["p.value"]
    c(crit, p)
  }

  #require(partykit)
  #environment(partykit)

  if (!is.null(get("delvar",envir=globalenv()))) {
    eval(parse(text=paste("rm (", get("delvar",envir=globalenv()), ",envir=globalenv())")))
  }


  tree <- .ctree_fit(dat, response, weights = weights, ctrl = control,
                     ytrafo = ytrafo)

  fitted <- data.frame("(fitted)" = fitted_node(tree, dat),
                       "(weights)" = weights,
                       check.names = FALSE)
  fitted[[3]] <- dat[, response, drop = length(response) == 1]
  names(fitted)[3] <- "(response)"
  ret <- party(tree, data = dat, fitted = fitted)
  class(ret) <- c("constparty", class(ret))

  ### doesn't work for Surv objects
  # ret$terms <- terms(formula, data = mf)
  ret$terms <- terms(mf)
  ### need to adjust print and plot methods
  ### for multivariate responses
  ### if (length(response) > 1) class(ret) <- "party"
  return(ret)
}

d2<-iris
delvar="d2"
cttest<-ctree(Species~.,data=d2)

cttest
4

1 回答 1

0

它适用于我的系统。你可能需要先打电话unlockBinding。这就是testthatsomemore包在幕后所做的;看看这是否适合你。

install_github('robertzk/testthatsomemore')
testthatsomemore::package_stub("partykit", "ctree", function(...) return("foo"), {
  # Your code that makes use of partykit::ctree should go here. The below will print "foo"
  print(partykit::ctree("I have been overwritten"))
})

您当然可以将修改后的函数放在第三个参数中,而不是上面的存根。

于 2014-10-28T00:15:37.187 回答