0

我的理解是,step_BoxCox() 要求变量严格为正。但是,我尝试对含有一些负值的数据应用该步骤,却没有收到任何错误或警告,输出中也没有 NA 值。
我不知道问题出在哪里:是我的理解有误,还是我使用了错误的语法,或是其他原因。

library(recipes)
library(skimr)

# Reproducible example: apply step_BoxCox() to predictors that include zero
# and negative values, then compare summaries of the data before and after.

# create dummy data (seed is fixed so the recorded output below is repeatable)
set.seed(123)
n <- 2e3
x1 <- rpois(n, lambda = 5) # has some zero vals
x2 <- rnorm(n) # has some -ve vals
x3 <- x1 + 10 # is strictly positive
y <- x1 + x2
# NOTE(review): tibble() is not attached explicitly in this script; presumably
# it is made available by one of the loaded packages -- confirm.
data <- tibble(x1, x2, x3, y)

# a Box-Cox recipe: y is the outcome, every other column is a predictor,
# and the Box-Cox step is requested for all predictors (x1, x2, x3)
rec <- recipe(y ~ ., data = data) %>% 
  step_BoxCox(all_predictors())
rec
#> Data Recipe
#> 
#> Inputs:
#> 
#>       role #variables
#>    outcome          1
#>  predictor          3
#> 
#> Operations:
#> 
#> Box-Cox transformation on all_predictors()

# prep the recipe, then bake with new_data = NULL
# (presumably this returns the processed training data -- see recipes docs)
processed <- rec %>% 
  prep() %>% 
  bake(new_data = NULL)

# check output: summary of the raw data first, then of the processed data
summary(data)
#>        x1               x2                  x3              y         
#>  Min.   : 0.000   Min.   :-3.047861   Min.   :10.00   Min.   :-2.048  
#>  1st Qu.: 3.000   1st Qu.:-0.654767   1st Qu.:13.00   1st Qu.: 3.349  
#>  Median : 5.000   Median :-0.007895   Median :15.00   Median : 4.843  
#>  Mean   : 4.981   Mean   : 0.011176   Mean   :14.98   Mean   : 4.993  
#>  3rd Qu.: 6.000   3rd Qu.: 0.688699   3rd Qu.:16.00   3rd Qu.: 6.486  
#>  Max.   :14.000   Max.   : 3.421095   Max.   :24.00   Max.   :15.225
# In the recorded output below, the x1 and x2 summaries are identical to the
# raw data above; only x3 (the strictly positive column) has changed.
summary(processed)
#>        x1               x2                  x3              y         
#>  Min.   : 0.000   Min.   :-3.047861   Min.   :2.076   Min.   :-2.048  
#>  1st Qu.: 3.000   1st Qu.:-0.654767   1st Qu.:2.285   1st Qu.: 3.349  
#>  Median : 5.000   Median :-0.007895   Median :2.398   Median : 4.843  
#>  Mean   : 4.981   Mean   : 0.011176   Mean   :2.388   Mean   : 4.993  
#>  3rd Qu.: 6.000   3rd Qu.: 0.688699   3rd Qu.:2.448   3rd Qu.: 6.486  
#>  Max.   :14.000   Max.   : 3.421095   Max.   :2.756   Max.   :15.225
# count missing values in x2 after processing (recorded result: none)
sum(is.na(processed$x2))
#> [1] 0
# overall skim of the processed data (no output was recorded for this call)
skim(processed)

由 reprex 包(v0.3.0)创建于 2021-04-29

4

0 回答 0