0

在将数据放入决策树模型以预测结果之前,我使用 R 的 recipes 包(library(recipes))对数值变量进行了标准化。现在,我得到了决策树,年龄是节点中的重要变量之一,其分裂条件例如 > -1.5 和 < -1.5。我想将 -1.5 转换回未标准化的值,以便赋予它实际意义(如年龄 > 50 岁或 ≤ 50 岁)。我搜索过,但没有找到答案。

library(recipes)

# Standardize the numeric columns (mean 0, sd 1) before fitting the tree.
# NOTE(review): all_numeric() also selects the outcome `anyaki` if it is
# numeric -- confirm that scaling the outcome is intended.
# prep() receives its data through `training`; it has no `data` parameter,
# so the previous `data = dataset` was never bound to it.
recipe_obj <- dataset %>%
  recipe(formula = anyaki ~ .) %>% # outcome ~ all remaining columns
  step_center(all_numeric()) %>% # subtract each column's mean
  step_scale(all_numeric()) %>% # divide by each column's sd
  prep(training = dataset)

# Apply the trained recipe to obtain the standardized data set.
dataset_scaled <- bake(recipe_obj, new_data = dataset)

年龄是使用 R 的 recipes 包标准化的变量之一。现在,我想把最终模型中的标准化值转换回未标准化的值,以便赋予它实际意义。我该怎么做?

4

1 回答 1

1

您可以使用配方(recipe)及其各个步骤的 tidy() 方法来访问这些估计值。请在此处和此处查看更多详细信息。

# Reprex: use tidy() on a recipe, before and after prep(), to read back the
# statistics that each step estimated from the training data.
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip
data(penguins)

# Untrained recipe with three steps: step_other (levels below the 0.2
# threshold go to "another"), step_normalize on the numeric columns, and
# step_dummy on the nominal columns.
penguin_rec <- recipe(~ ., data = penguins) %>%
  step_other(all_nominal(), threshold = 0.2, other = "another") %>%
  step_normalize(all_numeric()) %>%
  step_dummy(all_nominal())

# tidy() on the recipe lists its steps; `trained` is FALSE before prep().
tidy(penguin_rec)
#> # A tibble: 3 × 6
#>   number operation type      trained skip  id             
#>    <int> <chr>     <chr>     <lgl>   <lgl> <chr>          
#> 1      1 step      other     FALSE   FALSE other_ZNJ2R    
#> 2      2 step      normalize FALSE   FALSE normalize_ogEvZ
#> 3      3 step      dummy     FALSE   FALSE dummy_YVCBo
# tidy(number = i) shows the details of step i. Before prep() only the
# selector is known, so `retained` is NA.
tidy(penguin_rec, number = 1)
#> # A tibble: 1 × 3
#>   terms         retained id         
#>   <chr>         <chr>    <chr>      
#> 1 all_nominal() <NA>     other_ZNJ2R


# prep() trains every step on the supplied training data.
penguin_prepped <- prep(penguin_rec, training = penguins)
#> Warning: There are new levels in a factor: NA
# After prep() all three steps report trained = TRUE.
tidy(penguin_prepped)
#> # A tibble: 3 × 6
#>   number operation type      trained skip  id             
#>    <int> <chr>     <chr>     <lgl>   <lgl> <chr>          
#> 1      1 step      other     TRUE    FALSE other_ZNJ2R    
#> 2      2 step      normalize TRUE    FALSE normalize_ogEvZ
#> 3      3 step      dummy     TRUE    FALSE dummy_YVCBo


# Step 1 (other) now lists the factor levels that were retained per column.
tidy(penguin_prepped, number = 1)
#> # A tibble: 6 × 3
#>   terms   retained id         
#>   <chr>   <chr>    <chr>      
#> 1 species Adelie   other_ZNJ2R
#> 2 species Gentoo   other_ZNJ2R
#> 3 island  Biscoe   other_ZNJ2R
#> 4 island  Dream    other_ZNJ2R
#> 5 sex     female   other_ZNJ2R
#> 6 sex     male     other_ZNJ2R
# Step 2 (normalize) reports the mean and sd estimated for each numeric
# column. To map a standardized value back to the original units, use
#   original = standardized * sd + mean
# e.g. a cutoff of -1.5 on flipper_length_mm is about -1.5 * 14.1 + 201,
# i.e. roughly 180 mm.
tidy(penguin_prepped, number = 2)
#> # A tibble: 8 × 4
#>   terms             statistic   value id             
#>   <chr>             <chr>       <dbl> <chr>          
#> 1 bill_length_mm    mean        43.9  normalize_ogEvZ
#> 2 bill_depth_mm     mean        17.2  normalize_ogEvZ
#> 3 flipper_length_mm mean       201.   normalize_ogEvZ
#> 4 body_mass_g       mean      4202.   normalize_ogEvZ
#> 5 bill_length_mm    sd           5.46 normalize_ogEvZ
#> 6 bill_depth_mm     sd           1.97 normalize_ogEvZ
#> 7 flipper_length_mm sd          14.1  normalize_ogEvZ
#> 8 body_mass_g       sd         802.   normalize_ogEvZ

由 reprex 包 (v2.0.0) 创建于 2021-08-07

于 2021-08-07T20:43:00.540 回答