1

我是 tidymodels 的新手，但在训练（prep）配方时，step_pca() 的 num_comp 或 threshold 等参数似乎并未生效。如下例所示，尽管设置了 num_comp = 2，结果中仍然出现了 4 个主成分。

library(tidyverse)
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip
# Question reprex: build a recipe with no outcome (`~ .`) on USArrests,
# normalize the numeric columns, then add a PCA step asking for num_comp = 2.
rec <- recipe( ~ ., data = USArrests) %>%
  step_normalize(all_numeric()) %>%
  step_pca(all_numeric(), num_comp = 2)

# Pull the coefficients of step number 2 (the step_pca step) from the prepped
# recipe and spread them to one column per component.
# The captured output below lists PC1-PC4, i.e. four components, even though
# num_comp = 2 was requested -- this is what the question is about.
prep(rec) %>% tidy(number = 2, type = "coef") %>%
  pivot_wider(names_from = component, values_from = value, id_cols = terms)
#> # A tibble: 4 x 5
#>   terms       PC1    PC2    PC3     PC4
#>   <chr>     <dbl>  <dbl>  <dbl>   <dbl>
#> 1 Murder   -0.536  0.418 -0.341  0.649 
#> 2 Assault  -0.583  0.188 -0.268 -0.743 
#> 3 UrbanPop -0.278 -0.873 -0.378  0.134 
#> 4 Rape     -0.543 -0.167  0.818  0.0890
4

2 回答 2

2

这里计算的是完整的 PCA（因此您仍然可以计算每个成分的方差），而 num_comp 仅指定保留多少个主成分作为预测变量。如果要限定 PCA 本身的最大秩（rank），可以通过 options 参数传入，例如 options = list(rank. = 2)：

library(recipes)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> 
#> Attaching package: 'recipes'
#> The following object is masked from 'package:stats':
#> 
#>     step
# Answer reprex: same pipeline as the question, but step_pca() additionally
# receives options = list(rank. = 2). Per the accompanying answer text, this is
# how a maximal rank is specified for the PCA itself.
rec <- recipe( ~ ., data = USArrests) %>%
    step_normalize(all_numeric()) %>%
    step_pca(all_numeric(), num_comp = 2, options = list(rank. = 2))

# Coefficients of step number 2 (step_pca) in long form. The captured output
# below contains only PC1 and PC2 (8 rows: 4 terms for each of 2 components).
prep(rec) %>% tidy(number = 2, type = "coef")
#> # A tibble: 8 × 4
#>   terms     value component id       
#>   <chr>     <dbl> <chr>     <chr>    
#> 1 Murder   -0.536 PC1       pca_AoFOm
#> 2 Assault  -0.583 PC1       pca_AoFOm
#> 3 UrbanPop -0.278 PC1       pca_AoFOm
#> 4 Rape     -0.543 PC1       pca_AoFOm
#> 5 Murder    0.418 PC2       pca_AoFOm
#> 6 Assault   0.188 PC2       pca_AoFOm
#> 7 UrbanPop -0.873 PC2       pca_AoFOm
#> 8 Rape     -0.167 PC2       pca_AoFOm

由 reprex 包（v2.0.1）于 2022-01-12 创建

您也可以通过 stats::prcomp() 的 tol 参数来控制它，该参数同样通过 options 传入。

于 2022-01-12T18:33:05.783 回答
1

如果你对配方（recipe）执行 bake()，结果似乎符合预期（只保留了两个主成分），不过我不清楚你最终想要达到什么目的。

library(tidyverse)
library(tidymodels)

# Move the row names of USArrests into a regular column named "Countries";
# the result is assigned back to the name USArrests.
# NOTE(review): the printed values are US state names, so "Countries" looks
# like a misnomer -- left untouched because the captured output uses it.
USArrests <- USArrests %>% 
  rownames_to_column("Countries")

# Same pipeline as in the question: normalize the numeric columns, then a PCA
# step with num_comp = 2.
rec <- 
  recipe( ~ ., data = USArrests) %>%
  step_normalize(all_numeric()) %>%
  step_pca(all_numeric(), num_comp = 2)

# Prep the recipe and bake it with new_data = NULL. The captured output below
# has 50 rows and 3 columns: Countries plus PC1 and PC2 only, i.e. two
# components in the baked data.
prep(rec) %>% 
  bake(new_data = NULL)
#> # A tibble: 50 x 3
#>    Countries       PC1     PC2
#>    <fct>         <dbl>   <dbl>
#>  1 Alabama     -0.976   1.12  
#>  2 Alaska      -1.93    1.06  
#>  3 Arizona     -1.75   -0.738 
#>  4 Arkansas     0.140   1.11  
#>  5 California  -2.50   -1.53  
#>  6 Colorado    -1.50   -0.978 
#>  7 Connecticut  1.34   -1.08  
#>  8 Delaware    -0.0472 -0.322 
#>  9 Florida     -2.98    0.0388
#> 10 Georgia     -1.62    1.27  
#> # ... with 40 more rows

由 reprex 包（v2.0.1）于 2022-01-11 创建

于 2022-01-11T14:56:49.860 回答