1

我使用自然样条拟合了一个模型,但不确定对预测变量进行 Box-Cox 变换以及中心化和缩放是否有任何好处。step_ns(自然样条步骤)本身是否会执行这些转换?在应用自然样条之前先对预测变量进行归一化是否有优势?

library(tidyverse)
library(tidymodels)

# Load the data set; the path is resolved relative to the working directory.
car <- read_csv("vw.csv")
str(car)
## ---- Split data -----------------------

# Fix the RNG seed so the random train/test partition is the same on every
# run; without it, each run evaluates the model on a different test set.
set.seed(123)

# 80/20 train/test split, stratified on the outcome `price`.
split <- initial_split(car, prop = 0.80, strata = "price")
car_train <- training(split)
car_test <- testing(split)


## ---- Recipe --------------------------

# Preprocessing recipe, built one step at a time on the training data.
# `price` is the outcome; every other column is a predictor.
rec <- recipe(price ~ ., data = car_train)

# Replace `tax` with log(tax + 1).
rec <- step_mutate(rec, tax = log(tax + 1))

# Natural-spline expansion (deg_free = 3) of the listed numeric predictors.
rec <- step_ns(rec, mpg, mileage, engineSize, year, deg_free = 3)

# Dummy-encode every nominal column.
rec <- step_dummy(rec, all_nominal())


## -- Model ---------------------------------

# glmnet linear regression; both `penalty` and `mixture` are left as tuning
# placeholders, to be filled in by the grid search.
model_lasso <- set_engine(
  linear_reg(mode = "regression", penalty = tune(), mixture = tune()),
  "glmnet"
)


## --- Workflow -----------------------------

# Bundle the preprocessing recipe and the model specification together.
work01 <- add_model(add_recipe(workflow(), rec), model_lasso)

## --- Folds --------------------------------

# Seed the RNG so that both the fold assignment and the Latin hypercube grid
# below are reproducible from run to run.
set.seed(123)

# 10-fold cross-validation on the training set, stratified on `price`.
folds <- vfold_cv(car_train, v = 10, strata = "price")

## --- tune ---------------------------------

# 10 candidate combinations of the model's tunable parameters
# (`penalty` and `mixture`), drawn with a Latin hypercube design.
grid01 <- grid_latin_hypercube(parameters(model_lasso), size = 10)

# Evaluate every candidate on every fold, recording RMSE and R-squared.
tune01 <- tune_grid(
  work01,
  resamples = folds,
  grid = grid01,
  metrics = metric_set(rmse, rsq)
)

## --- Show_best ---------------------------------

# Tuning recorded two metrics (rmse and rsq), so name the one used for
# ranking explicitly instead of relying on the implicit default, which
# triggers a warning in tune.
show_best(tune01, metric = "rmse")
best01 <- select_best(tune01, metric = "rmse")

## --- Test --------------------------------------

# Plug the selected hyperparameters into the workflow, then run the final
# fit/evaluation on the original train/test split.
test01 <- work01 %>%
  finalize_workflow(best01) %>%
  last_fit(split)

# Metrics of the finalized model from `last_fit()`.
test01 %>% collect_metrics()

´´´
4

0 回答 0