我使用自然样条拟合了一个模型,但不确定对预测变量使用 Box-Cox 变换以及中心化和缩放是否有任何好处。`step_ns()`(自然样条步骤)本身是否会执行这类变换?在应用自然样条之前先对预测变量进行归一化是否有好处?
library(tidyverse)
library(tidymodels)
# Read the raw data and print its structure as a quick sanity check.
car <- read_csv("vw.csv")
str(car)
## ---- Split data -----------------------
# initial_split() assigns rows at random; fix the seed so the same
# train/test partition (and therefore every downstream metric) is
# reproduced when the script is re-run.
set.seed(123)
# 80/20 train/test split, stratified on price.
split <- initial_split(car, prop = 0.80, strata = "price")
car_train <- training(split)
car_test <- testing(split)
## ---- Recipe --------------------------
# Preprocessing for the price model:
#   * tax is replaced by log(1 + tax),
#   * mpg, mileage, engineSize and year are each expanded into a
#     natural-spline basis with 3 degrees of freedom,
#   * nominal predictors are converted to dummy variables.
# No centering/scaling or Box-Cox step is requested here; step_ns() is only
# asked for the basis expansion.
# NOTE(review): the model engine is glmnet, which standardizes predictors
# internally by default (standardize = TRUE), so an explicit normalization
# step is presumably unnecessary -- confirm the default is not overridden.
rec <- recipe(price ~ ., data = car_train) %>%
  # log1p(x) is log(1 + x), computed accurately for values near zero.
  step_mutate(tax = log1p(tax)) %>%
  step_ns(mpg, mileage, engineSize, year, deg_free = 3) %>%
  # Select by role as well as type so the outcome can never be dummy-encoded.
  step_dummy(all_nominal_predictors())
## -- Model ---------------------------------
# Penalized linear regression fitted with the glmnet engine. Both penalty and
# mixture are tune() placeholders, to be filled in by the tuning step.
model_lasso <- set_engine(
  linear_reg(mode = "regression", penalty = tune(), mixture = tune()),
  "glmnet"
)
## --- Workflow -----------------------------
# Bundle the preprocessing recipe and the model specification into a single
# workflow object.
work01 <- add_model(
  add_recipe(workflow(), rec),
  model_lasso
)
## --- Folds --------------------------------
# Fold assignment is random; seed it so the resamples (and the tuning
# results computed on them) are reproducible.
set.seed(234)
# 10-fold cross-validation on the training set, stratified on price.
folds <- vfold_cv(car_train, v = 10, strata = "price")
## --- tune ---------------------------------
# The Latin hypercube design is sampled at random; seed it so the same 10
# candidate (penalty, mixture) pairs are generated on every run.
set.seed(345)
# extract_parameter_set_dials() replaces the deprecated parameters() method
# for model specifications.
grid01 <- grid_latin_hypercube(
  extract_parameter_set_dials(model_lasso),
  size = 10
)
# Evaluate every candidate on every resample, recording RMSE and R-squared.
tune01 <- tune_grid(
  work01,
  resamples = folds,
  grid = grid01,
  metrics = metric_set(rmse, rsq)
)
## --- Show_best ---------------------------------
# Two metrics (rmse, rsq) were collected during tuning, so name the one used
# for ranking explicitly instead of relying on the implicit default.
show_best(tune01, metric = "rmse")
# Hyperparameter combination with the lowest cross-validated RMSE.
best01 <- select_best(tune01, metric = "rmse")
## --- Test --------------------------------------
# Plug the selected hyperparameters into the workflow, then run last_fit()
# on the original split and report the resulting metrics.
test01 <- last_fit(finalize_workflow(work01, best01), split)
collect_metrics(test01)
```