我有一个包含连续变量和分类变量的数据集。我正在运行回归以根据数据集中的其他变量预测其中一个变量。在比较 ridge、lasso 和 elastic-net 回归的结果后,lasso 回归是最好的模型。
我使用“coef”函数来提取模型的系数,但是,结果是一个非常长的列表,包含 800 多个变量(因为我的一些分类变量有很多级别)。有没有一种方法可以快速将系数从大到小排序?这是一个 glmnet 模型输出
示例代码的可重现问题:
# Libraries Needed
library(caret)
library(glmnet)
library(mlbench)
library(psych)
# Data
data("BostonHousing")
data <- BostonHousing
str(data)
# Data Partition
set.seed(222)
ind <- sample(2, nrow(data), replace = T, prob = c(0.7, 0.3))
train <- data[ind==1,]
test <- data[ind==2,]
# Custom Control Parameters
custom <- trainControl(method = "repeatedcv",
number = 10,
repeats = 5,
verboseIter = T)
# Linear Model
set.seed(1234)
lm <- train(medv ~.,
train,
method='lm',
trControl = custom)
# Results
lm$results
lm
summary(lm)
plot(lm$finalModel)
# Ridge Regression
set.seed(1234)
ridge <- train(medv ~.,
train,
method = 'glmnet',
tuneGrid = expand.grid(alpha = 0,
lambda = seq(0.0001, 1, length=5)),#try 5 values for lambda between 0.0001 and 1
trControl=custom)
#increasing lambda = increasing penalty and vice versa
#increase lambda therefore will cause coefs to shrink
# Plot Results
plot(ridge)
plot(ridge$finalModel, xvar = "lambda", label = T)
plot(ridge$finalModel, xvar = 'dev', label=T)
plot(varImp(ridge, scale=T))
# Lasso Regression
set.seed(1234)
lasso <- train(medv ~.,
train,
method = 'glmnet',
tuneGrid = expand.grid(alpha=1,
lambda = seq(0.0001,1, length=5)),
trControl = custom)
# Plot Results
plot(lasso)
lasso
plot(lasso$finalModel, xvar = 'lambda', label=T)
plot(lasso$finalModel, xvar = 'dev', label=T)
plot(varImp(lasso, scale=T))
# Elastic Net Regression
set.seed(1234)
en <- train(medv ~.,
train,
method = 'glmnet',
tuneGrid = expand.grid(alpha = seq(0,1,length=10),
lambda = seq(0.0001,1,length=5)),
trControl = custom)
# Plot Results
plot(en)
plot(en$finalModel, xvar = 'lambda', label=T)
plot(en$finalModel, xvar = 'dev', label=T)
plot(varImp(en))
# Compare Models
model_list <- list(LinearModel = lm, Ridge = ridge, Lasso = lasso, ElasticNet=en)
res <- resamples(model_list)
summary(res)
bwplot(res)
xyplot(res, metric = 'RMSE')
# Best Model
en$bestTune
best <- en$finalModel
coef(best, s = en$bestTune$lambda)