0

我偶尔需要重命名因子变量的所有水平(levels)。我知道如何用基础 R(base R)实现这一点,例如:levels(factor_variable) <- levels(new_variable)。但我真的很想找到一种使用 tidyverse 的方法。我查看了 dplyr 和 forcats,但没有找到任何解决办法。我希望能够实现示例 1 中的效果,但要能与 %>% 运算符配合使用。

示例 1:使用基础 R(有效)

# Example 1 (base R): replace every level of a factor column in one assignment.
my_levels <- letters
sample_data <- data.frame(
  factor_data = factor(
    # Spell out TRUE: the shorthand `T` is an ordinary variable that can be
    # reassigned, which would silently change the sampling behaviour.
    sample(my_levels, size = 500, replace = TRUE),
    levels = my_levels
  ),
  Any_other_data = rnorm(500)
)

# One new (numeric) label per existing level.
my_new_levels <- rnorm(length(letters))

# `levels<-` relabels positionally: the i-th existing level receives the i-th
# replacement. levels(factor(x)) yields the unique values of x, sorted and
# converted to character, so the labels are applied in sorted order rather
# than in the order they appear in `my_new_levels`.
levels(sample_data$factor_data) <- levels(factor(my_new_levels))

示例 2:我用 tidyverse 尝试过但无效的做法

library(tidyverse)

# Example 2 setup: the same random data as a tibble, with a 26-level factor
# column (one level per lowercase letter) and a numeric column.
my_levels <- letters
sample_data <- tibble(
  factor_data = factor(
    # Spell out TRUE: the shorthand `T` is an ordinary variable that can be
    # reassigned.
    sample(my_levels, size = 500, replace = TRUE),
    levels = my_levels
  ),
  Any_other_data = rnorm(500)
)

# One new (numeric) label per existing level.
my_new_levels <- rnorm(length(letters))

# Attempt 1 -- fails with an error. Inside a call, the left-hand side of `=`
# must be an argument name (a plain symbol or string); a call such as
# levels(factor_data) is not accepted there, so R rejects this as a syntax
# error before mutate() ever runs.
sample_data <- sample_data %>%
  mutate(levels(factor_data) = levels(factor(my_new_levels)))
# Attempt 2 -- fails with an error for the same reason: the argument passed to
# recode() again uses a call, levels(factor_data), where an argument name is
# required.
sample_data <- sample_data %>%
  mutate(factor_data = recode(factor_data, levels(factor_data) = levels(factor(my_new_levels))))

我也尝试了 recode(),但它不仅需要手动操作(每次只能指定一个值),而且也无法与 %>% 运算符配合使用。下面是我为了弄清楚发生了什么而做的一些尝试:

# Piped attempt with recode(): %>% supplies the whole `sample_data` table as
# the first argument, so recode() is handed a data frame rather than the
# factor column it is meant to modify. The question reports that this does
# not work.
sample_data <- sample_data %>%
  recode(factor_data, a = '-2.5')

# Same piped attempt with recode_factor(); the data frame is again passed in
# the position where the vector to recode is expected.
sample_data <- sample_data %>%
  recode_factor(factor_data, a = '-2.5')

# Not valid R syntax: the left-hand side of `=` inside a call must be an
# argument name, not a call such as levels(...).
recode(sample_data$factor_data, levels(sample_data$factor_data) = levels(factor(my_new_levels)))

# Passing the column vector directly and naming a single existing level (`a`)
# with its replacement ('-2.5') is the one-value-at-a-time form of the call.
# NOTE(review): these presumably run without error; the question does not
# show their output.
recode(sample_data$factor_data, a = '-2.5')
recode_factor(sample_data$factor_data, a = '-2.5')
4

1 回答 1

5

您可以使用命名向量配合 forcats::fct_recode() 轻松做到这一点:

library(tidyverse)
set.seed(42)

my_levels <- letters
sample_data <- data.frame(
  factor_data = factor(
    # Spell out TRUE: the shorthand `T` is an ordinary variable that can be
    # reassigned.
    sample(my_levels, size = 500, replace = TRUE),
    levels = my_levels
  ),
  Any_other_data = rnorm(500)
)

my_new_levels <- rnorm(length(letters))

# Build the lookup that fct_recode() expects: each element's *name* is the new
# level and its *value* is the existing level it replaces. Assigning the
# numeric vector as names converts it to character.
named_level_vector <- levels(sample_data$factor_data)
names(named_level_vector) <- my_new_levels

# `!!!` splices the named vector into fct_recode() as individual
# `new = "old"` arguments, so all levels are renamed in one piped step.
sample_data <- sample_data %>%
  mutate(
    new_factor_data = forcats::fct_recode(factor_data, !!!named_level_vector)
  )

head(sample_data)
#>   factor_data Any_other_data    new_factor_data
#> 1           q     0.48236947  0.223521215874458
#> 2           e     0.99294364  -1.12828853519737
#> 3           a    -1.24639550  -2.55382485095083
#> 4           y    -0.03348752   1.67099730539817
#> 5           j    -0.07096218 -0.318990710826149
#> 6           d    -0.75892065  -1.17990419995829

由 reprex 包(v0.3.0)于 2020-06-11 创建

于 2020-06-11T13:09:22.020 回答