一种快速且易读的 library(stringi) 方法:
# stringi supplies the vectorised fixed-string replacement used below.
library(stringi)

# Input: "+"-separated combinations of letters that need translating.
dt1 <- data.frame(
  letter = c("a", "b+c", "c", "d+e+f+g", "a+g+e")
)

# Lookup table: each letter and the number (kept as a string) replacing it.
dt2 <- data.frame(
  letter = c("a", "b", "c", "d", "e", "f", "g"),
  number = c("1", "2", "3", "4", "5", "6", "7")
)

# vectorize_all = FALSE applies every pattern/replacement pair, in order, to
# every element of dt1$letter, so multi-letter strings are fully translated
# while the "+" separators are left untouched.
dt1[["number"]] <- stri_replace_all_fixed(
  str = dt1[["letter"]],
  pattern = dt2[["letter"]],
  replacement = dt2[["number"]],
  vectorize_all = FALSE
)

dt1
结果:
> dt1
   letter  number
1       a       1
2     b+c     2+3
3       c       3
4 d+e+f+g 4+5+6+7
5   a+g+e   1+7+5
另请参阅此相关答案。
编辑:当前已有答案的基准测试结果(每个表达式仅运行一次,neval = 1,数值仅供粗略比较):
Unit: microseconds
expr min lq mean median uq max neval
Sotos 2689.6 2689.6 2689.6 2689.6 2689.6 2689.6 1
ismirsehregal 26.4 26.4 26.4 26.4 26.4 26.4 1
www 42247.8 42247.8 42247.8 42247.8 42247.8 42247.8 1
MerijnvanTilborg 1723.5 1723.5 1723.5 1723.5 1723.5 1723.5 1
YuriySaraykin 21859.2 21859.2 21859.2 21859.2 21859.2 21859.2 1
danlooo 4165.7 4165.7 4165.7 4165.7 4165.7 4165.7 1
要重现该基准测试:
library(microbenchmark)
library(tidyverse)
library(stringi)

# Benchmark fixtures: "+"-separated letter combinations and the letter ->
# number lookup table that every candidate solution translates them with.
dt1 <- data.frame(
  letter = c("a", "b+c", "c", "d+e+f+g", "a+g+e")
)
dt2 <- data.frame(
  letter = c("a", "b", "c", "d", "e", "f", "g"),
  number = c("1", "2", "3", "4", "5", "6", "7")
)

# Time each proposed answer; every entry is named after the answer's author.
# `times = 1L` evaluates each expression exactly once, so the figures include
# any first-call overhead and are only a rough comparison.
microbenchmark(
  Sotos = {
    # Split on a literal "+", look the pieces up in dt2, and re-join them.
    # NOTE(review): `%in%` selects numbers in dt2's row order rather than in
    # the order the letters appear in the string, so "a+g+e" yields "1+5+7"
    # here. Left as is because this entry measures the answer as posted.
    sapply(
      strsplit(dt1$letter, "+", fixed = TRUE),
      function(i) paste(dt2$number[dt2$letter %in% i], collapse = "+")
    )
  },
  ismirsehregal = {
    # Sequential fixed-string replacement of every letter by its number.
    stri_replace_all_fixed(
      dt1$letter,
      pattern = dt2$letter,
      replacement = dt2$number,
      vectorize_all = FALSE
    )
  },
  www = {
    # Explode to one row per letter, join the lookup, collapse back per row.
    dt1 %>%
      # row_number() instead of 1:n(): the latter is c(1, 0) on empty input.
      mutate(ID = row_number()) %>%
      separate_rows(letter, sep = "\\+") %>%
      left_join(dt2, by = "letter") %>%
      group_by(ID) %>%
      # `.cols` is given explicitly; omitting it is deprecated in dplyr 1.1.0.
      summarize(across(everything(), ~ paste0(., collapse = "+"))) %>%
      ungroup() %>%
      select(-ID)
  },
  MerijnvanTilborg = {
    # Same idea as the fixed-string variant, but with regex matching.
    dt1 %>%
      mutate(
        MerijnvanTilborg = stri_replace_all_regex(
          letter, dt2$letter, dt2$number,
          vectorize_all = FALSE
        )
      )
  },
  YuriySaraykin = {
    # Split each row's string, then map every piece to its position in
    # dt2$letter via match() and paste the positions together.
    dt1 %>%
      rowwise() %>%
      mutate(tmp = str_split(letter, pattern = "\\+")) %>%
      ungroup() %>%
      mutate(
        number = map_chr(
          tmp,
          ~ paste0(match(.x, dt2$letter), collapse = "+")
        )
      ) %>%
      select(-tmp)
  },
  danlooo = {
    # Per string: split on "+", look each piece up in the named vector built
    # from dt2 by deframe(), and paste the results together.
    dt1 %>%
      as_tibble() %>%
      mutate(number = letter %>% map_chr(
        ~ .x %>%
          str_split("[+]") %>%
          simplify() %>%
          map_chr(~ deframe(dt2)[.x]) %>%
          paste0(collapse = "+")
      ))
  },
  times = 1L
)