这是一个使用 `adist` 的解决方案:
library(data.table)
# Sample data: names to look up (dt_t) and candidate names to match (dt_f).
dt_t <- data.table(Name = list("Aaron RAMSEY", "Mesut OEZIL", "Sergio AGUERO"))
dt_f <- data.table(Name = list("Özil Mesut", "Ramsey Aaron", "Kun Agüero"))

# Approximate string distances, ignoring case and allowing substring matches.
# adist() returns a matrix with one row per element of its first argument
# (dt_t$Name) and one column per element of its second argument (dt_f$Name).
string_dist <- adist(dt_t$Name, dt_f$Name, partial = TRUE, ignore.case = TRUE)

# For every row (a dt_t name) pick the column (a dt_f name) with the smallest
# distance. MARGIN must be 1 here: iterating over columns would yield indices
# into dt_t, which are not valid row selectors for dt_f.
match_idx <- apply(string_dist, 1, which.min)

# Put each dt_t name side by side with its closest dt_f name.
dt_match <- cbind(dt_t, dt_f[match_idx])
编辑 - - - - - - - - - - - - - - - - -
若要对每一行逐个应用同样的匹配方法:
library(data.table)
# Names to look up, stored as a list column (one string per row).
dt_t <- data.table(
  Name = as.list(c("Aaron RAMSEY", "Mesut OEZIL", "Sergio AGUERO"))
)
# Candidate names to match against, also stored as a list column.
dt_f <- data.table(
  Name = as.list(c("Özil Mesut", "Ramsey Aaron", "Kun Agüero"))
)
#' Find the closest approximate match in `y` for each element of `x`
#'
#' Distances are computed with `adist()` using `partial = TRUE` and
#' `ignore.case = TRUE`.
#'
#' @param x A string, or a vector/list of strings, to look up.
#' @param y A vector/list of candidate strings.
#' @return A list holding, for each element of `x`, the element of `y` with
#'   the smallest distance. For a single `x` this is a list of length one.
#'   Elements of `x` for which `which.min()` finds no minimum contribute
#'   nothing, so the result is an empty list when `y` is empty.
minDistMatch <- function(x, y) {
  x <- as.list(x)
  y <- as.list(y)
  # Rows of the distance matrix correspond to `x`, columns to `y`.
  dist_mat <- adist(x, y, partial = TRUE, ignore.case = TRUE)
  # Take the minimum per row: calling which.min() on the whole matrix would
  # return a column-major linear index, which is not a valid index into `y`
  # as soon as `x` has more than one element.
  best <- lapply(
    seq_len(nrow(dist_mat)),
    function(i) which.min(dist_mat[i, ])
  )
  y[unlist(best, use.names = FALSE)]
}
# Add a list column `Match` by reference: for every entry of `Name`, call
# minDistMatch() with dt_f$Name as the candidates. FUN.VALUE = list(1L) makes
# vapply() insist that each call returns a list of length one.
dt_t[, Match := vapply(
  X = Name,
  FUN = minDistMatch,
  FUN.VALUE = list(1L),
  y = dt_f$Name
)]