比较各建议解决方案的性能:
# Sample coordinates: 11 rows holding 5 distinct (LAT, LONG) pairs.
df <- data.frame(
  LAT = c(
    13.533, 13.533, 13.533, 13.533,
    13.533, 13.533, 13.533,
    13.534, 13.534,
    13.534,
    13.367
  ),
  LONG = c(
    -15.418, -15.418, -15.418, -15.418,
    -15.417, -15.417, -15.417,
    -14.935, -14.935,
    -15.917,
    -14.619
  )
)
#' Add a group id column built from a factor interaction
#'
#' Every distinct combination of the key columns receives one id. The ids are
#' the codes of `interaction(..., drop = TRUE)`, so they follow the order of
#' the interaction's levels rather than the order in which combinations first
#' appear in `df`.
#'
#' @param df A data frame.
#' @param cols Selector for the key columns, used as `df[cols]`.
#' @return `df` with an extra numeric (double) column `id`.
f1 <- function(df, cols) {
  key_columns <- df[cols]
  # drop = TRUE discards unused level combinations, keeping codes contiguous.
  groups <- interaction(key_columns, drop = TRUE)
  df$id <- as.numeric(groups)
  df
}
#' Add a group id column by matching pasted row keys
#'
#' Every distinct combination of the key columns receives one id. Ids are
#' numbered in order of first appearance in `df`.
#'
#' @param df A data frame.
#' @param cols Selector for the key columns, used as `df[cols]`.
#' @return `df` with an extra integer column `id`.
f2 <- function(df, cols) {
  # Strip the names: do.call() passes list names as argument names, so a key
  # column called "sep" or "collapse" would be taken as an argument of paste().
  keys <- unname(as.list(df[cols]))
  # Join with the ASCII unit separator instead of ".". With "." the rows
  # (1.2, 3) and (1, 2.3) both become "1.2.3" and would wrongly share an id.
  comb <- do.call(paste, c(keys, sep = "\x1f"))
  df$id <- match(comb, unique(comb))
  df
}
# Example: label the sample data using both columns (LAT, LONG) as the key.
# The printed result below shows ids assigned in order of first appearance.
f2(df, 1:2)
#> LAT LONG id
#> 1 13.533 -15.418 1
#> 2 13.533 -15.418 1
#> 3 13.533 -15.418 1
#> 4 13.533 -15.418 1
#> 5 13.533 -15.417 2
#> 6 13.533 -15.417 2
#> 7 13.533 -15.417 2
#> 8 13.534 -14.935 3
#> 9 13.534 -14.935 3
#> 10 13.534 -15.917 4
#> 11 13.367 -14.619 5
# Time both implementations on the sample data. In the recorded run below
# (100 evaluations each, microseconds) f2 has a median of about 84 versus
# about 573 for f1.
microbenchmark::microbenchmark(f1(df, 1:2), f2(df, 1:2))
#> Unit: microseconds
#> expr min lq mean median uq max neval cld
#> f1(df, 1:2) 486.400 510.422 575.26659 573.3945 594.1165 1622.243 100 b
#> f2(df, 1:2) 72.952 79.208 86.09265 83.5275 89.7195 159.740 100 a