这是另一种可能的解决方案。它没有什么特别的优势，只是我觉得它比较容易理解：
# Split every two-character column of `myd` into two single-character columns
# ("<name>a" = first character, "<name>b" = second character) and recode the
# letters through the `recode` lookup table defined below.

# Add another test column. "QW" and "CK" contain letters that are absent from
# `recode`, so they show how unknown letters end up as NA.
myd$M5 <- c("AB", "GT", "GA", "QW", "CK")

# Convert to matrix for speed and indexing benefits.
mat <- as.matrix(myd)
n_col <- ncol(mat)

# Positions of the "a" and "b" halves in the interleaved output:
# source column i maps to output columns 2i - 1 and 2i.
first_idx <- seq(1, by = 2, length.out = n_col)
second_idx <- first_idx + 1

# Construct new column names.
new_names <- character(length = n_col * 2)
new_names[first_idx] <- paste0(colnames(mat), "a")
new_names[second_idx] <- paste0(colnames(mat), "b")

# Create empty matrix with correct column names.
newmat <- matrix(ncol = n_col * 2, nrow = nrow(mat))
colnames(newmat) <- new_names

# Split columns. substr() is vectorized over the whole matrix and returns the
# values in column-major order, so no per-column loop is needed.
newmat[, first_idx] <- substr(mat, 1, 1)
newmat[, second_idx] <- substr(mat, 2, 2)

# Use named vector to recode data. Letters without an entry in `recode`
# become NA.
recode <- c(A = 1, B = 2, C = 3, G = 4, T = 5)

# `newmat[] <-` keeps the dimensions and column names. Because `newmat` is a
# character matrix at this point, the codes are stored as strings ("1", "2",
# ...); run `storage.mode(newmat) <- "numeric"` afterwards if numbers are
# needed.
newmat[] <- recode[newmat]
newmat
# M1a M1b M2ka M2kb M3la M3lb M4a M4b M5a M5b
# [1,] "1" "1" "1" "4" "1" "1" "3" "3" "1" "2"
# [2,] "2" "2" "4" "4" "5" "5" "3" "5" "4" "5"
# [3,] "1" "2" "1" "1" "1" "1" "3" "5" "4" "1"
# [4,] "2" "2" "4" "4" "5" "5" "3" "5" NA NA
# [5,] "2" "2" "4" "4" "1" "5" "3" "3" "3" NA