我经常需要提取文本字符串的一部分(例如,在以“.”分隔的文本中取中间那一段,其两侧都还有文本)。我最终使用了一段比较别扭的代码,其做法本质上是:1. 用 `strsplit` 分割字符串;2. 用 `unlist` 展开字符串的各个组成部分;3. 用 `matrix` 构造一个矩阵,其行数等于每个字符串的子元素个数;4. 取出我需要的那一行。一定有更好的方法,对吧?
`substr` 虽然可能更简单,但我经常无法使用它,因为字符串各组成部分的长度在整个向量中并不固定。
例子:
# Build reproducible example data: strings of the form "<letter>.<number>.<letter>".
set.seed(1)
n <- 50
# Uniform draws on [1, 26] are truncated toward zero to obtain letter indices.
let1 <- LETTERS[as.integer(runif(n, min = 1, max = 26))]
num <- round(runif(100, min = 1, max = 100))
# Two successive batches of n draws, concatenated into 2 * n trailing letters.
let2 <- LETTERS[as.integer(c(
  runif(n, min = 1, max = 26),
  runif(n, min = 1, max = 26)
))]
# let1 has only n elements, so it is recycled against the longer num and let2.
tmpstr <- paste0(let1, ".", num, ".", let2)
print(tmpstr)
#resulting string
> tmpstr
[1] "G.48.P" "J.86.N" "O.44.I" "W.25.L" "F.8.M" "W.11.E" "X.32.N"
[8] "Q.52.B" "P.67.G" "B.41.F" "F.91.H" "E.30.W" "R.46.L" "J.34.T"
[15] "T.65.W" "M.27.K" "R.48.B" "Y.77.I" "J.9.S" "T.88.I" "X.35.P"
[22] "F.84.V" "Q.35.V" "D.34.J" "G.48.J" "J.89.W" "A.87.Q" "J.40.S"
[29] "V.78.P" "I.96.W" "M.44.H" "O.72.E" "M.41.W" "E.33.M" "U.76.V"
[36] "Q.21.E" "T.71.S" "C.13.S" "S.25.X" "K.15.N" "U.25.R" "Q.7.J"
[43] "T.65.C" "N.88.X" "N.78.H" "T.80.O" "A.46.C" "L.42.V" "S.81.H"
[50] "R.61.T" "G.66.G" "J.36.F" "O.28.M" "W.99.G" "F.64.E" "W.22.M"
[57] "X.14.O" "Q.48.D" "P.92.G" "B.60.R" "F.98.Y" "E.73.C" "R.36.T"
[64] "J.44.X" "T.16.U" "M.2.H" "R.72.Q" "Y.11.X" "J.45.X" "T.64.I"
[71] "X.99.G" "F.50.E" "Q.49.I" "D.18.M" "G.76.X" "J.46.M" "A.52.G"
[78] "J.22.B" "V.24.K" "I.60.V" "M.58.I" "O.9.D" "M.5.J" "E.65.P"
[85] "U.93.J" "Q.60.R" "T.57.R" "C.53.N" "S.99.K" "K.51.L" "U.69.H"
[92] "Q.61.O" "T.25.W" "N.27.D" "N.73.K" "T.46.F" "A.18.K" "L.75.D"
[99] "S.11.L" "R.87.X"
# Possible substring extraction (e.g. the numbers in between the letters).
# Version 1: split every string on the literal ".", flatten all pieces into one
# vector, and fill a 3-row matrix column by column so that each column holds
# the pieces of one string; row 2 is then the middle piece of every string.
# local() keeps the intermediate vector out of the global environment.
local({
  pieces <- unlist(strsplit(tmpstr, ".", fixed = TRUE))
  matrix(pieces, nrow = 3)[2, ]
}) #version 1
# Version 2: strsplit() is already vectorised over its input, so split the
# whole vector once and pull the second piece out of each element. vapply()
# guarantees a character vector of the same length as tmpstr (NA where a string
# has fewer than two pieces, character(0) for empty input), which
# unlist(lapply(...)) does not.
vapply(strsplit(tmpstr, ".", fixed = TRUE), `[`, character(1), 2L) #version 2
#desired result
[1] "48" "86" "44" "25" "8" "11" "32" "52" "67" "41" "91" "30" "46"
[14] "34" "65" "27" "48" "77" "9" "88" "35" "84" "35" "34" "48" "89"
[27] "87" "40" "78" "96" "44" "72" "41" "33" "76" "21" "71" "13" "25"
[40] "15" "25" "7" "65" "88" "78" "80" "46" "42" "81" "61" "66" "36"
[53] "28" "99" "64" "22" "14" "48" "92" "60" "98" "73" "36" "44" "16"
[66] "2" "72" "11" "45" "64" "99" "50" "49" "18" "76" "46" "52" "22"
[79] "24" "60" "58" "9" "5" "65" "93" "60" "57" "53" "99" "51" "69"
[92] "61" "25" "27" "73" "46" "18" "75" "11" "87"