下面这种更直接地处理语料库对象的替代方案如何?
# Load quanteda (corpus / tokens functions) and magrittr (the %>% pipe).
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less obvious error.
library(quanteda)
library(magrittr)

# Three short example documents, each containing the bullet character "•".
corpus3 <- corpus(c(
  "• R Tutorial",
  "More of these • characters •",
  "Tricky •!"
))

# Option 1: remove the character from the tokenized corpus
# NOTE(review): the transcript below shows no "•" tokens even before
# tokens_remove() is applied; presumably they were lost when the output was
# pasted. Re-run to confirm.
tokens(corpus3)
## tokens from 3 documents.
## text1 :
## [1] "R" "Tutorial"
##
## text2 :
## [1] "More" "of" "these" "characters"
##
## text3 :
## [1] "Tricky" "!"
tokens(corpus3) %>% tokens_remove("•")
## tokens from 3 documents.
## text1 :
## [1] "R" "Tutorial"
##
## text2 :
## [1] "More" "of" "these" "characters"
##
## text3 :
## [1] "Tricky" "!"

# Option 2: remove the character from the corpus itself
# fixed = TRUE makes gsub() match "•" literally rather than as a regular
# expression. Only the bullet is deleted, so the surrounding spaces remain.
# NOTE(review): texts() and `texts<-` appear to be deprecated in newer
# quanteda releases; confirm against the installed version.
texts(corpus3) <- gsub("•", "", texts(corpus3), fixed = TRUE)
texts(corpus3)
## text1 text2 text3
## " R Tutorial" "More of these characters " "Tricky !"