不知出于什么原因,qdap:::strip 似乎总是会去掉字符向量中的 "/"。
在该函数源代码的末尾有这样一行:
# Quoted line: every "-" and then every "/" in x is replaced with a space, and
# the result is passed through clean() before being assigned back to x.
x <- clean(gsub("/", " ", gsub("-", " ", x)))
这一行会在真正执行剥离操作的内部函数(定义在 strip 的函数体中)被调用之前运行……
所以只需用您自己的版本替换该函数:
#' Strip punctuation (and optionally digits and apostrophes) from text
#'
#' Each element of `x` is converted to character, has its punctuation removed
#' (except apostrophes and anything listed in `char.keep`), is optionally
#' lower-cased, and is optionally cleared of apostrophes and digits. Leading
#' and trailing whitespace is trimmed with `Trim()`.
#'
#' NOTE(review): `Trim()` is not defined in this snippet; presumably it is
#' qdap's whitespace trimmer, so qdap must be attached -- confirm.
#'
#' @param x A character vector, or a list whose elements are coercible to
#'   character. Every string of a multi-string list element is processed.
#' @param char.keep Character vector of punctuation to keep. Each entry is
#'   prefixed with a backslash and inserted into the regular expression.
#'   `NULL` keeps nothing beyond the apostrophe.
#' @param digit.remove If `TRUE`, remove all digits.
#' @param apostrophe.remove If `TRUE`, remove apostrophes.
#' @param lower.case If `TRUE`, convert the text to lower case.
#' @return The stripped strings flattened with `unlist()`; `NULL` when `x`
#'   has length zero.
strip.new <- function(x, char.keep = "~~", digit.remove = TRUE,
                      apostrophe.remove = TRUE, lower.case = TRUE) {
  # The pattern does not depend on the element being processed, so build it
  # once. Group 1 captures what survives: end of string, an apostrophe, a kept
  # character, or any non-punctuation character.
  pattern <- if (is.null(char.keep)) {
    ".*?($|'|[^[:punct:]]).*?"
  } else {
    paste0(
      ".*?($|'|",
      paste(paste0("\\", char.keep), collapse = "|"),
      "|[^[:punct:]]).*?"
    )
  }

  # A scalar ifelse() would return only the first string of a multi-string
  # element, so use a plain `if`. Coercing through as.logical() keeps
  # accepting truthy values such as 1 or "TRUE".
  remove_digits <- isTRUE(as.logical(digit.remove))

  strip_one <- function(element) {
    out <- Trim(gsub(pattern, "\\1", as.character(element)))
    if (lower.case) {
      out <- tolower(out)
    }
    if (apostrophe.remove) {
      out <- gsub("'", "", out)
    }
    if (remove_digits) {
      out <- gsub("[[:digit:]]", "", out)
    }
    # Removing digits can expose new leading/trailing blanks; trim again.
    Trim(out)
  }

  unlist(lapply(x, strip_one))
}
# Example: keep "/" and digits while still dropping other punctuation.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
strip.new(htxt, char.keep = "/", digit.remove = FALSE,
          apostrophe.remove = TRUE, lower.case = TRUE)
#[1] "rtf1ansiansicpg1252cocoartf1038cocoasubrtf360/"
#[2] "fonttblf0fswissfcharset0 helvetica"
#[3] "margl1440margr1440vieww9000viewh8400viewkind0"
该包的作者在本站非常活跃,所以他或许能解释为什么 strip 默认会这样做。