我在 R 中的 LDA 模型遇到了问题。每次尝试对我的 LDA_VEM 对象执行 tidy() 函数时,我都会收到错误:“找不到绑定:'Var1'”。你能解释一下如何解决这个问题吗?代码如下:
# Load the raw documents and tokenize them into one word per row.
# NOTE(review): no library() calls are visible in this script; dplyr, tidytext
# and stringr must already be attached for the calls below -- confirm.
# Empty cells are read as NA; only the first column (V1) is used.
why <- read.csv("FakeDoc.csv", header = FALSE, na.strings = "")

# tibble() replaces the deprecated data_frame(). Each row gets a sequential
# `document` id, which every later table is keyed on.
why.char <- tibble(text = as.character(why$V1)) %>%
  mutate(document = row_number())

# One token per row, minus stop words and any token containing a digit.
# The join key is spelled out so the join does not depend on whichever column
# names the two tables happen to share.
why.tidy <- why.char %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!str_detect(word, "[0-9]"))
# Frequency Table
# Per-document word counts (`n`), most frequent first. count() on an
# ungrouped table already returns an ungrouped result.
why.doc <- why.tidy %>%
  count(document, word, sort = TRUE)

# Number of retained words in each document.
why.words <- why.doc %>%
  group_by(document) %>%
  summarize(total = sum(n))

# Word counts alongside their document totals.
why.ft <- left_join(why.doc, why.words, by = "document")

# Unigram table in the (document, word, total) layout shared with the bigram
# and trigram tables. In those tables `total` holds the per-document count of
# the term, so the unigram weight must be `n` as well -- not the document-wide
# word total, which would give every word in a document the same weight.
grams1_united <- why.doc %>%
  rename(total = n)
# N-grams
# Bigram tokens per document, dropping any bigram that contains a digit.
tidy.n2 <- why.char %>%
  unnest_tokens(ngram, text, token = "ngrams", n=2) %>%
  filter(!str_detect(ngram, "[0-9]"))

# Trigram tokens per document, with the same digit filter.
tidy.n3 <- why.char %>%
  unnest_tokens(ngram, text, token = "ngrams", n=3) %>%
  filter(!str_detect(ngram, "[0-9]"))

# Corpus-wide n-gram frequencies, printed for inspection only (not stored).
count(tidy.n2, ngram, sort = TRUE)
count(tidy.n3, ngram, sort = TRUE)
# Bigrams: split into component words, drop any bigram containing a stop
# word, then count per document.
grams2_seperated <- tidy.n2 %>%
  separate(ngram, c("word1", "word2"), sep = " ")
grams2_filtered <- grams2_seperated %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  )

# Corpus-wide bigram frequencies.
gram2_counts <- grams2_filtered %>%
  count(word1, word2, sort = TRUE)

# Per-document bigram counts. Counting by (document, ngram) directly returns
# an ungrouped tibble, so no grouping leaks into later steps.
grams2_united <- grams2_filtered %>%
  unite(ngram, word1, word2, sep = " ") %>%
  count(document, ngram, sort = TRUE)
grams2_united

# Trigrams: same treatment with three component words.
grams3_seperated <- tidy.n3 %>%
  separate(ngram, c("word1", "word2", "word3"), sep = " ")
grams3_filtered <- grams3_seperated %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  )

# Corpus-wide trigram frequencies.
gram3_counts <- grams3_filtered %>%
  count(word1, word2, word3, sort = TRUE)

# Per-document trigram counts (ungrouped, as above).
grams3_united <- grams3_filtered %>%
  unite(ngram, word1, word2, word3, sep = " ") %>%
  count(document, ngram, sort = TRUE)

# Rename by column name rather than by position so both tables end up in the
# (document, word, total) layout even if the column order ever changes.
grams2_united <- grams2_united %>%
  rename(word = ngram, total = n)
grams3_united <- grams3_united %>%
  rename(word = ngram, total = n)
# DTM
# Print the three per-document count tables for inspection.
grams1_united
grams2_united
grams3_united

# Stack unigram, bigram and trigram counts into one long table.
# bind_rows() matches columns by name and is the supported way to combine
# tibbles; calling the rbind.data.frame() S3 method directly bypasses
# dispatch. ungroup() guards against grouping carried in by any input.
detractorwhy.tots <- bind_rows(grams1_united, grams2_united, grams3_united) %>%
  ungroup()
dtwtots <- as.data.frame(detractorwhy.tots)

# Document-term matrix: rows are documents, columns are terms, cells are
# taken from `total`.
dtw.dtm <- dtwtots %>%
  cast_dtm(document, word, total)

# Five-topic LDA. The seed makes the fit reproducible between runs.
dtw_5lda <- LDA(dtw.dtm, k = 5, control = list(alpha = 0.05, seed = 1234))

# Per-topic, per-term probabilities (beta matrix), one row per topic/term.
# NOTE(review): the reported "binding not found: 'Var1'" error is raised
# inside tidy(), not by this script. It presumably comes from a version
# mismatch between tidytext and dplyr -- TODO confirm by updating both
# packages and checking that tidytext is attached when this line runs.
topics <- tidy(dtw_5lda, matrix = "beta")