你可以帮帮我吗?我想分析一些句子以将它们分配给主题这不是一个困难的过程,但是当我调用 terms(x) 时,我只看到数字,例如
terms(x) 主题 1 主题 2 主题 3 主题 4 主题 5 “121880” “173807” “38655” “190018” “67905” 我的代码有什么问题?
library("RODBC")
library("tm")
library("wordcloud")
library("SnowballC")
library("pvclust")
library("textcat")
dbHandle <- odbcDriverConnect("driver={SQL Server};server=BIDBRU01;database=ClinCheckCommentsClust;trusted_connection=true")
sql <-
"select --top (10000)
rx_form_id
, doctor_comment
, comment_date
from dbo.tblPuFormBinInternal cc
where comment_date >= '20140101'
and comment_date < '20140601'"
cccomments <- sqlQuery(dbHandle, sql)
cccomments.lang_category <- textcat(cccomments$doctor_comment);
comments.eng <- cccomments[cccomments.lang_category == 'english',]
cc.corpus <- Corpus(VectorSource(comments.eng$doctor_comment))
tdm <- TermDocumentMatrix(cc.corpus,
control = list(removePunctuation = TRUE,
stopwords =TRUE,
weighting =
function(x)
weightTf(x)
))
tdm <- removeSparseTerms(tdm, sparse=0.8)
library(RTextTools)
library(topicmodels)
k <- length(unique(cc.corpus$Topic.Code))
x <- LDA(tdm, 5)
str(x)
terms(x)
topics(x)
m = as.matrix(tdm);
v = sort(colSums(m), decreasing=TRUE);
myNames = names(v);
k = which(names(v)=="miners");
myNames[k] = "mining";
d = data.frame(word=myNames, freq=v);
wordcloud(d$word, colors=c(3,4), random.color=FALSE, d$freq, min.freq=20);
> str(x)
Formal class 'LDA_VEM' [package "topicmodels"] with 14 slots
..@ alpha : num 5160
..@ call : language LDA(x = tdm, k = 5)
..@ Dim : int [1:2] 3 284848
..@ control :Formal class 'LDA_VEMcontrol' [package "topicmodels"] with 13 slots
.. .. ..@ estimate.alpha: logi TRUE
.. .. ..@ alpha : num 10
.. .. ..@ seed : int 1423217142
.. .. ..@ verbose : int 0
.. .. ..@ prefix : chr "C:\\Users\\Admin\\AppData\\Local\\Temp\\RtmpIvmDWl\\file136030c2577d"
.. .. ..@ save : int 0
.. .. ..@ nstart : int 1
.. .. ..@ best : logi TRUE
.. .. ..@ keep : int 0
.. .. ..@ estimate.beta : logi TRUE
.. .. ..@ var :Formal class 'OPTcontrol' [package "topicmodels"] with 2 slots
.. .. .. .. ..@ iter.max: int 500
.. .. .. .. ..@ tol : num 1e-06
.. .. ..@ em :Formal class 'OPTcontrol' [package "topicmodels"] with 2 slots
.. .. .. .. ..@ iter.max: int 1000
.. .. .. .. ..@ tol : num 1e-04
.. .. ..@ initialize : chr "random"
..@ k : int 5
..@ terms : chr [1:284848] "1" "2" "3" "4" ...
..@ documents : chr [1:3] "ipr" "lower" "upper"
..@ beta : num [1:5, 1:284848] -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 ...
..@ gamma : num [1:3, 1:5] 0.208 0.196 0.197 0.195 0.203 ...
..@ wordassignments:List of 5
.. ..$ i : int [1:244179] 1 1 1 1 1 1 1 1 1 1 ...
.. ..$ j : int [1:244179] 6 15 34 50 54 63 69 77 88 91 ...
.. ..$ v : num [1:244179] 1 1 3 3 1 3 3 3 3 3 ...
.. ..$ nrow: int 3
.. ..$ ncol: int 284848
.. ..- attr(*, "class")= chr "simple_triplet_matrix"
..@ loglikelihood : num [1:3] -1210792 -1400929 -1405806
..@ iter : int 3
..@ logLiks : num(0)
..@ n : int 343192
> terms(x)
Topic 1 Topic 2 Topic 3 Topic 4 Topic 5
"121880" "173807" "38655" "190018" "67905"
> topics(x)
ipr lower upper
3 2 4