17

我想用 R 构建一个词云(我已经用包wordcloud 做到了),然后给特定的词涂上某种颜色。目前该函数的行为是根据频率为单词着色(这可能很有用),但单词大小已经这样做了,所以我想使用颜色来获得额外的含义。

关于如何在 wordcloud 中为特定单词着色的任何想法?(如果 R 中有另一个 wordcloud 功能,我不知道我非常愿意走那条路。)

一个模拟示例和我的尝试(我试图在同一个庄园中处理 color 参数,我将使用 plot 函数的常规绘图):

library(wordcloud)

x <- paste(rep("how do keep the two words as one chunk in the word cloud", 3), 
           collapse = " ")
X <- data.frame(table(strsplit(x, " ")))
COL <- ifelse(X$Var1 %in% c("word", "cloud", "words"), "red", "black")
wordcloud(X$Var1, X$Freq, color=COL)

编辑:我想补充一点,wordcloud 的新版本(2010 年 1 月 10 日;2.0 版)[谢谢 Ian Fellows 和 David Robinson] 现在是这个功能以及其他一些了不起的补充。这是在 wordcloud 中实现原始目标的代码:

wordcloud(X$Var1, X$Freq, color=COL, ordered.colors=TRUE, random.color=FALSE)
4

1 回答 1

15

编辑:如评论中所述,下面描述的功能现已添加到wordcloud库中。


我的方法是获取 R 函数的代码并对其进行自定义。它只需要更改几行,现在可以采用单一颜色或长度与words.

library(wordcloud)

colored.wordcloud <- function(words,freq,scale=c(4,.5),min.freq=3,max.words=Inf,random.order=TRUE,random.color=FALSE,
        rot.per=.1,colors="black",ordered.colors=FALSE,use.r.layout=FALSE,...) { 
    tails <- "g|j|p|q|y"
    last <- 1
    nc<- length(colors)

    if (ordered.colors) {
        if (length(colors) != 1 && length(colors) != length(words)) {
            stop(paste("Length of colors does not match length of words",
                       "vector"))
        }
    }

    overlap <- function(x1, y1, sw1, sh1) {
        if(!use.r.layout)
            return(.overlap(x1,y1,sw1,sh1,boxes))
        s <- 0
        if (length(boxes) == 0) 
            return(FALSE)
        for (i in c(last,1:length(boxes))) {
            bnds <- boxes[[i]]
            x2 <- bnds[1]
            y2 <- bnds[2]
            sw2 <- bnds[3]
            sh2 <- bnds[4]
            if (x1 < x2) 
                overlap <- x1 + sw1 > x2-s
            else 
                overlap <- x2 + sw2 > x1-s

            if (y1 < y2) 
                overlap <- overlap && (y1 + sh1 > y2-s)
            else 
                overlap <- overlap && (y2 + sh2 > y1-s)
            if(overlap){
                last <<- i
                return(TRUE)
            }
        }
        FALSE
    }

    ord <- rank(-freq, ties.method = "random")
    words <- words[ord<=max.words]
    freq <- freq[ord<=max.words]
    if (ordered.colors) {
        colors <- colors[ord<=max.words]
    }

    if(random.order)
        ord <- sample.int(length(words))
    else
        ord <- order(freq,decreasing=TRUE)
    words <- words[ord]
    freq <- freq[ord]
    words <- words[freq>=min.freq]
    freq <- freq[freq>=min.freq]
    if (ordered.colors) {
        colors <- colors[ord][freq>=min.freq]
    }

    thetaStep <- .1
    rStep <- .05
    plot.new()
    op <- par("mar")
    par(mar=c(0,0,0,0))
    plot.window(c(0,1),c(0,1),asp=1)
    normedFreq <- freq/max(freq)
    size <- (scale[1]-scale[2])*normedFreq + scale[2]
    boxes <- list()



    for(i in 1:length(words)){
        rotWord <- runif(1)<rot.per
        r <-0
        theta <- runif(1,0,2*pi)
        x1<-.5
        y1<-.5
        wid <- strwidth(words[i],cex=size[i],...)
        ht <- strheight(words[i],cex=size[i],...)
        #mind your ps and qs
        if(grepl(tails,words[i]))
            ht <- ht + ht*.2
        if(rotWord){
            tmp <- ht
            ht <- wid
            wid <- tmp  
        }
        isOverlaped <- TRUE
        while(isOverlaped){
            if(!overlap(x1-.5*wid,y1-.5*ht,wid,ht) &&
                    x1-.5*wid>0 && y1-.5*ht>0 &&
                    x1+.5*wid<1 && y1+.5*ht<1){
        if (!random.color) {
                if (ordered.colors) {
                    cc <- colors[i]
                }
                else {
                    cc <- ceiling(nc*normedFreq[i])
                    cc <- colors[cc]
                }
        } else {
         cc <- colors[sample(1:nc,1)]
        }
                text(x1,y1,words[i],cex=size[i],offset=0,srt=rotWord*90,
                        col=cc,...)
                #rect(x1-.5*wid,y1-.5*ht,x1+.5*wid,y1+.5*ht)
                boxes[[length(boxes)+1]] <- c(x1-.5*wid,y1-.5*ht,wid,ht)
                isOverlaped <- FALSE
            }else{
                if(r>sqrt(.5)){
                    warning(paste(words[i],
                                    "could not be fit on page. It will not be plotted."))
                    isOverlaped <- FALSE
                }
                theta <- theta+thetaStep
                r <- r + rStep*thetaStep/(2*pi)
                x1 <- .5+r*cos(theta)
                y1 <- .5+r*sin(theta)
            }
        }
    }
    par(mar=op)
    invisible()
}

一些代码来试试:

colors = c("blue", "red", "orange", "green")
colored.wordcloud(colors, c(10, 5, 3, 9), colors=colors)
于 2012-01-08T22:20:22.903 回答