By creating a subset of words from a corpus in R, one can easily turn a term-document matrix into a word cloud.
Is there a similar function in a Python library that takes either a raw text file, an NLTK corpus, or a Gensim MmCorpus and turns it into a word cloud?
The result would look something like this:
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt

stopwords = set(STOPWORDS)

def show_wordcloud(data, title=None):
    wordcloud = WordCloud(
        background_color='white',
        stopwords=stopwords,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1  # chosen at random by flipping a coin; it was heads
    ).generate(str(data))

    fig = plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        fig.subplots_adjust(top=2.3)

    plt.imshow(wordcloud)
    plt.show()

# example usage on two columns of review text (DataFrames from the answerer's own data)
show_wordcloud(Samsung_Reviews_Negative['Reviews'])
show_wordcloud(Samsung_Reviews_positive['Reviews'])
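One caveat with this pattern: when data is a pandas Series, as in the calls above, str(data) renders the Series with its index labels, a truncation marker for long Series, and a Name/dtype footer, so those tokens can leak into the cloud. A small sketch of joining the rows into one string first (reusing the answer's hypothetical DataFrame name):

# join the review texts into a single string so pandas index labels
# and the dtype footer do not end up in the word cloud
negative_text = ' '.join(Samsung_Reviews_Negative['Reviews'].astype(str))
show_wordcloud(negative_text, title='Negative reviews')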
A code example from amueller's wordcloud package
On the command line / in a terminal:
sudo pip install wordcloud
Then run this Python script:
## Simple WordCloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS

text = 'all your base are belong to us all of your base base base'

def generate_wordcloud(text):  # optionally add: stopwords=STOPWORDS and change the arg below
    wordcloud = WordCloud(font_path='/Library/Fonts/Verdana.ttf',  # macOS font path; adjust for your system
                          width=800, height=400,
                          relative_scaling=1.0,
                          stopwords={'to', 'of'}  # set or space-separated string
                          ).generate(text)
    fig = plt.figure(1, figsize=(8, 4))
    plt.axis('off')
    plt.imshow(wordcloud)

    ## Pick One:
    # plt.show()
    plt.savefig("WordCloud.png")

generate_wordcloud(text)
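To run the same function on a raw text file, one of the inputs asked about in the question, read the file into a string first; a minimal sketch, with the file name purely illustrative:

# read a plain-text file and pass its contents to generate_wordcloud
with open('reviews.txt', encoding='utf-8') as f:  # illustrative file name
    file_text = f.read()
generate_wordcloud(file_text)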
Here is the short version of the code:
# make wordcloud
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt

stopwords = set(STOPWORDS)

def show_wordcloud(data, title=None):
    wordcloud = WordCloud(
        background_color='white',
        stopwords=stopwords,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1  # chosen at random by flipping a coin; it was heads
    ).generate(str(data))

    fig = plt.figure(1, figsize=(12, 12))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        fig.subplots_adjust(top=2.3)

    plt.imshow(wordcloud)
    plt.show()

if __name__ == '__main__':
    show_wordcloud(text_str)  # text_str: the input text to visualize
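text_str above can come from an NLTK corpus, another of the inputs mentioned in the question. A minimal sketch, assuming the corpus has been fetched with nltk.download('gutenberg') (the corpus and file choice are only illustrative):

# build text_str from an NLTK corpus and feed it to show_wordcloud
from nltk.corpus import gutenberg

text_str = ' '.join(w for w in gutenberg.words('austen-emma.txt') if w.isalpha())
show_wordcloud(text_str)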
# Build a document-term matrix with scikit-learn, then draw a bar chart of
# frequent words per class and a word cloud per document. Assumes a DataFrame
# DF with a text column "W" and a label column "T".
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from wordcloud import WordCloud

cv = CountVectorizer()
cvData = cv.fit_transform(DF["W"]).toarray()
cvDF = pd.DataFrame(data=cvData, columns=cv.get_feature_names_out())  # get_feature_names() on older scikit-learn
cvDF["target"] = DF["T"]

def w_count(tar):
    # collect (word, count) pairs for words that occur more than 4 times
    # in some document of class `tar`
    MO = cvDF[cvDF["target"] == tar].drop("target", axis=1)
    x = []
    y = []
    for i in range(MO.shape[0]):
        for j in MO.columns:
            if MO.iloc[i][j] > 4:
                x.append(j)
                y.append(MO.iloc[i][j])
    return x, y

# one bar chart of frequent words per class
for tar in cvDF["target"].unique():
    x, y = w_count(tar)
    plt.figure(figsize=(10, 6))
    plt.title(tar)
    plt.xticks(rotation="vertical")
    plt.bar(x, y)
    plt.show()

# one word cloud per document, built from that document's row of the matrix
for c in range(len(DF)):
    counts = cvDF.drop("target", axis=1).iloc[c]
    data = {word: int(n) for word, n in counts.items() if n > 1}
    wc = WordCloud(width=800, height=400, max_words=200).generate_from_frequencies(data)
    plt.figure(figsize=(10, 10))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.title(DF['T'][c])
    plt.show()
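The generate_from_frequencies call used here is also a natural route for the Gensim MmCorpus case from the question: aggregate the corpus' (term id, count) pairs into word frequencies and hand them to WordCloud. A sketch, assuming a serialized corpus and its dictionary are on disk (file names are illustrative):

# Gensim MmCorpus -> aggregated term frequencies -> word cloud
from collections import Counter
import matplotlib.pyplot as plt
from gensim import corpora
from wordcloud import WordCloud

corpus = corpora.MmCorpus('corpus.mm')               # illustrative path
dictionary = corpora.Dictionary.load('corpus.dict')  # illustrative path

freqs = Counter()
for doc in corpus:                    # each doc is a list of (term_id, count) pairs
    for term_id, count in doc:
        freqs[dictionary[term_id]] += count

wc = WordCloud(width=800, height=400).generate_from_frequencies(freqs)
plt.figure(figsize=(10, 10))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()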