我想取出孟加拉语文本的词云,但是在打印的时候,每个单词的辅音都是分开打印的。
data = pd.read_csv('/content/gdrive/MyDrive/data.csv',encoding='UTF-8')
refined_sentence = " ".join(data)
regex = r"[\u0980-\u09FF]+"
wc = WordCloud(width=800, height=400, mode="RGBA",background_color=None, colormap="hsv",
stopwords = stopwords, font_path="kalpurush.ttf", regexp=regex).generate(refined_sentence)
plt.figure(figsize=(7, 7))
plt.imshow(wc, interpolation='none')
plt.axis("off")
plt.show()