我有一个如下所示的数据集:
我想做以下事情:
- 确保两个系列的柱形互不重叠。
- 将每个系列的柱形视为独立的数据集,即 x 轴上的标签应当分开:黄色系列一组,红色系列一组。这些标签应为单词(我希望这张图中有两组 xtick 标签):一组对应 words_2,另一组对应 words_1。
当前代码:
import matplotlib.pyplot as plt
import numpy as np
import copy
import random
from random import randint
# Fixed seed so the randomly generated second series is reproducible.
random.seed(11)
# First series: (word, count) pairs.
word_freq_1 = [
    ('test', 510), ('Hey', 362), ("please", 753), ('take', 446),
    ('herbert', 325), ('live', 222), ('hate', 210), ('white', 191),
    ('simple', 175), ('harry', 172), ('woman', 170), ('basil', 153),
    ('things', 129), ('think', 126), ('bye', 124), ('thing', 120),
    ('love', 107), ('quite', 107), ('face', 107), ('eyes', 107),
    ('time', 106), ('himself', 105), ('want', 105), ('good', 105),
    ('really', 103), ('away', 100), ('did', 100), ('people', 99),
    ('came', 97), ('say', 97), ('cried', 95), ('looked', 94),
    ('tell', 92), ('look', 91), ('world', 89), ('work', 89),
    ('project', 88), ('room', 88), ('going', 87), ('answered', 87),
    ('mr', 87), ('little', 87), ('yes', 84), ('silly', 82),
    ('thought', 82), ('shall', 81), ('circle', 80), ('hallward', 80),
    ('told', 77), ('feel', 76), ('great', 74), ('art', 74),
    ('dear', 73), ('picture', 73), ('men', 72), ('long', 71),
    ('young', 70), ('lady', 69), ('let', 66), ('minute', 66),
    ('women', 66), ('soul', 65), ('door', 64), ('hand', 63),
    ('went', 63), ('make', 63), ('night', 62), ('asked', 61),
    ('old', 61), ('passed', 60), ('afraid', 60), ('night', 59),
    ('looking', 58), ('wonderful', 58), ('gutenberg-tm', 56), ('beauty', 55),
    ('sir', 55), ('table', 55), ('turned', 54), ('lips', 54),
    ("one's", 54), ('better', 54), ('got', 54), ('vane', 54),
    ('right', 53), ('left', 53), ('course', 52), ('hands', 52),
    ('portrait', 52), ('head', 51), ("can't", 49), ('true', 49),
    ('house', 49), ('believe', 49), ('black', 49), ('horrible', 48),
    ('oh', 48), ('knew', 47), ('curious', 47), ('myself', 47),
]
# Second series: the same words in the same order, each paired with a random
# count in [1, 500]. The loop variable is deliberately not called `tuple`, so
# the builtin is never shadowed (Python 2 leaks comprehension variables into
# the enclosing scope).
word_freq_2 = [(word, randint(1, 500)) for word, _ in word_freq_1]
N = 25  # number of words (bar groups) taken from each series
ind = np.arange(N)  # the x locations for the groups
width = 0.35  # the width of the bars
fig, ax = plt.subplots()
# Split the first N (word, count) pairs of each series into parallel lists of
# labels and bar heights. Slicing with N keeps these in step with `ind`.
words_1 = [word for word, _ in word_freq_1[:N]]
values_1 = [int(count) for _, count in word_freq_1[:N]]
words_2 = [word for word, _ in word_freq_2[:N]]
values_2 = [int(count) for _, count in word_freq_2[:N]]
# Call form of print: valid on Python 3 and, for a single argument, prints
# the same text on Python 2.
print(words_2)
# Draw both series with the same explicit bar width. Without it the red bars
# fall back to matplotlib's default width of 0.8, which is wider than the
# `width` offset applied to the yellow bars, so the two series overlap.
# align='center' is passed explicitly so every bar is centred on its x
# position regardless of the library's default alignment.
rects1 = ax.bar(ind, values_1, width, color='r', align='center')
rects2 = ax.bar(ind + width, values_2, width, color='y', align='center')
# add some text for labels, title and axes ticks
ax.set_ylabel('Words')
ax.set_title('Word Frequencies by Test and Training Set')
# One set of tick labels per series: major ticks sit under the red bars and
# carry words_1, minor ticks sit under the yellow bars and carry words_2.
ax.set_xticks(ind)
ax.set_xticklabels(words_1, rotation=90)
ax.set_xticks(ind + width, minor=True)
ax.set_xticklabels(words_2, rotation=90, minor=True)
# Same small font for both label sets.
ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(axis='both', which='minor', labelsize=6)
fig.tight_layout()
ax.legend((rects1[0], rects2[0]), ('Test', 'Train'))
plt.savefig('test.png')