from itertools import chain
def getKey(dic, value):
    """Return every key of ``dic`` whose value equals ``value``.

    The items are sorted before filtering, so the returned list is in
    ascending key order. An empty list is returned when nothing matches.
    """
    ordered_items = sorted(dic.items())
    return [key for key, stored in ordered_items if stored == value]
# Vectorize returns a list of tuples; each tuple is
# (<position of word in dictionary>, <number of times it occurs in sentence>)
def vectorize(sentence, dictionary):
    """Turn ``sentence`` into a sparse bag-of-words vector.

    Args:
        sentence: Text to vectorize; it is split on whitespace.
        dictionary: Mapping of position -> word.

    Returns:
        A list of ``(position, count)`` tuples, one per distinct word of
        ``sentence`` in order of first appearance. ``position`` is the
        smallest key of ``dictionary`` whose value is the word (the same
        choice ``getKey(dictionary, word)[0]`` makes) and ``count`` is how
        many times the word occurs in ``sentence``. An empty sentence
        gives an empty list.

    Raises:
        IndexError: If a word of ``sentence`` is not a value of
            ``dictionary``. The exception type matches what the earlier
            ``getKey(...)[0]`` lookup raised.
    """
    # Invert position -> word once instead of rescanning the dictionary
    # for every token. Walking the items in ascending key order together
    # with setdefault keeps the smallest position for a repeated word.
    position_of = {}
    for position, known_word in sorted(dictionary.items()):
        position_of.setdefault(known_word, position)

    # Count tokens exactly as written: the dictionary lookup is
    # case-sensitive, so the counting has to be case-sensitive as well.
    counts = {}
    first_seen = []
    for word in sentence.split():
        if word not in counts:
            counts[word] = 0
            first_seen.append(word)
        counts[word] += 1

    vector = []
    for word in first_seen:
        if word not in position_of:
            raise IndexError("word %r is not in the dictionary" % (word,))
        vector.append((position_of[word], counts[word]))
    return vector
# Sample sentences to vectorize.
s1 = "this is is a foo"
s2 = "this is a a bar"
s3 = "that 's a foobar"

# Vocabulary: every distinct whitespace-separated token of the sentences.
# It is sorted because set iteration order is arbitrary; without sorting,
# the position assigned to each word could differ from run to run.
uniq = sorted(set(" ".join([s1, s2, s3]).split()))

# Map position -> word.
dictionary = dict(enumerate(uniq))

v1 = vectorize(s1, dictionary)
v2 = vectorize(s2, dictionary)
v3 = vectorize(s3, dictionary)

# print(x) with a single argument prints the same under Python 2 and 3.
print(v1)
print(v2)
print(v3)