Here is the example of using bisect and comparison with using the sort of the partially sorted list. The bisect solution clearly wins:
import bisect
import random
import timeit
def bisect_solution(size=10000):
lst = []
for n in xrange(size):
value = random.randint(1, 1000000)
bisect.insort_left(lst, value)
return lst
# Cut out of the bisect module to be used in bisect_solution2()
def insort_left(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the left of the leftmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
a.insert(lo, x)
def bisect_solution2(size=10000):
lst = []
for n in xrange(size):
value = random.randint(1, 1000000)
insort_left(lst, value)
return lst
def sort_solution(size=10000):
lst = []
for n in xrange(size):
value = random.randint(1, 1000000)
lst.append(value)
lst.sort()
return lst
t = timeit.timeit('bisect_solution()', 'from __main__ import bisect_solution', number = 10)
print "bisect_solution: ", t
t = timeit.timeit('bisect_solution2()', 'from __main__ import bisect_solution2', number = 10)
print "bisect_solution2: ", t
t = timeit.timeit('sort_solution()', 'from __main__ import sort_solution', number = 10)
print "sort_solution: ", t
The bisect_solution2() is almost the same as bisect_solution() -- only with the code copied-out of the module. Someone else should explain why it takes more time :)
The bisect_solution2() is here to be modified for cmp() function to be able to compare the tuples.
It shows the following results on my computer:
bisect_solution: 0.637892403587
bisect_solution2: 0.988893038133
sort_solution: 15.3521410901
Here is a bisect solution adopted for the tuples where date is a string:
import random
import timeit
def random_date_tuple():
s1 = '{0}-{1:02}-{2:02}'.format(random.randint(2000, 2050),
random.randint(1, 12),
random.randint(1, 31))
e2 = random.randint(1,50)
return (s1, e2)
def my_cmp(a, b):
result = cmp(a[0], b[0]) # comparing the date part of the tuple
if result == 0:
return cmp(a[1], b[1]) # comparint the other part of the tuple
return result
def my_insort_left(a, x, cmp=my_cmp, lo=0, hi=None):
"""The bisect.insort_left() modified for comparison of tuples."""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if cmp(a[mid], x) < 0:
lo = mid+1
else:
hi = mid
a.insert(lo, x)
def bisect_solution3(size=1000):
lst = []
for n in xrange(size):
value = random_date_tuple()
my_insort_left(lst, value)
return lst
def sort_solution(size=1000):
lst = []
for n in xrange(size):
value = random_date_tuple()
lst.append(value)
lst.sort(cmp=my_cmp)
return lst
t = timeit.timeit('bisect_solution3()', 'from __main__ import bisect_solution3', number = 10)
print "bisect_solution3: ", t
t = timeit.timeit('sort_solution()', 'from __main__ import sort_solution', number = 10)
print "sort_solution: ", t
print bisect_solution3()[:10]
Notice that the list size is 10 times less than in the previous as the sort solution was very slow. It prints:
bisect_solution3: 0.223602245968
sort_solution: 3.69388944301
[('2000-02-01', 20), ('2000-02-13', 48), ('2000-03-11', 25), ('2000-03-13', 43),
('2000-03-26', 48), ('2000-05-04', 17), ('2000-06-06', 23), ('2000-06-12', 31),
('2000-06-15', 15), ('2000-07-07', 50)]