Here is an interesting way to solve your problem. This is a robust function that returns a generator:
def combine_item_pairs(l1, l2):
    """Merge two collections of ``(key, value)`` pairs by key.

    Args:
        l1: Iterable of ``(key, value)`` pairs; its value fills the second
            slot of each result tuple.
        l2: Iterable of ``(key, value)`` pairs; its value fills the third
            slot of each result tuple.

    Returns:
        A generator of ``(key, value_from_l1, value_from_l2)`` tuples, one
        per distinct key. ``False`` stands in for a value that is missing
        from either input. Tuples are produced in the dict's iteration
        order.
    """
    # Seed every key of l1 with a False placeholder for the l2 value.
    combined = {key: [value, False] for key, value in l1}
    for key, value in l2:
        if key in combined:
            combined[key][1] = value
        else:
            # Key only present in l2: the l1 slot stays False.
            combined[key] = [False, value]
    # items() instead of the Python 2-only iteritems(), so the function
    # runs unchanged on both Python 2 and Python 3.
    return (tuple([key] + values) for key, values in combined.items())
Using it:
>>> list(combine_item_pairs(list_a, list_b))
[('item_2', 'attribute_y', False), ('item_3', 'attribute_z', 'attribute_p'), ('item_1', 'attribute_x', 'attribute_n')]
Here is an extra bonus solution (same interface, but more efficient):
from itertools import groupby
from operator import itemgetter as I
def combine_item_pairs(l1, l2):
    """Merge two collections of ``(key, value)`` pairs using sort + groupby.

    All pairs from both inputs are sorted together and grouped by key.
    Because the pairs are sorted as whole tuples, the values of a key appear
    in their own sort order, not in "l1 first, l2 second" order.

    Args:
        l1: Iterable of ``(key, value)`` pairs.
        l2: Iterable of ``(key, value)`` pairs.

    Returns:
        A generator of 3-tuples ``(key, value, value)``. A key with a single
        value is padded with a trailing ``False``; values beyond the second
        are dropped.
    """
    value_of = I(1)  # build the getter once instead of once per pair
    # groupby only merges adjacent items, so equal keys must be contiguous.
    # list() lets the inputs be any iterables, not just two lists.
    ordered = sorted(list(l1) + list(l2))
    return (
        # Pad with False, then cut to exactly (key, value, value).
        tuple(([key] + [value_of(pair) for pair in group] + [False])[:3])
        for key, group in groupby(ordered, key=I(0))
    )
Results:
>>> list(combine_item_pairs(list_a, list_b))
[('item_1', 'attribute_n', 'attribute_x'), ('item_2', 'attribute_y', False), ('item_3', 'attribute_p', 'attribute_z')]
Note: the efficiency of this solution is diminished if the lists require much sorting, or if a lot of values are absent. Also, currently every absence is reflected by a False
value only in the last item of the tuple, with no way of knowing which list is missing an item (that's the price of efficiency). This should be used with large data, when it is less important to know which list is missing an item.
edit: Timers:
# Sample inputs for the timing comparison: `a` has every item,
# `b` has no entry for 'item_2'.
a = [
    ('item_1', 'attribute_x'),
    ('item_2', 'attribute_y'),
    ('item_3', 'attribute_z'),
]
b = [
    ('item_1', 'attribute_n'),
    ('item_3', 'attribute_p'),
]
def inbar(l1, l2):
    """Combine two collections of ``(key, value)`` pairs through one dict.

    Args:
        l1: Iterable of ``(key, value)`` pairs; supplies the second element
            of each result tuple.
        l2: Iterable of ``(key, value)`` pairs; supplies the third element
            of each result tuple.

    Returns:
        A generator of ``(key, value_from_l1, value_from_l2)`` tuples, with
        ``False`` in place of a value absent from either input.
    """
    # Each entry is [value_from_l1, value_from_l2]; l2's slot starts as False.
    merged = {key: [value, False] for key, value in l1}
    for key, value in l2:
        if key in merged:
            merged[key][1] = value
        else:
            # Key is absent from l1, so its first slot is False.
            merged[key] = [False, value]
    # items() works on both Python 2 and 3; iteritems() is Python 2-only.
    return (tuple([key] + pair) for key, pair in merged.items())
def solus(l1, l2):
    """Combine two collections of ``(key, value)`` pairs via two dict lookups.

    Args:
        l1: Iterable of ``(key, value)`` pairs; supplies the second element
            of each result tuple.
        l2: Iterable of ``(key, value)`` pairs; supplies the third element
            of each result tuple.

    Returns:
        A list of ``(key, value_from_l1, value_from_l2)`` tuples sorted by
        key, with ``False`` in place of a value absent from either input.
    """
    dict_a, dict_b = dict(l1), dict(l2)
    # The union of both dicts' keys is exactly the set of keys in the pairs,
    # so there is no need to concatenate the inputs (which also required
    # them to be sequences of the same type).
    keys = sorted(set(dict_a).union(dict_b))
    return [(key, dict_a.get(key, False), dict_b.get(key, False)) for key in keys]
import timeit  # running each timer 3 times just to be sure.

# print is called with a single parenthesised argument so these lines run on
# both Python 2 and Python 3. repeat=3 is passed explicitly because the
# default repeat count differs between Python versions (3 before 3.7, 5 from
# 3.7 on) and the results recorded below hold three values per run.
# NOTE: inbar returns a generator that is never consumed here, so its timing
# covers building the dict but not building the result tuples.
print(timeit.Timer('inbar(a, b)', 'from __main__ import a, b, inbar').repeat(repeat=3))
# [2.2363221572247483, 2.1427426716407836, 2.1545361420851963]
# [2.2058199808040575, 2.137495707329387, 2.178640404817184]
# [2.4588094406466743, 2.4221991975274215, 2.3586636366037856]
print(timeit.Timer('solus(a, b)', 'from __main__ import a, b, solus').repeat(repeat=3))
# [5.841498824468664, 5.951693880486182, 5.866254325691159]
# [5.843569212526087, 5.919173415087307, 6.027018876010061]
# [6.41402184345621, 6.229860036924308, 6.562849100520403]