这里有一个包括 len 和 count 的解决方案:
class GeneratorWithLen(object):
"""
Generator that includes len and count for given queryset
"""
def __init__(self, generator, length):
self.generator = generator
self.length = length
def __len__(self):
return self.length
def __iter__(self):
return self.generator
def __getitem__(self, item):
return self.generator.__getitem__(item)
def next(self):
return next(self.generator)
def count(self):
return self.__len__()
def batch(queryset, batch_size=1024):
"""
returns a generator that does not cache results on the QuerySet
Aimed to use with expected HUGE/ENORMOUS data sets, no caching, no memory used more than batch_size
:param batch_size: Size for the maximum chunk of data in memory
:return: generator
"""
total = queryset.count()
def batch_qs(_qs, _batch_size=batch_size):
"""
Returns a (start, end, total, queryset) tuple for each batch in the given
queryset.
"""
for start in range(0, total, _batch_size):
end = min(start + _batch_size, total)
yield (start, end, total, _qs[start:end])
def generate_items():
queryset.order_by() # Clearing... ordering by id if PK autoincremental
for start, end, total, qs in batch_qs(queryset):
for item in qs:
yield item
return GeneratorWithLen(generate_items(), total)
用法:
events = batch(Event.objects.all())
len(events) == events.count()
for event in events:
# Do something with the Event