curiousest · October 15, 2015 11:28
diff --git a/gistfile1.txt b/gistfile1.txt
 import gc


 # inspiration: https://djangosnippets.org/snippets/1949/
 def queryset_memreduce_iterator(queryset, field='pk', queryset_filter='pk__gt',
                                 ordering_function=lambda x, y: x < y,
                                 chunksize=1000):
     """Iterate over a Django queryset in chunks of at most ``chunksize`` rows.

     The queryset is expected to be ordered by ``field`` already. The first
     chunk is the plain slice ``queryset[:chunksize]``; every later chunk is
     fetched by filtering the queryset on the ``field`` value of the last row
     yielded and slicing again, so no single query asks for more than
     ``chunksize`` rows. ``gc.collect()`` is called after each chunk.

     Args:
         queryset: The ordered queryset to walk through.
         field: Name of the attribute the queryset is ordered by; its value
             on the last yielded row is the cursor for the next chunk.
         queryset_filter: Lookup used to fetch the rows after the cursor,
             e.g. ``'pk__gt'`` for ascending order. Name the field here again
             when ``field`` is not ``'pk'``.
         ordering_function: Called as ``ordering_function(cursor, last)``;
             iteration continues while it returns true. Defaults to ascending
             (``x < y``). Do not use ``<=`` or ``>=``: the comparison would
             stay true once the last row has been reached.
         chunksize: Maximum number of rows requested per query.

     Yields:
         The rows of ``queryset``, in queryset order. An empty queryset
         yields nothing; a single-row queryset yields that row.

     Raises:
         RuntimeError: If a filtered chunk does not move the cursor, which
             means the next query would return the same rows forever (for
             example an inclusive lookup such as ``__lte`` combined with more
             than ``chunksize`` rows sharing one ``field`` value).
     """
     # Nothing to iterate; also avoids calling getattr() on None below.
     if queryset.first() is None:
         return

     last_item = getattr(queryset.last(), field)

     # Start from an unfiltered slice so the first row is included even when
     # queryset_filter is a strict lookup such as 'pk__gt'.
     chunk = queryset[:chunksize]
     chunk_is_filtered = False
     current_item = None

     while True:
         previous_item = current_item
         fetched_any = False
         for row in chunk:
             fetched_any = True
             current_item = getattr(row, field)
             yield row
         gc.collect()

         # An empty chunk cannot advance the cursor (rows removed meanwhile,
         # or a non-positive chunksize), so stop instead of spinning.
         if not fetched_any:
             break
         if not ordering_function(current_item, last_item):
             break
         if chunk_is_filtered and current_item == previous_item:
             raise RuntimeError(
                 'queryset_memreduce_iterator made no progress past %r; '
                 'use a strict lookup or a larger chunksize' % (current_item,)
             )

         chunk = queryset.filter(**{queryset_filter: current_item})[:chunksize]
         chunk_is_filtered = True


 # Usage example (date field on Job model, desc):
 # The queryset is sorted newest-first, so the lookup and the comparison are
 # both flipped relative to the ascending defaults of the iterator.
 jobs = Job.objects.all().order_by('-date')
 # ordering_function keeps the iteration going while the cursor date is still
 # greater than the date of the last row in the queryset.
 # NOTE(review): 'date__lte' is an inclusive lookup, so a row whose date equals
 # the chunk cursor can be returned again by the next chunk -- confirm whether
 # that repetition is acceptable for the caller.
 memreduced_jobs = queryset_memreduce_iterator(
    jobs, field='date', queryset_filter='date__lte',
    ordering_function=lambda x, y: x > y
 )
 # Consume the generator; replace `pass` with the per-row work.
 for job in memreduced_jobs:
    pass
	import gc


	# inspiration: https://djangosnippets.org/snippets/1949/
	def queryset_memreduce_iterator(queryset, field='pk', queryset_filter='pk__gt',
	                                ordering_function=lambda x, y: x < y,
	                                chunksize=1000):
	    """Iterate over a Django queryset in chunks of at most ``chunksize`` rows.

	    The queryset is expected to be ordered by ``field`` already. The first
	    chunk is the plain slice ``queryset[:chunksize]``; every later chunk is
	    fetched by filtering the queryset on the ``field`` value of the last row
	    yielded and slicing again, so no single query asks for more than
	    ``chunksize`` rows. ``gc.collect()`` is called after each chunk.

	    Args:
	        queryset: The ordered queryset to walk through.
	        field: Name of the attribute the queryset is ordered by; its value
	            on the last yielded row is the cursor for the next chunk.
	        queryset_filter: Lookup used to fetch the rows after the cursor,
	            e.g. ``'pk__gt'`` for ascending order. Name the field here again
	            when ``field`` is not ``'pk'``.
	        ordering_function: Called as ``ordering_function(cursor, last)``;
	            iteration continues while it returns true. Defaults to ascending
	            (``x < y``). Do not use ``<=`` or ``>=``: the comparison would
	            stay true once the last row has been reached.
	        chunksize: Maximum number of rows requested per query.

	    Yields:
	        The rows of ``queryset``, in queryset order. An empty queryset
	        yields nothing; a single-row queryset yields that row.

	    Raises:
	        RuntimeError: If a filtered chunk does not move the cursor, which
	            means the next query would return the same rows forever (for
	            example an inclusive lookup such as ``__lte`` combined with more
	            than ``chunksize`` rows sharing one ``field`` value).
	    """
	    # Nothing to iterate; also avoids calling getattr() on None below.
	    if queryset.first() is None:
	        return

	    last_item = getattr(queryset.last(), field)

	    # Start from an unfiltered slice so the first row is included even when
	    # queryset_filter is a strict lookup such as 'pk__gt'.
	    chunk = queryset[:chunksize]
	    chunk_is_filtered = False
	    current_item = None

	    while True:
	        previous_item = current_item
	        fetched_any = False
	        for row in chunk:
	            fetched_any = True
	            current_item = getattr(row, field)
	            yield row
	        gc.collect()

	        # An empty chunk cannot advance the cursor (rows removed meanwhile,
	        # or a non-positive chunksize), so stop instead of spinning.
	        if not fetched_any:
	            break
	        if not ordering_function(current_item, last_item):
	            break
	        if chunk_is_filtered and current_item == previous_item:
	            raise RuntimeError(
	                'queryset_memreduce_iterator made no progress past %r; '
	                'use a strict lookup or a larger chunksize' % (current_item,)
	            )

	        chunk = queryset.filter(**{queryset_filter: current_item})[:chunksize]
	        chunk_is_filtered = True


	# Usage example (date field on Job model, desc):
	# The queryset is sorted newest-first, so the lookup and the comparison are
	# both flipped relative to the ascending defaults of the iterator.
	jobs = Job.objects.all().order_by('-date')
	# ordering_function keeps the iteration going while the cursor date is still
	# greater than the date of the last row in the queryset.
	# NOTE(review): 'date__lte' is an inclusive lookup, so a row whose date equals
	# the chunk cursor can be returned again by the next chunk -- confirm whether
	# that repetition is acceptable for the caller.
	memreduced_jobs = queryset_memreduce_iterator(
	    jobs, field='date', queryset_filter='date__lte',
	    ordering_function=lambda x, y: x > y
	)
	# Consume the generator; replace `pass` with the per-row work.
	for job in memreduced_jobs:
	    pass
No results found