Skip to content

Commit d90aa66

Browse files
committed
PB-1575: make deletion batch size configurable.
One `delete()` per object is very slow. One `delete()` for all the objects consumes a lot of memory and fails in an all-or-nothing mode (i.e. when it fails mid-way, nothing gets deleted). With this change, we split the objects into batches of configurable size and run `delete()` over each batch. It consumes a ~constant amount of memory, is faster than having a batch size of 1 and is more robust. I.e. when it fails mid-way, it will most likely have deleted at least some objects and the next run has that much less work to do. There will be cases where expired items are partially deleted (i.e. some of their assets are deleted but not all). I don't expect that to be a problem for our use case.
1 parent c721eaf commit d90aa66

File tree

3 files changed

+68
-14
lines changed

3 files changed

+68
-14
lines changed

app/config/settings_prod.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@
209209
DELETE_EXPIRED_ITEMS_OLDER_THAN_HOURS = 24
210210
DELETE_EXPIRED_ITEMS_MAX = 110 * 1000
211211
DELETE_EXPIRED_ITEMS_MAX_PERCENTAGE = 50
212+
DELETE_EXPIRED_ITEMS_BATCH_SIZE = 10 * 1000
212213

213214
# Media files (i.e. uploaded content=assets in this project)
214215
UPLOAD_FILE_CHUNK_SIZE = 1024 * 1024 # Size in Bytes

app/stac_api/management/commands/remove_expired_items.py

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from django.conf import settings
55
from django.core.management.base import CommandParser
6+
from django.core.paginator import Paginator
67
from django.utils import timezone
78

89
from stac_api.models.general import BaseAssetUpload
@@ -22,11 +23,53 @@ def __str__(self):
2223

2324
class Handler(CommandHandler):
2425

25-
def delete(self, instance, object_type):
26-
if self.options['dry_run']:
27-
self.print_success(f'skipping deletion of {object_type} {instance}')
28-
else:
29-
instance.delete()
26+
def delete_by_batch(self, queryset, object_type, batch_size):
27+
# When many rows are involved, looping over each one is very slow.
28+
# Running a single delete() against all of them consumes a lot of memory
29+
# and does not delete anything if it fails mid-way. Hence, we batch.
30+
#
31+
# Django's delete() method already batches deletions in groups of 100
32+
# rows. These batches are wrapped within transactions. It does not seem
33+
# to be designed to allow disabling the transaction or tweaking the
34+
# batch size.
35+
# https://github.com/django/django/blob/main/django/db/models/sql/subqueries.py#L26
36+
# https://github.com/django/django/blob/main/django/db/models/deletion.py#L454
37+
# Also, it does not seem to do anything to reduce memory consumption.
38+
#
39+
# In our case, we don't need the deletions to be transactional. If we
40+
# die in the middle, it's fine if some rows are deleted and some are
41+
# not. We can remove the remaining rows next time we run. That's better
42+
# than waiting forever, failing, and having to start from scratch next
43+
# time.
44+
type_name = f'stac_api.{object_type.__name__}'
45+
deleted_count = 0
46+
# We delete rows as we iterate over them. This only works if we iterate
47+
# from the end to the beginning. But we also want to delete the objects
48+
# in the order of the QuerySet we received. Hence, we first reverse the
49+
# QuerySet then we reverse the iterator.
50+
queryset = queryset.reverse()
51+
paginator = Paginator(queryset, batch_size)
52+
for page_number in reversed(paginator.page_range):
53+
page = paginator.page(page_number)
54+
# We cannot just call page.object_list.delete() because DELETE
55+
# does not support LIMIT/OFFSET. So instead we extract the ids
56+
# then we'll build a new QuerySet to DELETE them.
57+
ids = page.object_list.values('id')
58+
expected_deletions = len(ids)
59+
dry_run_prefix = ''
60+
if self.options['dry_run']:
61+
dry_run_prefix = '[dry run]: '
62+
deleted_objs = {}
63+
actual_deletions = expected_deletions
64+
else:
65+
(_, deleted_objs) = object_type.objects.filter(id__in=ids).delete()
66+
actual_deletions = deleted_objs.get(type_name, 0)
67+
deleted_count += actual_deletions
68+
self.print_success(
69+
f'{dry_run_prefix}Deleted {deleted_count}/{paginator.count} {type_name}.'
70+
f' In this batch: {actual_deletions}/{expected_deletions}.'
71+
f' All objects in this batch: {deleted_objs}.'
72+
)
3073

3174
def _raise_if_too_many_deletions(self, max_deletions, max_deletions_pct, items_count):
3275
if items_count > max_deletions:
@@ -42,18 +85,20 @@ def _raise_if_too_many_deletions(self, max_deletions, max_deletions_pct, items_c
4285

4386
def run(self):
4487
self.print_success('running command to remove expired items')
88+
batch_size = self.options['batch_size']
4589
min_age_hours = self.options['min_age_hours']
4690
max_deletions = self.options['max_deletions']
4791
max_deletions_pct = self.options['max_deletions_percentage']
4892
self.print_warning(
4993
f"deleting no more than {max_deletions} or "
5094
f"{max_deletions_pct}%% items expired for longer"
51-
f" than {min_age_hours} hours"
95+
f" than {min_age_hours} hours, {batch_size} at a time"
5296
)
5397

5498
expiration = timezone.now() - timedelta(hours=min_age_hours)
5599

56-
items = Item.objects.filter(properties_expires__lte=expiration)
100+
items = Item.objects.filter(properties_expires__lte=expiration
101+
).order_by('properties_expires')
57102
items_count = items.count()
58103

59104
self._raise_if_too_many_deletions(max_deletions, max_deletions_pct, items_count)
@@ -69,8 +114,8 @@ def run(self):
69114
"WARNING: There were still pending asset uploads for expired items. "
70115
"These were likely stale, so we aborted them"
71116
)
72-
self.delete(assets, 'assets')
73-
self.delete(items, 'items')
117+
self.delete_by_batch(assets, Asset, batch_size)
118+
self.delete_by_batch(items, Item, batch_size)
74119

75120
if self.options['dry_run']:
76121
self.print_success(f'[dry run] would have removed {items_count} expired items')
@@ -105,6 +150,13 @@ def add_arguments(self, parser: CommandParser) -> None:
105150
action='store_true',
106151
help='Simulate deleting items, without actually deleting them'
107152
)
153+
default_batch_size = settings.DELETE_EXPIRED_ITEMS_BATCH_SIZE
154+
parser.add_argument(
155+
'--batch-size',
156+
type='positive_int',
157+
default=default_batch_size,
158+
help=f"How many rows to delete at a time ({default_batch_size})"
159+
)
108160
default_min_age = settings.DELETE_EXPIRED_ITEMS_OLDER_THAN_HOURS
109161
parser.add_argument(
110162
'--min-age-hours',

app/tests/tests_10/test_remove_expired_items.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,11 +230,6 @@ def test_remove_item_dry_run(self):
230230
expected_stdout_patterns=[
231231
"running command to remove expired items",
232232
"deleting no more than 110000 or 50% items expired for longer than 24 hours",
233-
(
234-
"skipping deletion of assets <QuerySet"
235-
" [<Asset: asset-0.tiff>, <Asset: asset-1.tiff>]>"
236-
),
237-
"skipping deletion of items <ItemQuerySet [<Item: collection-1/item-0>]>",
238233
"[dry run] would have removed 1 expired items",
239234
]
240235
)
@@ -331,6 +326,12 @@ class RemoveExpiredItemsManyWithProfiling(RemoveExpiredItemsBase):
331326
def test_remove_item(self):
332327
self.run_test(command_args=["--max-deletions-percentage=99"], expected_stdout_patterns=[])
333328

329+
def test_multiple_batches(self):
330+
self.run_test(
331+
command_args=["--max-deletions-percentage=99", "--batch-size=7"],
332+
expected_stdout_patterns=[]
333+
)
334+
334335
@staticmethod
335336
def _diff_memory(before, after):
336337
diff = after.compare_to(before, 'filename')

0 commit comments

Comments
 (0)