Skip to content

Commit 3f4b4d3

Browse files
authored
Merge pull request #34 from nansencenter/processing_cleanup
Updates to processing results cleanup
2 parents 83f5b13 + 26ee684 commit 3f4b4d3

File tree

7 files changed

+217
-23
lines changed

7 files changed

+217
-23
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ on:
77
types: [prereleased, released]
88
env:
99
TESTING_IMAGE_NAME: geospaas_rest_api_tests
10-
GEOSPAAS_PROCESSING_VERSION: 3.1.0
10+
GEOSPAAS_PROCESSING_VERSION: 3.8.0
1111
GEOSPAAS_HARVESTING_VERSION: 3.8.0
1212
METANORM_VERSION: 4.1.2
1313
jobs:
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Generated by Django 3.2 on 2024-11-14 14:41
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('geospaas_rest_api', '0005_alter_job_task_id'),
10+
]
11+
12+
operations = [
13+
migrations.CreateModel(
14+
name='SyntoolCompareJob',
15+
fields=[
16+
],
17+
options={
18+
'proxy': True,
19+
'indexes': [],
20+
'constraints': [],
21+
},
22+
bases=('geospaas_rest_api.job',),
23+
),
24+
]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Generated by Django 3.2 on 2024-11-18 08:27
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('geospaas_rest_api', '0006_syntoolcomparejob'),
10+
]
11+
12+
operations = [
13+
migrations.CreateModel(
14+
name='WorkdirCleanupJob',
15+
fields=[
16+
],
17+
options={
18+
'proxy': True,
19+
'indexes': [],
20+
'constraints': [],
21+
},
22+
bases=('geospaas_rest_api.job',),
23+
),
24+
]

geospaas_rest_api/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@
1010
ConvertJob,
1111
SyntoolCleanupJob,
1212
SyntoolCompareJob,
13-
HarvestJob)
13+
HarvestJob,
14+
WorkdirCleanupJob)

geospaas_rest_api/processing_api/models.py

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -138,15 +138,22 @@ def get_signature(cls, parameters):
138138
tasks_core.archive.signature(),
139139
tasks_core.publish.signature())
140140
elif conversion_format == 'syntool':
141-
syntool_chain = celery.chain(
141+
syntool_tasks = [
142142
tasks_core.download.signature(),
143143
tasks_core.unarchive.signature(),
144144
tasks_core.crop.signature(
145145
kwargs={'bounding_box': parameters.get('bounding_box', None)}),
146146
tasks_syntool.convert.signature(
147-
kwargs={'converter_options': parameters.get('converter_options', None)}),
147+
kwargs={
148+
'converter_options': parameters.get('converter_options', None),
149+
'ttl': parameters.get('ttl', None),
150+
}),
148151
tasks_syntool.db_insert.signature(),
149-
tasks_core.remove_downloaded.signature())
152+
]
153+
if parameters.get('remove_downloaded', True):
154+
syntool_tasks.append(tasks_core.remove_downloaded.signature())
155+
syntool_chain = celery.chain(syntool_tasks)
156+
150157
if parameters.pop('skip_check', False):
151158
return syntool_chain
152159
else:
@@ -162,7 +169,14 @@ def check_parameters(parameters):
162169
- bounding_box: 4-elements list
163170
- format: value in ['idf']
164171
"""
165-
accepted_keys = ('dataset_id', 'format', 'bounding_box', 'skip_check', 'converter_options')
172+
accepted_keys = (
173+
'dataset_id',
174+
'format',
175+
'bounding_box',
176+
'skip_check',
177+
'converter_options',
178+
'remove_downloaded',
179+
'ttl')
166180
if not set(parameters).issubset(set(accepted_keys)):
167181
raise ValidationError(
168182
f"The convert action accepts only these parameters: {', '.join(accepted_keys)}")
@@ -185,6 +199,10 @@ def check_parameters(parameters):
185199
not isinstance(parameters['converter_options'], dict)):
186200
raise ValidationError("'converter_options' should be a dictionary")
187201

202+
if ('ttl' in parameters and not (
203+
parameters['ttl'] is None or isinstance(parameters['ttl'], dict))):
204+
raise ValidationError("'ttl' should be a dictionary or None")
205+
188206
return parameters
189207

190208
@staticmethod
@@ -231,18 +249,25 @@ class Meta:
231249

232250
@classmethod
233251
def get_signature(cls, parameters):
234-
return celery.chain(
235-
tasks_syntool.compare_profiles.signature(),
252+
tasks = [
253+
tasks_syntool.compare_profiles.signature(kwargs={'ttl': parameters.get('ttl', None)}),
236254
tasks_syntool.db_insert.signature(),
237-
tasks_core.remove_downloaded.signature(),
238-
)
255+
]
256+
if parameters.get('remove_downloaded', True):
257+
tasks.append(tasks_core.remove_downloaded.signature())
258+
return celery.chain(tasks)
239259

240260
@staticmethod
241261
def check_parameters(parameters):
242-
accepted_keys = ('model', 'profiles')
243-
if not set(parameters) == set(accepted_keys):
262+
required_keys = ('model', 'profiles')
263+
accepted_keys = (*required_keys, 'ttl')
264+
if not set(parameters).issubset(accepted_keys):
244265
raise ValidationError(
245-
f"The convert action accepts only these parameters: {', '.join(accepted_keys)}")
266+
f"The compare action accepts only these parameters: {', '.join(accepted_keys)}")
267+
268+
if not set(required_keys).issubset(parameters):
269+
raise ValidationError(
270+
f"The following parameters are required for the compare action: {required_keys}")
246271

247272
if ((not isinstance(parameters['model'], Sequence)) or
248273
len(parameters['model']) != 2 or
@@ -265,6 +290,11 @@ def check_parameters(parameters):
265290
break
266291
if not valid_profiles:
267292
raise ValidationError("'profiles' must be a list of tuples (profile_id, profile_path)")
293+
294+
if ('ttl' in parameters and not (
295+
parameters['ttl'] is None or isinstance(parameters['ttl'], dict))):
296+
raise ValidationError("'converter_options' should be a dictionary or None")
297+
268298
return parameters
269299

270300
@staticmethod
@@ -292,3 +322,21 @@ def check_parameters(parameters):
292322
@staticmethod
293323
def make_task_parameters(parameters):
294324
return ((parameters['search_config_dict'],), {})
325+
326+
327+
class WorkdirCleanupJob(Job):
328+
"""Remove everything in the working directory"""
329+
class Meta:
330+
proxy = True
331+
332+
@classmethod
333+
def get_signature(cls, parameters):
334+
return tasks_core.cleanup_workdir.signature()
335+
336+
@staticmethod
337+
def check_parameters(parameters):
338+
return parameters
339+
340+
@staticmethod
341+
def make_task_parameters(parameters):
342+
return (tuple(), {})

geospaas_rest_api/processing_api/serializers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class JobSerializer(rest_framework.serializers.Serializer):
1515
'harvest': models.HarvestJob,
1616
'syntool_cleanup': models.SyntoolCleanupJob,
1717
'compare_profiles': models.SyntoolCompareJob,
18+
'workdir_cleanup': models.WorkdirCleanupJob,
1819
}
1920

2021
# Actual Job fields
@@ -29,6 +30,7 @@ class JobSerializer(rest_framework.serializers.Serializer):
2930
'convert',
3031
'harvest',
3132
'syntool_cleanup',
33+
'workdir_cleanup',
3234
'compare_profiles',
3335
],
3436
required=True, write_only=True,

geospaas_rest_api/tests/test_processing_api.py

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,8 @@ def test_check_parameters_wrong_key(self):
295295
self.assertListEqual(
296296
raised.exception.detail,
297297
[ErrorDetail(string="The convert action accepts only these parameters: "
298-
"dataset_id, format, bounding_box, skip_check, converter_options",
298+
"dataset_id, format, bounding_box, skip_check, converter_options, "
299+
"remove_downloaded, ttl",
299300
code='invalid')])
300301

301302
def test_check_parameters_wrong_format(self):
@@ -314,7 +315,8 @@ def test_check_parameters_extra_param(self):
314315
self.assertListEqual(
315316
raised.exception.detail,
316317
[ErrorDetail(string="The convert action accepts only these parameters: "
317-
"dataset_id, format, bounding_box, skip_check, converter_options",
318+
"dataset_id, format, bounding_box, skip_check, converter_options, "
319+
"remove_downloaded, ttl",
318320
code='invalid')])
319321

320322
def test_check_parameters_wrong_type_for_dataset_id(self):
@@ -348,15 +350,34 @@ def test_check_parameters_wrong_converter_options_type(self):
348350
models.ConvertJob.check_parameters(
349351
{'dataset_id': 1, 'format': 'idf', 'converter_options': '2'})
350352

353+
def test_check_parameters_ttl(self):
354+
"""`check_parameters()` must not raise an exception if 'ttl' is
355+
a dict or None
356+
"""
357+
self.assertEqual(
358+
models.ConvertJob.check_parameters(
359+
{'dataset_id': 1, 'format': 'syntool', 'ttl': {'days': 2}}),
360+
{'dataset_id': 1, 'format': 'syntool', 'ttl': {'days': 2}})
361+
self.assertEqual(
362+
models.ConvertJob.check_parameters(
363+
{'dataset_id': 1, 'format': 'syntool', 'ttl': None}),
364+
{'dataset_id': 1, 'format': 'syntool', 'ttl': None})
365+
366+
def test_check_parameters_wrong_ttl_type(self):
367+
"""`check_parameters()` must raise an exception if the 'ttl'
368+
value is of the wrong type"""
369+
with self.assertRaises(ValidationError):
370+
models.ConvertJob.check_parameters(
371+
{'dataset_id': 1, 'format': 'syntool', 'ttl': 2})
372+
351373
def test_get_signature_syntool(self):
352374
"""Test the right signature is returned"""
353-
self.maxDiff = None
354375
base_chain = celery.chain(
355376
tasks_core.download.signature(),
356377
tasks_core.unarchive.signature(),
357378
tasks_core.crop.signature(
358379
kwargs={'bounding_box': [0, 20, 20, 0]}),
359-
tasks_syntool.convert.signature(kwargs={'converter_options': None}),
380+
tasks_syntool.convert.signature(kwargs={'converter_options': None, 'ttl': None}),
360381
tasks_syntool.db_insert.signature(),
361382
tasks_core.remove_downloaded.signature())
362383

@@ -459,11 +480,26 @@ def test_get_signature(self):
459480
'geospaas_rest_api.processing_api.models.tasks_core') as mock_core_tasks, \
460481
mock.patch('celery.chain') as mock_chain:
461482
_ = models.SyntoolCompareJob.get_signature({})
462-
mock_chain.assert_called_once_with(
483+
mock_chain.assert_called_once_with([
463484
mock_syntool_tasks.compare_profiles.signature.return_value,
464485
mock_syntool_tasks.db_insert.signature.return_value,
465486
mock_core_tasks.remove_downloaded.signature.return_value,
466-
)
487+
])
488+
489+
def test_get_signature_no_remove(self):
490+
"""Test that remove_downloaded is not added to the signature if
491+
the parameter is False
492+
"""
493+
with mock.patch(
494+
'geospaas_rest_api.processing_api.models.tasks_syntool') as mock_syntool_tasks, \
495+
mock.patch(
496+
'geospaas_rest_api.processing_api.models.tasks_core') as mock_core_tasks, \
497+
mock.patch('celery.chain') as mock_chain:
498+
_ = models.SyntoolCompareJob.get_signature({'remove_downloaded': False})
499+
mock_chain.assert_called_once_with([
500+
mock_syntool_tasks.compare_profiles.signature.return_value,
501+
mock_syntool_tasks.db_insert.signature.return_value,
502+
])
467503

468504
def test_check_parameters_ok(self):
469505
"""Test that check_parameters() returns the parameters when
@@ -472,10 +508,36 @@ def test_check_parameters_ok(self):
472508
self.assertDictEqual(
473509
models.SyntoolCompareJob.check_parameters({
474510
'model': (123, '/foo'),
475-
'profiles': ((456, '/bar'), (789, '/baz'))
511+
'profiles': ((456, '/bar'), (789, '/baz')),
476512
}),
477513
{'model': (123, '/foo'), 'profiles': ((456, '/bar'), (789, '/baz'))})
478514

515+
def test_check_parameters_ttl(self):
516+
"""ttl must be a dict or None"""
517+
self.assertDictEqual(
518+
models.SyntoolCompareJob.check_parameters({
519+
'model': (123, '/foo'),
520+
'profiles': ((456, '/bar'), (789, '/baz')),
521+
'ttl': {'days': 2},
522+
}),
523+
{
524+
'model': (123, '/foo'),
525+
'profiles': ((456, '/bar'), (789, '/baz')),
526+
'ttl': {'days': 2},
527+
})
528+
self.assertDictEqual(
529+
models.SyntoolCompareJob.check_parameters({
530+
'model': (123, '/foo'),
531+
'profiles': ((456, '/bar'), (789, '/baz')),
532+
'ttl': None,
533+
}),
534+
{
535+
'model': (123, '/foo'),
536+
'profiles': ((456, '/bar'), (789, '/baz')),
537+
'ttl': None,
538+
})
539+
540+
479541
def test_check_parameters_unknown(self):
480542
"""An error should be raised when an unknown parameter is given
481543
"""
@@ -529,6 +591,13 @@ def test_check_parameters_wrong_type(self):
529591
'model': (123, '/foo'),
530592
'profiles': ((456, '/bar'), (789, False))
531593
})
594+
# wrong ttl type
595+
with self.assertRaises(ValidationError):
596+
models.SyntoolCompareJob.check_parameters({
597+
'model': (123, '/foo'),
598+
'profiles': ((456, '/bar'), (789, '/baz')),
599+
'ttl': 2,
600+
})
532601

533602
def test_make_task_parameters(self):
534603
"""Test that the right arguments are builts from the request
@@ -582,6 +651,29 @@ def test_make_task_parameters(self):
582651
(({'foo': 'bar'},), {}))
583652

584653

654+
class WorkdirCleanupJobTests(unittest.TestCase):
655+
"""Tests for WorkdirCleanupJob"""
656+
657+
def test_get_signature(self):
658+
"""The signature is the cleanup_workdir task"""
659+
self.assertEqual(
660+
models.WorkdirCleanupJob.get_signature({}),
661+
tasks_core.cleanup_workdir.signature()
662+
)
663+
664+
def test_check_parameters(self):
665+
"""No check needed"""
666+
parameters = mock.Mock()
667+
self.assertEqual(
668+
models.WorkdirCleanupJob.check_parameters(parameters),
669+
parameters)
670+
671+
def test_make_task_parameters(self):
672+
"""No parameters needed"""
673+
self.assertTupleEqual(
674+
models.WorkdirCleanupJob.make_task_parameters({}),
675+
(tuple(), {}))
676+
585677
class JobViewSetTests(django.test.TestCase):
586678
"""Test jobs/ endpoints"""
587679

@@ -874,13 +966,15 @@ def test_list_tasks(self):
874966
"dataset": 1,
875967
"path": "ingested/product_name/granule_name_1/",
876968
"type": "syntool",
877-
"created": "2023-10-25T15:38:47Z"
969+
"created": "2023-10-25T15:38:47Z",
970+
"ttl": None,
878971
}, {
879972
"id": 2,
880973
"dataset": 2,
881974
"path": "ingested/product_name/granule_name_2/",
882975
"type": "syntool",
883-
"created": "2023-10-26T09:10:19Z"
976+
"created": "2023-10-26T09:10:19Z",
977+
"ttl": None,
884978
}
885979
]
886980
}
@@ -894,5 +988,6 @@ def test_retrieve_task(self):
894988
"dataset": 1,
895989
"path": "ingested/product_name/granule_name_1/",
896990
"type": "syntool",
897-
"created": "2023-10-25T15:38:47Z"
991+
"created": "2023-10-25T15:38:47Z",
992+
"ttl": None,
898993
})

0 commit comments

Comments
 (0)