Skip to content

Commit ad109d6

Browse files
authored
Merge pull request #144 from nansencenter/upgrade_earthdata_normalizer
Make Earthdata CMR normalizer more generic
2 parents aa68a1d + a469ab4 commit ad109d6

File tree

3 files changed

+54
-12
lines changed

3 files changed

+54
-12
lines changed

metanorm/normalizers/geospaas/earthdata_cmr.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,21 +25,27 @@ def get_entry_title(self, raw_metadata):
2525

2626
@utils.raises((KeyError, IndexError))
2727
def get_entry_id(self, raw_metadata):
28-
return raw_metadata['umm']['DataGranule']['Identifiers'][0]['Identifier'].rstrip('.nc')
28+
try:
29+
return raw_metadata['umm']['DataGranule']['Identifiers'][0]['Identifier'].rstrip('.nc')
30+
except KeyError:
31+
return raw_metadata['umm']['GranuleUR']
2932

3033
@utils.raises((KeyError, IndexError))
3134
def get_summary(self, raw_metadata):
3235
summary_fields = {}
3336
description = ''
3437
umm = raw_metadata['umm']
3538

36-
for platform in umm['Platforms']:
37-
description += (
38-
f"Platform={platform['ShortName']}, " +
39-
', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments']))
39+
try:
40+
for platform in umm['Platforms']:
41+
description += (
42+
f"Platform={platform['ShortName']}, " +
43+
', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments']) +
44+
', ')
45+
except KeyError:
46+
pass
47+
description += f"Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}"
4048

41-
description += (
42-
f", Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}")
4349
summary_fields[utils.SUMMARY_FIELDS['description']] = description
4450

4551
processing_level_match = re.match(
@@ -66,7 +72,10 @@ def get_platform(self, raw_metadata):
6672
"""Only get the first platform from the raw metadata, because
6773
GeoSPaaS does not support more than one platform per dataset
6874
"""
69-
return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName'])
75+
try:
76+
return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName'])
77+
except KeyError:
78+
return utils.get_gcmd_platform(utils.UNKNOWN)
7079

7180
@utils.raises((KeyError, IndexError))
7281
def get_instrument(self, raw_metadata):

metanorm/utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ def export_subclasses(package__all__, package_name, package_dir, base_class):
8080
# providers
8181
'ESA/EO': ('ESA',),
8282
'OB.DAAC': ('OB_DAAC',),
83+
'NASA/JPL/PODAAC': ('POCLOUD',),
8384
'C-SAR': ('SAR-C', 'SAR-C SAR'),
8485
'EUMETSAT/OSISAF': ('EUMETSAT OSI SAF',),
8586
'NSIDC': ('NSIDC_ECS',),

tests/normalizers/test_earthdata_cmr.py

Lines changed: 36 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Tests for the ACDD metadata normalizer"""
22
import unittest
33
import unittest.mock as mock
4+
from collections import OrderedDict
45
from datetime import datetime
56

67
from dateutil.tz import tzutc
@@ -38,6 +39,11 @@ def test_entry_id(self):
3839
}
3940
self.assertEqual(self.normalizer.get_entry_id(attributes), 'V2020245000600.L2_SNPP_OC')
4041

42+
def test_entry_id_from_granuleUR(self):
43+
"""Test getting the ID from the GranuleUR field"""
44+
attributes = {'umm': {'GranuleUR': 'foo'}}
45+
self.assertEqual(self.normalizer.get_entry_id(attributes), 'foo')
46+
4147
def test_entry_id_missing_attribute(self):
4248
"""A MetadataNormalizationError must be raised if the raw
4349
attribute is missing
@@ -109,6 +115,27 @@ def test_summary(self):
109115
'Description: Platform=SUOMI-NPP, ' +
110116
'Instrument=VIIRS, Start date=2020-09-01T00:06:00Z')
111117

118+
def test_summary_no_platform(self):
119+
"""Test getting a summary when no platform info is available
120+
"""
121+
attributes = {
122+
"umm": {
123+
"TemporalExtent": {
124+
"RangeDateTime": {
125+
"BeginningDateTime": "2020-09-01T00:06:00Z",
126+
"EndingDateTime": "2020-09-01T00:11:59Z"
127+
}
128+
},
129+
"CollectionReference": {
130+
"ShortName": "VIIRSN_L2_OC",
131+
"Version": "2018"
132+
}
133+
}
134+
}
135+
self.assertEqual(
136+
self.normalizer.get_summary(attributes),
137+
'Description: Start date=2020-09-01T00:06:00Z;Processing level: 2')
138+
112139
def test_summary_missing_attribute(self):
113140
"""A MetadataNormalizationError must be raised if the raw
114141
attribute is missing
@@ -189,10 +216,15 @@ def test_platform_missing_attribute(self):
189216
"""A MetadataNormalizationError must be raised if the raw
190217
attribute is missing
191218
"""
192-
with self.assertRaises(MetadataNormalizationError):
193-
self.normalizer.get_platform({})
194-
with self.assertRaises(MetadataNormalizationError):
195-
self.normalizer.get_platform({'umm': {'foo': 'bar'}})
219+
unknown_platform = OrderedDict([
220+
('Category', 'Unknown'),
221+
('Series_Entity', 'Unknown'),
222+
('Short_Name', 'Unknown'),
223+
('Long_Name', 'Unknown')
224+
])
225+
self.assertDictEqual(self.normalizer.get_platform({}), unknown_platform)
226+
self.assertDictEqual(self.normalizer.get_platform({'umm': {'foo': 'bar'}}),
227+
unknown_platform)
196228

197229
def test_instrument(self):
198230
"""Test getting the instrument"""

0 commit comments

Comments
 (0)