Skip to content

Commit d51f777

Browse files
authored
Merge pull request #77 from nansencenter/gcmd_api_changes
GCMD api changes
2 parents 8165571 + 3be4e5d commit d51f777

File tree

4 files changed

+33
-132
lines changed

4 files changed

+33
-132
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ jobs:
1212
strategy:
1313
matrix:
1414
python_version:
15-
- '3.7'
1615
- '3.8'
1716
- '3.9'
1817
- '3.10'

pythesint/gcmd_vocabulary.py

Lines changed: 32 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from __future__ import absolute_import
22

3-
from collections import OrderedDict
3+
import csv
44
import requests
55
import warnings
6+
from collections import OrderedDict
67

78
from pythesint.json_vocabulary import JSONVocabulary
89

@@ -31,57 +32,34 @@ def _fetch_online_data(self, version=None):
3132
except requests.RequestException:
3233
print("Could not get the vocabulary file at '{}'".format(self.url))
3334
raise
34-
rlines = [line for line in r.text.splitlines()]
35-
gcmd_list = []
36-
37-
_read_revision(rlines[0], gcmd_list)
38-
39-
categories = _get_categories(rlines)
40-
self._check_categories(categories)
41-
42-
for line in rlines[2:]:
43-
_read_line(line, gcmd_list, categories)
44-
45-
return gcmd_list
46-
4735

48-
def _read_revision(line, gcmd_list):
49-
''' Reads the line, extracts the Revision into a new dictionary and appends
50-
it to gcmd_list
51-
'''
52-
# TODO: Cast exception if not found?
53-
if 'Keyword Version' and 'Revision' in line:
54-
meta = line.split('","')
55-
gcmd_list.append({
56-
'Revision': meta[1][10:],
57-
'Keyword Version': meta[0].split(': ')[1]
58-
})
59-
60-
61-
def _get_categories(lines):
62-
'''Get the categories from the lines read from the source'''
63-
return lines[1].split(',')[:-1]
64-
65-
66-
def _read_line(line, gcmd_list, categories):
67-
''' Converts line into dictionary values for elements in the categories
68-
appends the dictionary to gcmd_list
69-
'''
70-
gcmd_keywords = line.split('","')
71-
gcmd_keywords[0] = gcmd_keywords[0].strip('"')
72-
if gcmd_keywords[0] == 'NOT APPLICABLE':
73-
return
74-
# Remove last item (the ID is not needed)
75-
gcmd_keywords.pop(-1)
76-
# skip record if it is longer than the definition of categories
77-
if len(gcmd_keywords) > len(categories):
78-
return
79-
line_kw = OrderedDict()
80-
for i, key in enumerate(categories):
81-
if i < len(gcmd_keywords):
82-
# if the record is equal to definition of categories
83-
line_kw[key] = gcmd_keywords[i]
84-
else:
85-
# if the record is shorter than definition of categories: add empty string
86-
line_kw[key] = ""
87-
gcmd_list.append(line_kw)
36+
lines = r.text.splitlines()
37+
keywords = []
38+
# Add version+revision information
39+
self._read_revision(lines[0], keywords)
40+
# parse actual CSV contents
41+
reader = csv.DictReader(lines[1:], dialect='unix', restval='')
42+
self._check_categories(reader.fieldnames)
43+
keywords.extend(list(reader))
44+
# remove UUID and extra fields
45+
for kw in keywords[1:]:
46+
for key in ('UUID', None):
47+
try:
48+
del kw[key]
49+
except KeyError:
50+
pass
51+
52+
return keywords
53+
54+
@staticmethod
def _read_revision(line, gcmd_list):
    """Extract version metadata from the header line of a GCMD CSV file.

    The header is expected to look like
    ``"Keyword Version: 8.1","Revision: 2016-01-08 13:40:40",...``.
    When both fields are present, a dictionary with the keys
    ``'Revision'`` and ``'Keyword Version'`` is appended to `gcmd_list`.
    Otherwise `gcmd_list` is left untouched.

    :param line: first line of the downloaded vocabulary file
    :param gcmd_list: list which receives the metadata dictionary
        (modified in place)
    """
    # TODO: Raise an exception if the metadata is not found?
    # Each substring needs its own membership test: the former
    # `'Keyword Version' and 'Revision' in line` only checked 'Revision',
    # because a non-empty string literal is always truthy.
    if 'Keyword Version' in line and 'Revision' in line:
        meta = line.split('","')
        gcmd_list.append({
            # strip the leading 'Revision: ' label (10 characters)
            'Revision': meta[1][10:],
            'Keyword Version': meta[0].split(': ')[1]
        })

pythesint/pythesintrc.yaml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
- Subtype
2929
- Short_Name
3030
- Long_Name
31-
version: 13.6
3231

3332
- name: gcmd_science_keyword
3433
module: gcmd_vocabulary
@@ -43,7 +42,6 @@
4342
- Variable_Level_2
4443
- Variable_Level_3
4544
- Detailed_Variable
46-
version: 13.6
4745

4846
- name: gcmd_provider
4947
module: gcmd_vocabulary
@@ -58,7 +56,6 @@
5856
- Short_Name
5957
- Long_Name
6058
- Data_Center_URL
61-
version: 13.6
6259

6360
- name: gcmd_platform
6461
module: gcmd_vocabulary
@@ -71,7 +68,6 @@
7168
- Sub_Category
7269
- Short_Name
7370
- Long_Name
74-
version: 13.6
7571

7672
- name: gcmd_location
7773
module: gcmd_vocabulary
@@ -85,7 +81,6 @@
8581
- Location_Subregion2
8682
- Location_Subregion3
8783
- Location_Subregion4
88-
version: 13.6
8984

9085
- name: gcmd_horizontalresolutionrange
9186
module: gcmd_vocabulary
@@ -94,7 +89,6 @@
9489
url: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/horizontalresolutionrange/?format=csv
9590
categories:
9691
- Horizontal_Resolution_Range
97-
version: 13.6
9892

9993
- name: gcmd_verticalresolutionrange
10094
module: gcmd_vocabulary
@@ -103,7 +97,6 @@
10397
url: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/verticalresolutionrange/?format=csv
10498
categories:
10599
- Vertical_Resolution_Range
106-
version: 13.6
107100

108101
- name: gcmd_temporalresolutionrange
109102
module: gcmd_vocabulary
@@ -112,7 +105,6 @@
112105
url: https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/temporalresolutionrange/?format=csv
113106
categories:
114107
- Temporal_Resolution_Range
115-
version: 13.6
116108

117109
- name: gcmd_project
118110
module: gcmd_vocabulary
@@ -123,7 +115,6 @@
123115
- Bucket
124116
- Short_Name
125117
- Long_Name
126-
version: 13.6
127118

128119
- name: gcmd_rucontenttype
129120
module: gcmd_vocabulary
@@ -134,7 +125,6 @@
134125
- URLContentType
135126
- Type
136127
- Subtype
137-
version: 13.6
138128

139129
- name: cf_standard_name
140130
module: cf_vocabulary

pythesint/tests/test_gcmd_vocabulary.py

Lines changed: 1 addition & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -24,78 +24,12 @@ def test_read_revision(self):
2424
' representations can be found here: '
2525
'http://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme'
2626
'/instruments/?format=xml"')
27-
pti.gcmd_vocabulary._read_revision(line, gcmd_list)
27+
pti.gcmd_vocabulary.GCMDVocabulary._read_revision(line, gcmd_list)
2828
self.assertEqual(len(gcmd_list), 1)
2929
self.assertDictEqual(
3030
gcmd_list[0],
3131
{'Keyword Version': '8.1', 'Revision': '2016-01-08 13:40:40'})
3232

33-
def test_check_categories(self):
34-
lines = [
35-
'Revision',
36-
'Category,Class,Type,Subtype,Short_Name,Long_Name,UUID'
37-
]
38-
categories = ['Category', 'Class', 'Type', 'Subtype', 'Short_Name',
39-
'Long_Name']
40-
self.assertListEqual(pti.gcmd_vocabulary._get_categories(lines), categories)
41-
42-
def test_read_line_simple(self):
43-
gcmd_list = [{'Revision': '2016-01-08 13:40:40'}]
44-
line = ('"Earth Remote Sensing Instruments","","","","","",'
45-
'"6015ef7b-f3bd-49e1-9193-cc23db566b69"')
46-
categories = ['Category', 'Class', 'Type', 'Subtype', 'Short_Name',
47-
'Long_Name']
48-
pti.gcmd_vocabulary._read_line(line, gcmd_list, categories)
49-
self.assertEqual(len(gcmd_list), 2)
50-
self.assertEqual(gcmd_list[1], {'Category':
51-
'Earth Remote Sensing Instruments',
52-
'Class': '',
53-
'Type': '',
54-
'Subtype': '',
55-
'Short_Name': '',
56-
'Long_Name': ''})
57-
58-
def test_read_line_full(self):
59-
gcmd_list = [{'Revision': '2016-01-08 13:40:40'}]
60-
line = ('"Earth Remote Sensing Instruments","Active Remote Sensing",'
61-
'"Altimeters","Lidar/Laser Altimeters","GLAS","Geoscience '
62-
'Laser Altimeter System",'
63-
'"57463f12-2a21-49f9-9477-718030d34291"')
64-
categories = ['Category', 'Class', 'Type', 'Subtype', 'Short_Name',
65-
'Long_Name']
66-
pti.gcmd_vocabulary._read_line(line, gcmd_list, categories)
67-
self.assertEqual(len(gcmd_list), 2)
68-
self.assertEqual(gcmd_list[1], {'Category':
69-
'Earth Remote Sensing Instruments',
70-
'Class': 'Active Remote Sensing',
71-
'Type': 'Altimeters',
72-
'Subtype': 'Lidar/Laser Altimeters',
73-
'Short_Name': 'GLAS',
74-
'Long_Name':
75-
'Geoscience Laser Altimeter System'})
76-
77-
def test_read_line_not_applicable(self):
78-
gcmd_list = [{'Revision': '2016-01-08 13:40:40'}]
79-
line = ('"NOT APPLICABLE","","","","","",'
80-
'"8129a4b9-b5f9-4585-87e6-4576c3a53682"')
81-
categories = ['Category', 'Class', 'Type', 'Subtype', 'Short_Name',
82-
'Long_Name']
83-
pti.gcmd_vocabulary._read_line(line, gcmd_list, categories)
84-
self.assertEqual(len(gcmd_list), 1)
85-
self.assertEqual(gcmd_list[0], {'Revision': '2016-01-08 13:40:40'})
86-
87-
def test_read_line_wrong_length(self):
88-
gcmd_list = [{'Revision': '2016-01-08 13:40:40'}]
89-
line = ('"Earth Remote Sensing Instruments","Active Remote Sensing",'
90-
'"Altimeters","Lidar/Laser Altimeters","GLAS","Geoscience '
91-
'Laser Altimeter System","Sneaky Extra Element",'
92-
'"57463f12-2a21-49f9-9477-718030d34291"')
93-
categories = ['Category', 'Class', 'Type', 'Subtype', 'Short_Name',
94-
'Long_Name']
95-
pti.gcmd_vocabulary._read_line(line, gcmd_list, categories)
96-
self.assertEqual(len(gcmd_list), 1)
97-
self.assertEqual(gcmd_list[0], {'Revision': '2016-01-08 13:40:40'})
98-
9933
def test_get_location_by_type(self):
10034
type = 'africa'
10135
a = pti.get_gcmd_location(type)

0 commit comments

Comments
 (0)