Skip to content

Commit 3cf875e

Browse files
authored
Merge pull request #69 from simleo/more_file_metadata
More file metadata
2 parents c27ed11 + c9a15fc commit 3cf875e

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

src/runcrate/convert.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -566,6 +566,8 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
566566
))
567567
if len(action_p["exampleOfWork"]) == 1:
568568
action_p["exampleOfWork"] = action_p["exampleOfWork"][0]
569+
if ptype == "generation":
570+
action_p["dateCreated"] = rel.time.isoformat()
569571
action_params.append(action_p)
570572
return action_params
571573

@@ -606,6 +608,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
606608
source = self.manifest[hash_]
607609
action_p = crate.add_file(source, dest, properties={
608610
"sha1": hash_,
611+
"contentSize": str(Path(source).stat().st_size)
609612
})
610613
self._set_alternate_name(prov_param, action_p, parent=parent)
611614
try:
@@ -739,9 +742,9 @@ def patch_workflow_input_collection(self, crate, wf=None):
739742
if "ComputationalWorkflow" in as_list(tool.type):
740743
self.patch_workflow_input_collection(crate, wf=tool)
741744

742-
def _map_input_data(self, data):
745+
def _map_input_data(self, crate, data):
743746
if isinstance(data, list):
744-
return [self._map_input_data(_) for _ in data]
747+
return [self._map_input_data(crate, _) for _ in data]
745748
if isinstance(data, dict):
746749
rval = {}
747750
for k, v in data.items():
@@ -753,8 +756,13 @@ def _map_input_data(self, data):
753756
source_k = str(source)
754757
dest = self.file_map.get(source_k)
755758
rval[k] = str(dest) if dest else v
759+
fmt = data.get("format")
760+
if fmt:
761+
entity = crate.get(str(dest))
762+
if entity:
763+
entity["encodingFormat"] = fmt
756764
else:
757-
rval[k] = self._map_input_data(v)
765+
rval[k] = self._map_input_data(crate, v)
758766
return rval
759767
return data
760768

@@ -763,7 +771,7 @@ def add_inputs_file(self, crate):
763771
if path.is_file():
764772
with open(path) as f:
765773
data = json.load(f)
766-
data = self._map_input_data(data)
774+
data = self._map_input_data(crate, data)
767775
source = StringIO(json.dumps(data, indent=4))
768776
crate.add_file(source, path.name, properties={
769777
"name": "input object document",

tests/test_cwlprov_crate_builder.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,10 +95,14 @@ def test_revsort(data_dir, tmpdir):
9595
assert "File" in entity.type
9696
assert entity["alternateName"] == "whale.txt"
9797
assert entity["sha1"] == entity.id.rsplit("/")[-1]
98+
assert entity["contentSize"] == "1111"
99+
assert "encodingFormat" in entity
98100
wf_input_file = entity
99101
wf_output_file = wf_results[0]
100102
assert wf_output_file["alternateName"] == "output.txt"
101103
assert wf_output_file["sha1"] == wf_output_file.id.rsplit("/")[-1]
104+
assert wf_output_file["dateCreated"] == "2018-10-25T15:46:38.058365"
105+
assert wf_output_file["contentSize"] == "1111"
102106
assert "File" in wf_output_file.type
103107
steps = workflow["step"]
104108
assert len(steps) == 2
@@ -118,6 +122,8 @@ def test_revsort(data_dir, tmpdir):
118122
assert rev_input_file is wf_input_file
119123
rev_output_file = results[0]
120124
assert "File" in rev_output_file.type
125+
assert rev_output_file["dateCreated"] == "2018-10-25T15:46:36.963254"
126+
assert rev_output_file["contentSize"] == "1111"
121127
assert step["position"] == "0"
122128
assert set(_connected(step)) == set([
123129
("packed.cwl#main/input", "packed.cwl#revtool.cwl/input"),
@@ -357,6 +363,7 @@ def test_dir_io(data_dir, tmpdir):
357363
assert "Dataset" in entity.type
358364
wf_input_dir = entity
359365
wf_output_dir = wf_results[0]
366+
assert wf_output_dir["dateCreated"] == "2023-02-17T16:20:30.288242"
360367
assert wf_input_dir.type == wf_output_dir.type == "Dataset"
361368
assert wf_input_dir["alternateName"] == "grepucase_in"
362369
assert len(wf_input_dir["hasPart"]) == 2
@@ -395,6 +402,7 @@ def test_dir_io(data_dir, tmpdir):
395402
assert greptool_input_dir is wf_input_dir
396403
greptool_output_dir = greptool_results[0]
397404
assert "Dataset" in greptool_output_dir.type
405+
assert greptool_output_dir["dateCreated"] == "2023-02-17T16:20:30.262141"
398406
ucasetool_action = action_map["packed.cwl#ucasetool.cwl"]
399407
ucasetool_objects = ucasetool_action["object"]
400408
ucasetool_results = ucasetool_action["result"]

0 commit comments

Comments
 (0)