10
10
from .errors import (BuildError , OrphanContainerBuildError , ReferenceTargetNotBuiltError , ContainerConfigurationError ,
11
11
ConstructError )
12
12
from .manager import Proxy , BuildManager
13
- from .warnings import MissingRequiredBuildWarning , DtypeConversionWarning , IncorrectQuantityBuildWarning
13
+ from .warnings import (MissingRequiredBuildWarning , DtypeConversionWarning , IncorrectQuantityBuildWarning ,
14
+ IncorrectDatasetShapeBuildWarning )
14
15
from ..container import AbstractContainer , Data , DataRegion
15
16
from ..term_set import TermSetWrapper
16
17
from ..data_utils import DataIO , AbstractDataChunkIterator
17
18
from ..query import ReferenceResolver
18
19
from ..spec import Spec , AttributeSpec , DatasetSpec , GroupSpec , LinkSpec , RefSpec
19
20
from ..spec .spec import BaseStorageSpec
20
- from ..utils import docval , getargs , ExtenderMeta , get_docval
21
+ from ..utils import docval , getargs , ExtenderMeta , get_docval , get_data_shape
21
22
22
23
_const_arg = '__constructor_arg'
23
24
@@ -721,19 +722,34 @@ def build(self, **kwargs):
721
722
if not isinstance (container , Data ):
722
723
msg = "'container' must be of type Data with DatasetSpec"
723
724
raise ValueError (msg )
724
- spec_dtype , spec_shape , spec = self .__check_dset_spec (self .spec , spec_ext )
725
+ spec_dtype , spec_shape , spec_dims , spec = self .__check_dset_spec (self .spec , spec_ext )
726
+ dimension_labels = self .__get_dimension_labels_from_spec (container .data , spec_shape , spec_dims )
725
727
if isinstance (spec_dtype , RefSpec ):
726
728
self .logger .debug ("Building %s '%s' as a dataset of references (source: %s)"
727
729
% (container .__class__ .__name__ , container .name , repr (source )))
728
730
# create dataset builder with data=None as a placeholder. fill in with refs later
729
- builder = DatasetBuilder (name , data = None , parent = parent , source = source , dtype = spec_dtype .reftype )
731
+ builder = DatasetBuilder (
732
+ name ,
733
+ data = None ,
734
+ parent = parent ,
735
+ source = source ,
736
+ dtype = spec_dtype .reftype ,
737
+ dimension_labels = dimension_labels ,
738
+ )
730
739
manager .queue_ref (self .__set_dataset_to_refs (builder , spec_dtype , spec_shape , container , manager ))
731
740
elif isinstance (spec_dtype , list ):
732
741
# a compound dataset
733
742
self .logger .debug ("Building %s '%s' as a dataset of compound dtypes (source: %s)"
734
743
% (container .__class__ .__name__ , container .name , repr (source )))
735
744
# create dataset builder with data=None, dtype=None as a placeholder. fill in with refs later
736
- builder = DatasetBuilder (name , data = None , parent = parent , source = source , dtype = spec_dtype )
745
+ builder = DatasetBuilder (
746
+ name ,
747
+ data = None ,
748
+ parent = parent ,
749
+ source = source ,
750
+ dtype = spec_dtype ,
751
+ dimension_labels = dimension_labels ,
752
+ )
737
753
manager .queue_ref (self .__set_compound_dataset_to_refs (builder , spec , spec_dtype , container ,
738
754
manager ))
739
755
else :
@@ -744,7 +760,14 @@ def build(self, **kwargs):
744
760
% (container .__class__ .__name__ , container .name , repr (source )))
745
761
# an unspecified dtype and we were given references
746
762
# create dataset builder with data=None as a placeholder. fill in with refs later
747
- builder = DatasetBuilder (name , data = None , parent = parent , source = source , dtype = 'object' )
763
+ builder = DatasetBuilder (
764
+ name ,
765
+ data = None ,
766
+ parent = parent ,
767
+ source = source ,
768
+ dtype = "object" ,
769
+ dimension_labels = dimension_labels ,
770
+ )
748
771
manager .queue_ref (self .__set_untyped_dataset_to_refs (builder , container , manager ))
749
772
else :
750
773
# a dataset that has no references, pass the conversion off to the convert_dtype method
@@ -760,7 +783,14 @@ def build(self, **kwargs):
760
783
except Exception as ex :
761
784
msg = 'could not resolve dtype for %s \' %s\' ' % (type (container ).__name__ , container .name )
762
785
raise Exception (msg ) from ex
763
- builder = DatasetBuilder (name , bldr_data , parent = parent , source = source , dtype = dtype )
786
+ builder = DatasetBuilder (
787
+ name ,
788
+ data = bldr_data ,
789
+ parent = parent ,
790
+ source = source ,
791
+ dtype = dtype ,
792
+ dimension_labels = dimension_labels ,
793
+ )
764
794
765
795
# Add attributes from the specification extension to the list of attributes
766
796
all_attrs = self .__spec .attributes + getattr (spec_ext , 'attributes' , tuple ())
@@ -779,14 +809,67 @@ def __check_dset_spec(self, orig, ext):
779
809
"""
780
810
dtype = orig .dtype
781
811
shape = orig .shape
812
+ dims = orig .dims
782
813
spec = orig
783
814
if ext is not None :
784
815
if ext .dtype is not None :
785
816
dtype = ext .dtype
786
817
if ext .shape is not None :
787
818
shape = ext .shape
819
+ dims = ext .dims
788
820
spec = ext
789
- return dtype , shape , spec
821
+ return dtype , shape , dims , spec
822
+
823
def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple:
    """Return the dimension labels from the spec that correspond to the shape of ``data``.

    :param data: the data whose shape is compared against the spec
    :param spec_shape: the shape from the spec -- either a single list of dimension lengths or a list of
                       such lists (several allowed shapes). A length of None means any length is allowed.
    :param spec_dims: the dimension labels from the spec, structured in parallel with ``spec_shape``
    :return: a tuple of dimension labels, or None if ``spec_shape`` or ``spec_dims`` is None, if the shape
             of the data cannot be determined, or if the shape of the data does not match the spec (in
             which case an IncorrectDatasetShapeBuildWarning is also issued)
    """
    if spec_shape is None or spec_dims is None:
        return None
    data_shape = get_data_shape(data)
    if data_shape is None:
        # NOTE(review): presumably get_data_shape returns None when it cannot determine a shape
        # (e.g., scalar or string data) -- confirm. Without this guard, len(data_shape) below would
        # raise a TypeError. A mismatch cannot be established without a shape, so no warning is issued.
        return None

    def shape_matches(allowed_shape):
        # the number of dimensions must agree and every length fixed by the spec must equal the
        # corresponding length of the data. None in the spec means any length is allowed
        return len(allowed_shape) == len(data_shape) and all(
            allowed is None or allowed == actual
            for allowed, actual in zip(allowed_shape, data_shape)
        )

    if isinstance(spec_shape[0], list):
        # shape is a list of allowed shapes -- collect the indices of all shapes that match the data
        matching_inds = [i for i, allowed_shape in enumerate(spec_shape) if shape_matches(allowed_shape)]
        if not matching_inds:
            msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
            warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
            return None
        # use the most specific match -- the one with the fewest Nones. min returns the first minimal
        # element, so ties are resolved in favor of the shape listed first in the spec
        best_match_ind = min(matching_inds, key=lambda k: sum(x is None for x in spec_shape[k]))
        return tuple(spec_dims[best_match_ind])

    # shape is a single list of allowed dimension lengths
    if not shape_matches(spec_shape):
        msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
        warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
        return None
    return tuple(spec_dims)
790
873
791
874
def __is_reftype (self , data ):
792
875
if (isinstance (data , AbstractDataChunkIterator ) or
0 commit comments