33Basic support for hierarchies.
44"""
55
6+ from collections .abc import Hashable , Mapping
7+ from typing import (
8+ Any ,
9+ Callable ,
10+ Generic ,
11+ Iterable ,
12+ Iterator ,
13+ Optional ,
14+ TypeVar ,
15+ )
16+
617# Default modules need to import the PyDelphin version
718from delphin .__about__ import __version__ # noqa: F401
819from delphin .exceptions import PyDelphinException
@@ -12,12 +23,23 @@ class HierarchyError(PyDelphinException):
1223 """Raised for invalid operations on hierarchies."""
1324
1425
15- def _norm_id (id ):
26+ H = TypeVar ("H" , bound = Hashable )
27+ # generic types
28+ Identifiers = Iterable [H ]
29+ HierarchyMap = Mapping [H , Identifiers ]
30+ DataMap = Mapping [H , Any ]
31+ # explicit types
32+ HierarchyDict = dict [H , tuple [H , ...]]
33+ DataDict = dict [H , Any ]
34+ IdentifierNormalizer = Callable [[H ], H ]
35+
36+
37+ def _norm_id (id : H ) -> H :
1638 """Default id normalizer does nothing."""
1739 return id
1840
1941
20- class MultiHierarchy :
42+ class MultiHierarchy ( Generic [ H ]) :
2143 """
2244 A Multiply-inheriting Hierarchy.
2345
@@ -30,6 +52,10 @@ class MultiHierarchy:
3052 data. Data for identifiers may be get and set individually with
3153 dictionary key-access.
3254
55+ While MultiHierarchy can model non-string hierarchies, the data
56+ type of all node identifiers must be hashable and consistent
57+ within the hierarchy.
58+
3359 >>> th = MultiHierarchy('*top*', {'food': '*top*',
3460 ... 'utensil': '*top*'})
3561 >>> th.update({'fruit': 'food', 'apple': 'fruit'})
@@ -72,8 +98,19 @@ class MultiHierarchy:
7298 top: the hierarchy's top node identifier
7399 """
74100
75- def __init__ (self , top , hierarchy = None , data = None ,
76- normalize_identifier = None ):
101+ _top : H
102+ _hier : HierarchyDict
103+ _loer : dict [H , set [H ]]
104+ _data : DataDict
105+ _norm : IdentifierNormalizer
106+
107+ def __init__ (
108+ self ,
109+ top : H ,
110+ hierarchy : Optional [HierarchyMap ] = None ,
111+ data : Optional [DataMap ] = None ,
112+ normalize_identifier : Optional [IdentifierNormalizer ] = None ,
113+ ):
77114 if not normalize_identifier :
78115 self ._norm = _norm_id
79116 elif not callable (normalize_identifier ):
@@ -89,17 +126,19 @@ def __init__(self, top, hierarchy=None, data=None,
89126 self .update (hierarchy , data )
90127
91128 @property
92- def top (self ):
129+ def top (self ) -> H :
93130 return self ._top
94131
95- def __eq__ (self , other ) :
132+ def __eq__ (self , other : Any ) -> bool :
96133 if not isinstance (other , self .__class__ ):
97134 return NotImplemented
98- return (self ._top == other ._top
99- and self ._hier == other ._hier
100- and self ._data == other ._data )
135+ return (
136+ self ._top == other ._top
137+ and self ._hier == other ._hier
138+ and self ._data == other ._data
139+ )
101140
102- def __getitem__ (self , identifier ) :
141+ def __getitem__ (self , identifier : H ) -> Any :
103142 identifier = self ._norm (identifier )
104143 data = None
105144 try :
@@ -109,31 +148,37 @@ def __getitem__(self, identifier):
109148 raise
110149 return data
111150
112- def __setitem__ (self , identifier , data ) :
151+ def __setitem__ (self , identifier : H , data : Any ) -> None :
113152 identifier = self ._norm (identifier )
114153 if identifier not in self :
115154 raise HierarchyError (
116155 f'cannot set data; not in hierarchy: { identifier } ' )
117156 self ._data [identifier ] = data
118157
119- def __iter__ (self ):
120- return iter (identifier for identifier in self ._hier
121- if identifier != self ._top )
158+ def __iter__ (self ) -> Iterator [H ]:
159+ return iter (
160+ identifier for identifier in self ._hier
161+ if identifier != self ._top
162+ )
122163
123- def __contains__ (self , identifier ) :
164+ def __contains__ (self , identifier : H ) -> bool :
124165 return self ._norm (identifier ) in self ._hier
125166
126- def __len__ (self ):
167+ def __len__ (self ) -> int :
127168 return len (self ._hier ) - 1 # ignore top
128169
129- def items (self ):
170+ def items (self ) -> Iterable [ tuple [ H , Any ]] :
130171 """
131172 Return the (identifier, data) pairs excluding the top node.
132173 """
133174 value = self .__getitem__
134175 return [(identifier , value (identifier )) for identifier in self ]
135176
136- def update (self , subhierarchy = None , data = None ):
177+ def update (
178+ self ,
179+ subhierarchy : Optional [HierarchyMap ] = None ,
180+ data : Optional [DataMap ] = None ,
181+ ) -> None :
137182 """
138183 Incorporate *subhierarchy* and *data* into the hierarchy.
139184
@@ -166,7 +211,7 @@ def update(self, subhierarchy=None, data=None):
166211 loer = dict (self ._loer )
167212
168213 while subhierarchy :
169- eligible = _get_eligible (hier , subhierarchy )
214+ eligible : list [ H ] = _get_eligible (hier , subhierarchy )
170215
171216 for identifier in eligible :
172217 parents = subhierarchy .pop (identifier )
@@ -181,22 +226,22 @@ def update(self, subhierarchy=None, data=None):
181226 self ._loer = loer
182227 self ._data .update (data )
183228
184- def parents (self , identifier ) :
229+ def parents (self , identifier : H ) -> tuple [ H , ...] :
185230 """Return the immediate parents of *identifier*."""
186231 identifier = self ._norm (identifier )
187232 return self ._hier [identifier ]
188233
189- def children (self , identifier ) :
234+ def children (self , identifier : H ) -> set [ H ] :
190235 """Return the immediate children of *identifier*."""
191236 identifier = self ._norm (identifier )
192237 return self ._loer [identifier ]
193238
194- def ancestors (self , identifier ) :
239+ def ancestors (self , identifier : H ) -> set [ H ] :
195240 """Return the ancestors of *identifier*."""
196241 identifier = self ._norm (identifier )
197242 return _ancestors (identifier , self ._hier )
198243
199- def descendants (self , identifier ) :
244+ def descendants (self , identifier : H ) -> set [ H ] :
200245 """Return the descendants of *identifier*."""
201246 identifier = self ._norm (identifier )
202247 xs = set ()
@@ -205,7 +250,7 @@ def descendants(self, identifier):
205250 xs .update (self .descendants (child ))
206251 return xs
207252
208- def subsumes (self , a , b ) :
253+ def subsumes (self , a : H , b : H ) -> bool :
209254 """
210255 Return `True` if node *a* subsumes node *b*.
211256
@@ -234,7 +279,7 @@ def subsumes(self, a, b):
234279 a , b = norm (a ), norm (b )
235280 return a == b or b in self .descendants (a )
236281
237- def compatible (self , a , b ) :
282+ def compatible (self , a : H , b : H ) -> bool :
238283 """
239284 Return `True` if node *a* is compatible with node *b*.
240285
@@ -262,7 +307,11 @@ def compatible(self, a, b):
262307 b_lineage = self .descendants (b ).union ([b ])
263308 return len (a_lineage .intersection (b_lineage )) > 0
264309
265- def validate_update (self , subhierarchy , data ):
310+ def validate_update (
311+ self ,
312+ subhierarchy : Optional [HierarchyMap ],
313+ data : Optional [DataMap ],
314+ ) -> tuple [HierarchyDict , DataDict ]:
266315 """
267316 Check if the update can apply to the current hierarchy.
268317
@@ -277,58 +326,71 @@ def validate_update(self, subhierarchy, data):
277326 ids = set (self ._hier ).intersection (subhierarchy )
278327 if ids :
279328 raise HierarchyError (
280- 'already in hierarchy: {}' .format (', ' .join (ids )))
329+ 'already in hierarchy: {}' .format (', ' .join (map ( str , ids ) )))
281330
282331 ids = set (data ).difference (set (self ._hier ).union (subhierarchy ))
283332 if ids :
284333 raise HierarchyError (
285334 'cannot update data; not in hierarchy: {}'
286- .format (', ' .join (ids )))
335+ .format (', ' .join (map ( str , ids ) )))
287336 return subhierarchy , data
288337
289338
290- def _ancestors (id , hier ) :
339+ def _ancestors (id : H , hier : dict [ H , tuple [ H , ...]]) -> set [ H ] :
291340 xs = set ()
292341 for parent in hier [id ]:
293342 xs .add (parent )
294343 xs .update (_ancestors (parent , hier ))
295344 return xs
296345
297346
298- def _normalize_update (norm , subhierarchy , data ):
299- sub = {}
347+ def _normalize_update (
348+ norm : IdentifierNormalizer ,
349+ subhierarchy : Optional [HierarchyMap ],
350+ data : Optional [DataMap ],
351+ ) -> tuple [HierarchyDict , DataDict ]:
352+ sub : HierarchyDict = {}
353+ parents : Identifiers
300354 if subhierarchy :
301355 for id , parents in subhierarchy .items ():
302356 if isinstance (parents , str ):
303357 parents = parents .split ()
304358 id = norm (id )
305359 parents = tuple (map (norm , parents ))
306360 sub [id ] = parents
307- dat = {}
361+ dat : DataDict = {}
308362 if data :
309363 dat = {norm (id ): obj for id , obj in data .items ()}
310364 return sub , dat
311365
312366
313- def _get_eligible (hier , sub ):
367+ def _get_eligible (
368+ hier : HierarchyDict ,
369+ sub : HierarchyDict ,
370+ ) -> list [H ]:
314371 eligible = [id for id , parents in sub .items ()
315372 if all (parent in hier for parent in parents )]
316373 if not eligible :
317374 raise HierarchyError (
318375 'disconnected or cyclic hierarchy; remaining: {}'
319- .format (', ' .join (sub )))
376+ .format (', ' .join (map ( str , sub ) )))
320377 return eligible
321378
322379
def _validate_parentage(
    id: H,
    parents: tuple[H, ...],
    hier: HierarchyDict,
) -> None:
    """
    Check that none of *parents* is an ancestor of another parent.

    Args:
        id: the identifier being added; only used in the error
            message
        parents: the proposed immediate parents of *id*
        hier: the hierarchy (identifier to parents) in which the
            ancestors of each parent are looked up
    Raises:
        HierarchyError: if any of *parents* is also an ancestor of
            one of the other parents, making it redundant
    """
    ancestors: set[H] = set()
    for parent in parents:
        ancestors.update(_ancestors(parent, hier))
    # stringify before sorting so that the message can be built for
    # non-string identifiers as well
    redundant = sorted(map(str, ancestors.intersection(parents)))
    if redundant:
        raise HierarchyError(
            '{} has redundant parents: {}'
            .format(id, ', '.join(redundant))
        )
332394
333395
334396# single-parented hierarchy might be something like this:
0 commit comments