|
5 | 5 | import re |
6 | 6 | import textwrap |
7 | 7 | import warnings |
| 8 | +from collections.abc import Mapping, Sequence |
8 | 9 | from pathlib import Path |
9 | | -from typing import Generator, Tuple, Union |
| 10 | +from typing import Generator, Optional, Tuple, Union |
10 | 11 |
|
11 | 12 | from delphin import util |
12 | 13 |
|
|
33 | 34 | _line_width = 79 # try not to go beyond this number of characters |
34 | 35 |
|
35 | 36 |
|
# Accepted shapes for AVM feature input: a sequence of (feature, value)
# pairs, or a mapping from feature to value.  Values are forward-referenced
# by name because Conjunction and Term are defined later in the module.
AttrSeq = Sequence[tuple[str, Union['Conjunction', 'Term']]]
AttrMap = Mapping[str, Union['Conjunction', 'Term']]
| 39 | + |
36 | 40 | # Exceptions |
37 | 41 |
|
38 | 42 | class TDLError(PyDelphinException): |
@@ -190,21 +194,78 @@ class AVM(FeatureStructure, Term): |
190 | 194 | docstring (str): documentation string |
191 | 195 | """ |
192 | 196 |
|
def __init__(
    self,
    featvals: Union[AttrSeq, AttrMap, None] = None,
    docstring=None,
) -> None:
    """Initialize both base classes, then merge in any given features.

    Args:
        featvals: a sequence of (feature, value) pairs or a mapping
            of features to values; if `None`, no features are added
        docstring: documentation string passed on to `Term`
    """
    # The two bases take different parameters, so each initializer is
    # called explicitly instead of going through a single super() call.
    FeatureStructure.__init__(self)
    Term.__init__(self, docstring=docstring)
    if featvals is None:
        return
    # Features are merged via aggregate() rather than handed to
    # FeatureStructure.__init__().
    self.aggregate(featvals)
197 | 207 |
|
@classmethod
def _default(cls):
    """Return a new `_ImplicitAVM` constructed with no features."""
    implicit = _ImplicitAVM()
    return implicit
201 | 211 |
|
def __setitem__(self, key: str, val: Union['Conjunction', Term]) -> None:
    """Assign *val* to feature *key* after checking the value's type.

    Raises:
        TypeError: if *val* is neither `None`, a `Term`, nor a
            `Conjunction`
    """
    acceptable = val is None or isinstance(val, (Term, Conjunction))
    if not acceptable:
        type_name = type(val).__name__
        raise TypeError(
            'invalid attribute value type: {}'.format(type_name)
        )
    super(AVM, self).__setitem__(key, val)
207 | 218 |
|
def aggregate(self, featvals: Union[AttrSeq, AttrMap]) -> None:
    """Combine features in a single AVM.

    This function takes feature paths and values and merges them
    into the AVM, but does not do full unification. For example:

    >>> avm = tdl.AVM([("FEAT", tdl.TypeIdentifier("val1"))])
    >>> avm.aggregate([
    ...     ("FEAT", tdl.TypeIdentifier("val2")),
    ...     ("FEAT.SUB", tdl.TypeIdentifier("val3")),
    ... ])
    >>> print(tdl.format(avm))
    [ FEAT val1 & val2 & [ SUB val3 ] ]

    The *featvals* argument may be a sequence of (feature, value)
    pairs or a mapping of features to values.

    Args:
        featvals: (feature, value) pairs, or a mapping of features
            to values; feature paths are upper-cased and may use
            dots to address nested features
    """
    # Mappings are recognized by duck typing; their items view is
    # iterated directly, with no intermediate copy.
    pairs = featvals.items() if hasattr(featvals, 'items') else featvals
    for feat, val in pairs:
        avm = self
        feat = feat.upper()
        while feat:
            subkey, _, rest = feat.partition(".")
            # New feature: assign the whole remaining path in one go.
            if subkey not in avm:
                avm[feat] = val
                break
            cur_val = avm.get(subkey)
            # NOTE(review): a feature explicitly set to None counts as
            # present here; `None & val` would presumably fail --
            # confirm whether that case can occur.
            # Last feature on the path: conjoin with the existing value.
            if not rest:
                avm[subkey] = cur_val & val
                break
            # Find the implicit AVM to descend into: either the value
            # itself or the last AVM of a conjunction.  isinstance()
            # handles a None result, so no truthiness test is needed.
            if isinstance(cur_val, Conjunction):
                nested = cur_val._last_avm()
            else:
                nested = cur_val
            # Any other term: conjoin it with a fresh implicit AVM and
            # continue inside that new AVM.
            if not isinstance(nested, _ImplicitAVM):
                nested = _ImplicitAVM()
                avm[subkey] = cur_val & nested
            avm = nested
            feat = rest
| 268 | + |
208 | 269 | def normalize(self): |
209 | 270 | """ |
210 | 271 | Reduce trivial AVM conjunctions to just the AVM. |
@@ -255,7 +316,7 @@ def features(self, expand=False): |
255 | 316 |
|
256 | 317 |
|
class _ImplicitAVM(AVM):
    """AVM implicitly constructed by dot-notation and list syntax.

    Instances are created by `AVM._default()` and by `AVM.aggregate()`
    when it follows a dotted feature path through an existing value.
    """
259 | 320 |
|
260 | 321 |
|
261 | 322 | class ConsList(AVM): |
@@ -514,13 +575,10 @@ def __getitem__(self, key): |
514 | 575 |
|
def __setitem__(self, key, val):
    """Set *key* to *val* in the last AVM in the conjunction.

    Raises:
        TDLError: if the conjunction contains no AVM
    """
    avm = self._last_avm()
    # Compare against None explicitly: "no AVM found" must not be
    # confused with an AVM that happens to evaluate as falsy.
    if avm is None:
        raise TDLError('no AVM in Conjunction')
    avm[key] = val
524 | 582 |
|
525 | 583 | def __delitem__(self, key): |
526 | 584 | """Delete *key* from all AVMs in the conjunction""" |
@@ -614,6 +672,12 @@ def string(self): |
614 | 672 | return str(term) |
615 | 673 | return None # conjunction does not have a string type (not an error) |
616 | 674 |
|
def _last_avm(self) -> Optional[AVM]:
    """Return the last AVM among the conjunction's terms, or `None`."""
    candidates = (
        term for term in reversed(self._terms) if isinstance(term, AVM)
    )
    return next(candidates, None)
| 680 | + |
617 | 681 |
|
618 | 682 | class TypeDefinition: |
619 | 683 | """ |
@@ -1399,6 +1463,7 @@ def _format_term(term, indent): |
1399 | 1463 | Regex: _format_regex, |
1400 | 1464 | Coreference: _format_coref, |
1401 | 1465 | AVM: _format_avm, |
| 1466 | + _ImplicitAVM: _format_avm, |
1402 | 1467 | ConsList: _format_conslist, |
1403 | 1468 | DiffList: _format_difflist, |
1404 | 1469 | }.get(term.__class__, None) |
|
0 commit comments