2
2
#
3
3
# This file is part of Nominatim. (https://nominatim.org)
4
4
#
5
- # Copyright (C) 2024 by the Nominatim developer community.
5
+ # Copyright (C) 2025 by the Nominatim developer community.
6
6
# For a full list of authors see the git log.
7
7
"""
8
8
Generic processor for names that creates abbreviation variants.
9
9
"""
10
10
from typing import Mapping , Dict , Any , Iterable , Iterator , Optional , List , cast
11
11
import itertools
12
12
13
- import datrie
14
-
15
13
from ...errors import UsageError
16
14
from ...data .place_name import PlaceName
17
15
from .config_variants import get_variant_config
18
16
from .generic_mutation import MutationVariantGenerator
17
+ from .simple_trie import SimpleTrie
19
18
20
19
# Configuration section
21
20
@@ -25,8 +24,7 @@ def configure(rules: Mapping[str, Any], normalizer: Any, _: Any) -> Dict[str, An
25
24
"""
26
25
config : Dict [str , Any ] = {}
27
26
28
- config ['replacements' ], config ['chars' ] = get_variant_config (rules .get ('variants' ),
29
- normalizer )
27
+ config ['replacements' ], _ = get_variant_config (rules .get ('variants' ), normalizer )
30
28
config ['variant_only' ] = rules .get ('mode' , '' ) == 'variant-only'
31
29
32
30
# parse mutation rules
@@ -68,12 +66,8 @@ def __init__(self, norm: Any, to_ascii: Any, config: Mapping[str, Any]) -> None:
68
66
self .variant_only = config ['variant_only' ]
69
67
70
68
# Set up the replacement trie
71
- if config ['replacements' ]:
72
- self .replacements = datrie .Trie (config ['chars' ])
73
- for src , repllist in config ['replacements' ]:
74
- self .replacements [src ] = repllist
75
- else :
76
- self .replacements = None
69
+ self .replacements : Optional [SimpleTrie [List [str ]]] = \
70
+ SimpleTrie (config ['replacements' ]) if config ['replacements' ] else None
77
71
78
72
# set up mutation rules
79
73
self .mutations = [MutationVariantGenerator (* cfg ) for cfg in config ['mutations' ]]
@@ -116,10 +110,10 @@ def _generate_word_variants(self, norm_name: str) -> Iterable[str]:
116
110
pos = 0
117
111
force_space = False
118
112
while pos < baselen :
119
- full , repl = self . replacements . longest_prefix_item ( baseform [ pos :],
120
- ( None , None ) )
121
- if full is not None :
122
- done = baseform [startpos :pos ]
113
+ frm = pos
114
+ repl , pos = self . replacements . longest_prefix ( baseform , pos )
115
+ if repl is not None :
116
+ done = baseform [startpos :frm ]
123
117
partials = [v + done + r
124
118
for v , r in itertools .product (partials , repl )
125
119
if not force_space or r .startswith (' ' )]
@@ -128,11 +122,10 @@ def _generate_word_variants(self, norm_name: str) -> Iterable[str]:
128
122
# to be helpful. Only use the original term.
129
123
startpos = 0
130
124
break
131
- startpos = pos + len (full )
132
- if full [- 1 ] == ' ' :
133
- startpos -= 1
125
+ if baseform [pos - 1 ] == ' ' :
126
+ pos -= 1
134
127
force_space = True
135
- pos = startpos
128
+ startpos = pos
136
129
else :
137
130
pos += 1
138
131
force_space = False
0 commit comments