11
11
import heapq
12
12
13
13
from ..types import SearchDetails , DataLayer
14
- from .query import QueryStruct , Token , TokenType , TokenRange , BreakType
14
+ from . import query as qmod
15
15
from .token_assignment import TokenAssignment
16
16
from . import db_search_fields as dbf
17
17
from . import db_searches as dbs
@@ -51,7 +51,7 @@ class SearchBuilder:
51
51
""" Build the abstract search queries from token assignments.
52
52
"""
53
53
54
- def __init__ (self , query : QueryStruct , details : SearchDetails ) -> None :
54
+ def __init__ (self , query : qmod . QueryStruct , details : SearchDetails ) -> None :
55
55
self .query = query
56
56
self .details = details
57
57
@@ -97,7 +97,7 @@ def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]:
97
97
builder = self .build_poi_search (sdata )
98
98
elif assignment .housenumber :
99
99
hnr_tokens = self .query .get_tokens (assignment .housenumber ,
100
- TokenType . HOUSENUMBER )
100
+ qmod . TOKEN_HOUSENUMBER )
101
101
builder = self .build_housenumber_search (sdata , hnr_tokens , assignment .address )
102
102
else :
103
103
builder = self .build_special_search (sdata , assignment .address ,
@@ -128,7 +128,7 @@ def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch
128
128
yield dbs .PoiSearch (sdata )
129
129
130
130
def build_special_search (self , sdata : dbf .SearchData ,
131
- address : List [TokenRange ],
131
+ address : List [qmod . TokenRange ],
132
132
is_category : bool ) -> Iterator [dbs .AbstractSearch ]:
133
133
""" Build abstract search queries for searches that do not involve
134
134
a named place.
@@ -150,8 +150,8 @@ def build_special_search(self, sdata: dbf.SearchData,
150
150
lookups .Restrict )]
151
151
yield dbs .PostcodeSearch (penalty , sdata )
152
152
153
- def build_housenumber_search (self , sdata : dbf .SearchData , hnrs : List [Token ],
154
- address : List [TokenRange ]) -> Iterator [dbs .AbstractSearch ]:
153
+ def build_housenumber_search (self , sdata : dbf .SearchData , hnrs : List [qmod . Token ],
154
+ address : List [qmod . TokenRange ]) -> Iterator [dbs .AbstractSearch ]:
155
155
""" Build a simple address search for special entries where the
156
156
housenumber is the main name token.
157
157
"""
@@ -173,7 +173,7 @@ def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
173
173
list (partials ), lookups .LookupAll ))
174
174
else :
175
175
addr_fulls = [t .token for t
176
- in self .query .get_tokens (address [0 ], TokenType . WORD )]
176
+ in self .query .get_tokens (address [0 ], qmod . TOKEN_WORD )]
177
177
if len (addr_fulls ) > 5 :
178
178
return
179
179
sdata .lookups .append (
@@ -183,7 +183,7 @@ def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
183
183
yield dbs .PlaceSearch (0.05 , sdata , expected_count )
184
184
185
185
def build_name_search (self , sdata : dbf .SearchData ,
186
- name : TokenRange , address : List [TokenRange ],
186
+ name : qmod . TokenRange , address : List [qmod . TokenRange ],
187
187
is_category : bool ) -> Iterator [dbs .AbstractSearch ]:
188
188
""" Build abstract search queries for simple name or address searches.
189
189
"""
@@ -196,7 +196,7 @@ def build_name_search(self, sdata: dbf.SearchData,
196
196
sdata .lookups = lookup
197
197
yield dbs .PlaceSearch (penalty + name_penalty , sdata , count )
198
198
199
- def yield_lookups (self , name : TokenRange , address : List [TokenRange ]
199
+ def yield_lookups (self , name : qmod . TokenRange , address : List [qmod . TokenRange ]
200
200
) -> Iterator [Tuple [float , int , List [dbf .FieldLookup ]]]:
201
201
""" Yield all variants how the given name and address should best
202
202
be searched for. This takes into account how frequent the terms
@@ -216,7 +216,7 @@ def yield_lookups(self, name: TokenRange, address: List[TokenRange]
216
216
217
217
addr_count = min (t .addr_count for t in addr_partials ) if addr_partials else 30000
218
218
# Partial term too frequent. Try looking up by rare full names first.
219
- name_fulls = self .query .get_tokens (name , TokenType . WORD )
219
+ name_fulls = self .query .get_tokens (name , qmod . TOKEN_WORD )
220
220
if name_fulls :
221
221
fulls_count = sum (t .count for t in name_fulls )
222
222
@@ -235,7 +235,7 @@ def yield_lookups(self, name: TokenRange, address: List[TokenRange]
235
235
self .get_name_address_ranking (list (name_partials .keys ()), addr_partials )
236
236
237
237
def get_name_address_ranking (self , name_tokens : List [int ],
238
- addr_partials : List [Token ]) -> List [dbf .FieldLookup ]:
238
+ addr_partials : List [qmod . Token ]) -> List [dbf .FieldLookup ]:
239
239
""" Create a ranking expression looking up by name and address.
240
240
"""
241
241
lookup = [dbf .FieldLookup ('name_vector' , name_tokens , lookups .LookupAll )]
@@ -257,7 +257,7 @@ def get_name_address_ranking(self, name_tokens: List[int],
257
257
258
258
return lookup
259
259
260
- def get_full_name_ranking (self , name_fulls : List [Token ], addr_partials : List [Token ],
260
+ def get_full_name_ranking (self , name_fulls : List [qmod . Token ], addr_partials : List [qmod . Token ],
261
261
use_lookup : bool ) -> List [dbf .FieldLookup ]:
262
262
""" Create a ranking expression with full name terms and
263
263
additional address lookup. When 'use_lookup' is true, then
@@ -281,19 +281,19 @@ def get_full_name_ranking(self, name_fulls: List[Token], addr_partials: List[Tok
281
281
return dbf .lookup_by_any_name ([t .token for t in name_fulls ],
282
282
addr_restrict_tokens , addr_lookup_tokens )
283
283
284
- def get_name_ranking (self , trange : TokenRange ,
284
+ def get_name_ranking (self , trange : qmod . TokenRange ,
285
285
db_field : str = 'name_vector' ) -> dbf .FieldRanking :
286
286
""" Create a ranking expression for a name term in the given range.
287
287
"""
288
- name_fulls = self .query .get_tokens (trange , TokenType . WORD )
288
+ name_fulls = self .query .get_tokens (trange , qmod . TOKEN_WORD )
289
289
ranks = [dbf .RankedTokens (t .penalty , [t .token ]) for t in name_fulls ]
290
290
ranks .sort (key = lambda r : r .penalty )
291
291
# Fallback, sum of penalty for partials
292
292
name_partials = self .query .get_partials_list (trange )
293
293
default = sum (t .penalty for t in name_partials ) + 0.2
294
294
return dbf .FieldRanking (db_field , default , ranks )
295
295
296
- def get_addr_ranking (self , trange : TokenRange ) -> dbf .FieldRanking :
296
+ def get_addr_ranking (self , trange : qmod . TokenRange ) -> dbf .FieldRanking :
297
297
""" Create a list of ranking expressions for an address term
298
298
for the given ranges.
299
299
"""
@@ -304,10 +304,10 @@ def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
304
304
while todo :
305
305
neglen , pos , rank = heapq .heappop (todo )
306
306
for tlist in self .query .nodes [pos ].starting :
307
- if tlist .ttype in (TokenType . PARTIAL , TokenType . WORD ):
307
+ if tlist .ttype in (qmod . TOKEN_PARTIAL , qmod . TOKEN_WORD ):
308
308
if tlist .end < trange .end :
309
309
chgpenalty = PENALTY_WORDCHANGE [self .query .nodes [tlist .end ].btype ]
310
- if tlist .ttype == TokenType . PARTIAL :
310
+ if tlist .ttype == qmod . TOKEN_PARTIAL :
311
311
penalty = rank .penalty + chgpenalty \
312
312
+ max (t .penalty for t in tlist .tokens )
313
313
heapq .heappush (todo , (neglen - 1 , tlist .end ,
@@ -317,7 +317,7 @@ def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
317
317
heapq .heappush (todo , (neglen - 1 , tlist .end ,
318
318
rank .with_token (t , chgpenalty )))
319
319
elif tlist .end == trange .end :
320
- if tlist .ttype == TokenType . PARTIAL :
320
+ if tlist .ttype == qmod . TOKEN_PARTIAL :
321
321
ranks .append (dbf .RankedTokens (rank .penalty
322
322
+ max (t .penalty for t in tlist .tokens ),
323
323
rank .tokens ))
@@ -357,11 +357,11 @@ def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchDat
357
357
if assignment .housenumber :
358
358
sdata .set_strings ('housenumbers' ,
359
359
self .query .get_tokens (assignment .housenumber ,
360
- TokenType . HOUSENUMBER ))
360
+ qmod . TOKEN_HOUSENUMBER ))
361
361
if assignment .postcode :
362
362
sdata .set_strings ('postcodes' ,
363
363
self .query .get_tokens (assignment .postcode ,
364
- TokenType . POSTCODE ))
364
+ qmod . TOKEN_POSTCODE ))
365
365
if assignment .qualifier :
366
366
tokens = self .get_qualifier_tokens (assignment .qualifier )
367
367
if not tokens :
@@ -386,23 +386,23 @@ def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchDat
386
386
387
387
return sdata
388
388
389
- def get_country_tokens (self , trange : TokenRange ) -> List [Token ]:
389
+ def get_country_tokens (self , trange : qmod . TokenRange ) -> List [qmod . Token ]:
390
390
""" Return the list of country tokens for the given range,
391
391
optionally filtered by the country list from the details
392
392
parameters.
393
393
"""
394
- tokens = self .query .get_tokens (trange , TokenType . COUNTRY )
394
+ tokens = self .query .get_tokens (trange , qmod . TOKEN_COUNTRY )
395
395
if self .details .countries :
396
396
tokens = [t for t in tokens if t .lookup_word in self .details .countries ]
397
397
398
398
return tokens
399
399
400
- def get_qualifier_tokens (self , trange : TokenRange ) -> List [Token ]:
400
+ def get_qualifier_tokens (self , trange : qmod . TokenRange ) -> List [qmod . Token ]:
401
401
""" Return the list of qualifier tokens for the given range,
402
402
optionally filtered by the qualifier list from the details
403
403
parameters.
404
404
"""
405
- tokens = self .query .get_tokens (trange , TokenType . QUALIFIER )
405
+ tokens = self .query .get_tokens (trange , qmod . TOKEN_QUALIFIER )
406
406
if self .details .categories :
407
407
tokens = [t for t in tokens if t .get_category () in self .details .categories ]
408
408
@@ -415,7 +415,7 @@ def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCa
415
415
"""
416
416
if assignment .near_item :
417
417
tokens : Dict [Tuple [str , str ], float ] = {}
418
- for t in self .query .get_tokens (assignment .near_item , TokenType . NEAR_ITEM ):
418
+ for t in self .query .get_tokens (assignment .near_item , qmod . TOKEN_NEAR_ITEM ):
419
419
cat = t .get_category ()
420
420
# The category of a near search will be that of near_item.
421
421
# Thus, if search is restricted to a category parameter,
@@ -429,11 +429,11 @@ def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCa
429
429
430
430
431
431
PENALTY_WORDCHANGE = {
432
- BreakType . START : 0.0 ,
433
- BreakType . END : 0.0 ,
434
- BreakType . PHRASE : 0.0 ,
435
- BreakType . SOFT_PHRASE : 0.0 ,
436
- BreakType . WORD : 0.1 ,
437
- BreakType . PART : 0.2 ,
438
- BreakType . TOKEN : 0.4
432
+ qmod . BREAK_START : 0.0 ,
433
+ qmod . BREAK_END : 0.0 ,
434
+ qmod . BREAK_PHRASE : 0.0 ,
435
+ qmod . BREAK_SOFT_PHRASE : 0.0 ,
436
+ qmod . BREAK_WORD : 0.1 ,
437
+ qmod . BREAK_PART : 0.2 ,
438
+ qmod . BREAK_TOKEN : 0.4
439
439
}
0 commit comments