19
19
20
20
from pydantic import BaseModel , ConfigDict , Field
21
21
import pandas as pd
22
+ from tenacity import Retrying , retry , stop_after_attempt , wait_exponential
22
23
from tqdm .auto import tqdm
23
24
from concurrent .futures import FIRST_COMPLETED , ThreadPoolExecutor , as_completed , wait
24
25
from langevals_core .azure_patch import patch_litellm
@@ -267,9 +268,10 @@ def set_model_envs(self):
267
268
def evaluate (self , entry : TEntry ) -> SingleEvaluationResult :
268
269
raise NotImplementedError ("This method should be implemented by subclasses." )
269
270
270
- def _evaluate_entry (self , entry ):
271
+ def _evaluate_entry (self , entry , retries = 0 ):
271
272
try :
272
- return self .evaluate (entry )
273
+ retryer = Retrying (stop = stop_after_attempt (retries ), reraise = True )
274
+ return retryer (self .evaluate , entry )
273
275
except Exception as exception :
274
276
return EvaluationResultError (
275
277
error_type = type (exception ).__name__ ,
@@ -284,14 +286,15 @@ def evaluate_batch(
284
286
data : List [TEntry ],
285
287
index = 0 ,
286
288
max_evaluations_in_parallel = 50 ,
289
+ retries = 3 ,
287
290
_executor_ref : Optional [Callable [[ThreadPoolExecutor ], None ]] = None ,
288
291
) -> BatchEvaluationResult :
289
292
results : list [SingleEvaluationResult ] = [
290
293
EvaluationResultSkipped (details = "not processed" )
291
294
] * len (data )
292
295
with ThreadPoolExecutor (max_workers = max_evaluations_in_parallel ) as executor :
293
296
future_to_index = {
294
- executor .submit (self ._evaluate_entry , entry ): idx
297
+ executor .submit (self ._evaluate_entry , entry , retries ): idx
295
298
for idx , entry in enumerate (data )
296
299
}
297
300
@@ -306,7 +309,9 @@ def evaluate_batch(
306
309
executor , "interrupted"
307
310
) and executor .__getattribute__ ("interrupted" ):
308
311
raise KeyboardInterrupt ()
309
- done , not_done = wait (not_done , timeout = 0.1 , return_when = FIRST_COMPLETED )
312
+ done , not_done = wait (
313
+ not_done , timeout = 0.1 , return_when = FIRST_COMPLETED
314
+ )
310
315
for future in done :
311
316
idx = future_to_index [future ]
312
317
results [idx ] = future .result ()
0 commit comments