11package io .quarkiverse .langchain4j .testing .scorer ;
22
33import java .io .Closeable ;
4+ import java .util .Comparator ;
45import java .util .List ;
56import java .util .concurrent .CopyOnWriteArrayList ;
67import java .util .concurrent .CountDownLatch ;
@@ -28,50 +29,64 @@ public Scorer() {
2829 }
2930
3031 @ SuppressWarnings ({ "unchecked" })
31- public <T > EvaluationReport evaluate ( Samples <T > samples , Function < Parameters , T > function ,
32- EvaluationStrategy <T >... strategies ) {
33- List <EvaluationResult <? >> evaluations = new CopyOnWriteArrayList <>();
32+ public <T > EvaluationReport <T > evaluate (
33+ Samples < T > samples , Function < Parameters , T > function , EvaluationStrategy <T >... strategies ) {
34+ List <OrderedEvaluationResult < T >> evaluations = new CopyOnWriteArrayList <>();
3435 CountDownLatch latch = new CountDownLatch (samples .size ());
36+ var index = 0 ;
3537 for (EvaluationSample <T > sample : samples ) {
3638 // TODO Should we handle the context somehow.
37- executor .submit (() -> {
38- try {
39- var response = execute (sample , function );
40- LOG .infof ("Evaluating sample `%s`" , sample .name ());
41- for (EvaluationStrategy <T > strategy : strategies ) {
42- EvaluationResult <T > evaluation = EvaluationResult .fromCompletedEvaluation (sample ,
43- response , strategy .evaluate (sample , response ));
44- LOG .infof ("Evaluation of sample `%s` with strategy `%s`: %s" , sample .name (),
45- strategy .getClass ().getSimpleName (),
46- evaluation .passed () ? "OK" : "KO" );
47- evaluations .add (evaluation );
48- }
49- } catch (Throwable e ) {
50- LOG .errorf (e , "Failed to evaluate sample `%s`" , sample .name ());
51- evaluations .add (EvaluationResult .fromEvaluationThrowable (sample , e ));
52- } finally {
53- latch .countDown ();
54- }
55- });
39+ var currentIndex = index ++;
40+ executor .submit (
41+ () -> {
42+ try {
43+ var response = execute (sample , function );
44+ LOG .infof ("Evaluating sample `%s`" , sample .name ());
45+ for (EvaluationStrategy <T > strategy : strategies ) {
46+ EvaluationResult <T > evaluation = EvaluationResult .fromCompletedEvaluation (
47+ sample , response , strategy .evaluate (sample , response ));
48+ LOG .infof (
49+ "Evaluation of sample `%s` with strategy `%s`: %s" ,
50+ sample .name (),
51+ strategy .getClass ().getSimpleName (),
52+ evaluation .passed () ? "OK" : "KO" );
53+ evaluations .add (new OrderedEvaluationResult (currentIndex , evaluation ));
54+ }
55+ } catch (Throwable e ) {
56+ LOG .errorf (e , "Failed to evaluate sample `%s`" , sample .name ());
57+ evaluations .add (
58+ new OrderedEvaluationResult (
59+ currentIndex , EvaluationResult .fromEvaluationThrowable (sample , e )));
60+ } finally {
61+ latch .countDown ();
62+ }
63+ });
5664 }
5765 try {
5866 latch .await ();
5967 } catch (InterruptedException e ) {
6068 Thread .currentThread ().interrupt ();
6169 }
62- return new EvaluationReport (evaluations );
70+ var orderedEvalutions = evaluations .stream ()
71+ .sorted (Comparator .comparing (OrderedEvaluationResult ::index ))
72+ .map (OrderedEvaluationResult ::evaluation )
73+ .toList ();
74+ return new EvaluationReport <>(orderedEvalutions );
6375 }
6476
6577 public void close () {
6678 executor .shutdown ();
6779 }
6880
69- public record EvaluationResult <T >(EvaluationSample <T > sample , T result , Throwable thrown , boolean passed ) {
70- public static <T > EvaluationResult <T > fromCompletedEvaluation (EvaluationSample <T > sample , T result , boolean passed ) {
81+ public record EvaluationResult <T >(
82+ EvaluationSample <T > sample , T result , Throwable thrown , boolean passed ) {
83+ public static <T > EvaluationResult <T > fromCompletedEvaluation (
84+ EvaluationSample <T > sample , T result , boolean passed ) {
7185 return new EvaluationResult <>(sample , result , null , passed );
7286 }
7387
74- public static <T > EvaluationResult <T > fromEvaluationThrowable (EvaluationSample <T > sample , Throwable thrown ) {
88+ public static <T > EvaluationResult <T > fromEvaluationThrowable (
89+ EvaluationSample <T > sample , Throwable thrown ) {
7590 return new EvaluationResult <>(sample , null , thrown , false );
7691 }
7792 }
@@ -84,4 +99,6 @@ private <T> T execute(EvaluationSample<T> sample, Function<Parameters, T> functi
8499 }
85100 }
86101
102+ private record OrderedEvaluationResult <T >(int index , EvaluationResult <T > evaluation ) {
103+ }
87104}
0 commit comments