quarkiverse
diff --git a/core/deployment/src/main/java/io/quarkiverse/langchain4j/deployment/AiServicesProcessor.java b/core/deployment/src/main/java/io/quarkiverse/langchain4j/deployment/AiServicesProcessor.java
Lines changed: 5 additions & 4 deletions
diff --git a/core/deployment/src/main/java/io/quarkiverse/langchain4j/deployment/DotNames.java b/core/deployment/src/main/java/io/quarkiverse/langchain4j/deployment/DotNames.java
Lines changed: 2 additions & 0 deletions
diff --git a/core/runtime/src/main/java/io/quarkiverse/langchain4j/runtime/aiservice/AiServiceMethodImplementationSupport.java b/core/runtime/src/main/java/io/quarkiverse/langchain4j/runtime/aiservice/AiServiceMethodImplementationSupport.java
Lines changed: 47 additions & 14 deletions
@@ -693,14 +693,15 @@ public void handleDeclarativeServices(AiServicesRecorder recorder,
                 if (!DotNames.MULTI.equals(method.returnType().name())) {
                     continue;
                 }
-                boolean isMultiString = false;
+                boolean isSupportedResponseType = false;
                 if (method.returnType().kind() == Type.Kind.PARAMETERIZED_TYPE) {
                     Type multiType = method.returnType().asParameterizedType().arguments().get(0);
-                    if (DotNames.STRING.equals(multiType.name())) {
-                        isMultiString = true;
+                    if (DotNames.STRING.equals(multiType.name())
+                            || DotNames.CHAT_EVENT.equals(multiType.name())) {
+                        isSupportedResponseType = true;
                     }
                 }
-                if (!isMultiString) {
+                if (!isSupportedResponseType) {
                     throw illegalConfiguration("Only Multi<String> is supported as a Multi return type. Offending method is '"
                             + method.declaringClass().name().toString() + "#" + method.name() + "'");
                 }
 
@@ -17,6 +17,7 @@
 import io.quarkiverse.langchain4j.guardrails.OutputGuardrailAccumulator;
 import io.quarkiverse.langchain4j.response.AiResponseAugmenter;
 import io.quarkiverse.langchain4j.response.ResponseAugmenter;
+import io.quarkiverse.langchain4j.runtime.aiservice.ChatEvent;
 import io.smallrye.common.annotation.Blocking;
 import io.smallrye.common.annotation.NonBlocking;
 import io.smallrye.common.annotation.RunOnVirtualThread;
@@ -62,6 +63,7 @@ public class DotNames {
     public static final DotName CHAT_MODEL_LISTENER = DotName.createSimple(ChatModelListener.class);
     public static final DotName MODEL_AUTH_PROVIDER = DotName.createSimple(ModelAuthProvider.class);
     public static final DotName TOOL = DotName.createSimple(Tool.class);
+    public static final DotName CHAT_EVENT = DotName.createSimple(ChatEvent.class);
 
     public static final DotName REGISTER_REST_CLIENT = DotName.createSimple(RegisterRestClient.class);
 
 
@@ -177,6 +177,10 @@ private static Object doImplement(AiServiceMethodCreateInfo methodCreateInfo, Ob
         Map<String, Object> templateVariables = getTemplateVariables(methodArgs, methodCreateInfo.getUserMessageInfo());
 
         Type returnType = methodCreateInfo.getReturnType();
+        boolean isMulti = TypeUtil.isMulti(returnType);
+
+        final boolean isStringMulti = (isMulti && returnType instanceof ParameterizedType
+                && TypeUtil.isTypeOf(((ParameterizedType) returnType).getActualTypeArguments()[0], String.class));
         if (TypeUtil.isImage(returnType) || TypeUtil.isResultImage(returnType)) {
             return doImplementGenerateImage(methodCreateInfo, context, systemMessage, userMessage, memoryId, returnType,
                     templateVariables, auditSourceInfo);
@@ -217,7 +221,7 @@ private static Object doImplement(AiServiceMethodCreateInfo methodCreateInfo, Ob
             Metadata metadata = Metadata.from(userMessage, memoryId, chatMemory);
             AugmentationRequest augmentationRequest = new AugmentationRequest(userMessage, metadata);
 
-            if (!TypeUtil.isMulti(returnType)) {
+            if (!isMulti) {
                 augmentationResult = context.retrievalAugmentor.augment(augmentationRequest);
                 userMessage = (UserMessage) augmentationResult.chatMessage();
             } else {
@@ -244,10 +248,18 @@ public Flow.Publisher<?> apply(AugmentationResult ar) {
                                 var stream = new TokenStreamMulti(messagesToSend, effectiveToolSpecifications,
                                         finalToolExecutors, ar.contents(), context, memoryId,
                                         methodCreateInfo.isSwitchToWorkerThreadForToolExecution(), isRunningOnWorkerThread);
-                                return stream.plug(m -> ResponseAugmenterSupport.apply(m, methodCreateInfo,
-                                        new ResponseAugmenterParams((UserMessage) augmentedUserMessage,
-                                                memory, ar, methodCreateInfo.getUserMessageTemplate(),
-                                                templateVariables)));
+                                return stream
+                                        .filter(event -> {
+                                            return !isStringMulti || event instanceof ChatEvent.PartialResponseEvent;
+                                        }).map(event -> {
+                                            if (isStringMulti && event instanceof ChatEvent.PartialResponseEvent) {
+                                                return ((ChatEvent.PartialResponseEvent) event).getChunk();
+                                            }
+                                            return event;
+                                        }).plug(m -> ResponseAugmenterSupport.apply(m, methodCreateInfo,
+                                                new ResponseAugmenterParams((UserMessage) augmentedUserMessage,
+                                                        memory, ar, methodCreateInfo.getUserMessageTemplate(),
+                                                        templateVariables)));
                             }
 
                             private List<ChatMessage> messagesToSend(UserMessage augmentedUserMessage,
@@ -297,13 +309,20 @@ private List<ChatMessage> messagesToSend(UserMessage augmentedUserMessage,
 
         var actualAugmentationResult = augmentationResult;
         var actualUserMessage = userMessage;
-        if (TypeUtil.isMulti(returnType)) {
+        if (isMulti) {
             chatMemory.commit(); // for streaming cases, we really have to commit because all alternatives are worse
             if (methodCreateInfo.getOutputGuardrailsClassNames().isEmpty()) {
                 var stream = new TokenStreamMulti(messagesToSend, toolSpecifications, toolExecutors,
                         (augmentationResult != null ? augmentationResult.contents() : null), context, memoryId,
                         methodCreateInfo.isSwitchToWorkerThreadForToolExecution(), isRunningOnWorkerThread);
-                return stream.plug(m -> ResponseAugmenterSupport.apply(m, methodCreateInfo,
+                return stream.filter(event -> {
+                    return !isStringMulti || event instanceof ChatEvent.PartialResponseEvent;
+                }).map(event -> {
+                    if (isStringMulti && event instanceof ChatEvent.PartialResponseEvent) {
+                        return ((ChatEvent.PartialResponseEvent) event).getChunk();
+                    }
+                    return event;
+                }).plug(m -> ResponseAugmenterSupport.apply(m, methodCreateInfo,
                         new ResponseAugmenterParams(actualUserMessage,
                                 chatMemory, actualAugmentationResult, methodCreateInfo.getUserMessageTemplate(),
                                 Collections.unmodifiableMap(templateVariables))));
@@ -317,7 +336,8 @@ private List<ChatMessage> messagesToSend(UserMessage augmentedUserMessage,
                         OutputGuardrailResult result;
                         try {
                             result = GuardrailsSupport.invokeOutputGuardrailsForStream(methodCreateInfo,
-                                    new OutputGuardrailParams(AiMessage.from(chunk), chatMemory, actualAugmentationResult,
+                                    new OutputGuardrailParams(AiMessage.from(chunk.getMessage()), chatMemory,
+                                            actualAugmentationResult,
                                             methodCreateInfo.getUserMessageTemplate(),
                                             Collections.unmodifiableMap(templateVariables)),
                                     beanManager, auditSourceInfo);
@@ -340,6 +360,9 @@ private List<ChatMessage> messagesToSend(UserMessage augmentedUserMessage,
                                 throw new GuardrailException(
                                         "Attempting to rewrite the LLM output while streaming is not allowed");
                             }
+                            if (isStringMulti) {
+                                return chunk.getMessage();
+                            }
                             return chunk;
                         }
                     })
@@ -915,7 +938,7 @@ public interface Wrapper {
         Object wrap(Input input, Function<Input, Object> fun);
     }
 
-    private static class TokenStreamMulti extends AbstractMulti<String> implements Multi<String> {
+    private static class TokenStreamMulti extends AbstractMulti<ChatEvent> implements Multi<ChatEvent> {
         private final List<ChatMessage> messagesToSend;
         private final List<ToolSpecification> toolSpecifications;
         private final Map<String, ToolExecutor> toolsExecutors;
@@ -941,14 +964,14 @@ public TokenStreamMulti(List<ChatMessage> messagesToSend, List<ToolSpecification
         }
 
         @Override
-        public void subscribe(MultiSubscriber<? super String> subscriber) {
-            UnicastProcessor<String> processor = UnicastProcessor.create();
+        public void subscribe(MultiSubscriber<? super ChatEvent> subscriber) {
+            UnicastProcessor<ChatEvent> processor = UnicastProcessor.create();
             processor.subscribe(subscriber);
 
             createTokenStream(processor);
         }
 
-        private void createTokenStream(UnicastProcessor<String> processor) {
+        private void createTokenStream(UnicastProcessor<ChatEvent> processor) {
             Context ctxt = null;
             if (switchToWorkerThreadForToolExecution || isCallerRunningOnWorkerThread) {
                 // we create or retrieve the current context, to use `executeBlocking` when required.
@@ -959,8 +982,18 @@ private void createTokenStream(UnicastProcessor<String> processor) {
                     toolsExecutors, contents, context, memoryId, ctxt, switchToWorkerThreadForToolExecution,
                     isCallerRunningOnWorkerThread);
             TokenStream tokenStream = stream
-                    .onPartialResponse(processor::onNext)
-                    .onCompleteResponse(message -> processor.onComplete())
+                    .onPartialResponse(chunk -> processor
+                            .onNext(new ChatEvent.PartialResponseEvent(chunk)))
+                    .onCompleteResponse(message -> {
+                        processor.onNext(new ChatEvent.ChatCompletedEvent(message));
+                        processor.onComplete();
+                    })
+                    .onRetrieved(content -> {
+                        processor.onNext(new ChatEvent.ContentFetchedEvent(content));
+                    })
+                    .onToolExecuted(execution -> {
+                        processor.onNext(new ChatEvent.ToolExecutedEvent(execution));
+                    })
                     .onError(processor::onError);
             // This is equivalent to "run subscription on worker thread"
             if (switchToWorkerThreadForToolExecution && Context.isOnEventLoopThread()) {
Original file line number	Diff line number	Diff line change
`@@ -693,14 +693,15 @@ public void handleDeclarativeServices(AiServicesRecorder recorder,`
`693`	`693`	`if (!DotNames.MULTI.equals(method.returnType().name())) {`
`694`	`694`	`continue;`
`695`	`695`	`}`
`696`		`- boolean isMultiString = false;`
	`696`	`+ boolean isSupportedResponseType = false;`
`697`	`697`	`if (method.returnType().kind() == Type.Kind.PARAMETERIZED_TYPE) {`
`698`	`698`	`Type multiType = method.returnType().asParameterizedType().arguments().get(0);`
`699`		`- if (DotNames.STRING.equals(multiType.name())) {`
`700`		`- isMultiString = true;`
	`699`	`+ if (DotNames.STRING.equals(multiType.name())`
	`700`	`+ \|\| DotNames.CHAT_EVENT.equals(multiType.name())) {`
	`701`	`+ isSupportedResponseType = true;`
`701`	`702`	`}`
`702`	`703`	`}`
`703`		`- if (!isMultiString) {`
	`704`	`+ if (!isSupportedResponseType) {`
`704`	`705`	`throw illegalConfiguration("Only Multi<String> is supported as a Multi return type. Offending method is '"`
`705`	`706`	`+ method.declaringClass().name().toString() + "#" + method.name() + "'");`
`706`	`707`	`}`