From cf7efa130fc1b31055f7c8f18d65936961a72753 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Fri, 2 May 2025 16:07:30 -0600 Subject: [PATCH 01/41] wip on simple streaming --- .../app/routers/index/sessions/__init__.py | 29 +++++++++++++ llm-service/app/services/llm_completion.py | 42 ++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 83a38de1..110d3e86 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -41,10 +41,12 @@ from typing import Optional from fastapi import APIRouter, Header +from fastapi.responses import StreamingResponse from pydantic import BaseModel from .... import exceptions from ....rag_types import RagPredictConfiguration +from ....services import models from ....services.chat import ( v2_chat, ) @@ -53,6 +55,7 @@ chat_history_manager, ) from ....services.chat_history.paginator import paginate +from ....services.llm_completion import stream_completion from ....services.metadata_apis import session_metadata_api from ....services.mlflow import rating_mlflow_log_metric, feedback_mlflow_log_table from ....services.session import rename_session @@ -161,6 +164,10 @@ class RagStudioChatRequest(BaseModel): configuration: RagPredictConfiguration | None = None +class StreamCompletionRequest(BaseModel): + query: str + + def parse_jwt_cookie(jwt_cookie: str | None) -> str: if jwt_cookie is None: return "unknown" @@ -188,3 +195,25 @@ def chat( configuration = request.configuration or RagPredictConfiguration() return v2_chat(session, request.query, configuration, user_name=origin_remote_user) + + +@router.post( + "/stream-completion", summary="Stream completion responses for the given query" +) +@exceptions.propagates +def stream_chat_completion( + session_id: int, + request: StreamCompletionRequest, + origin_remote_user: Optional[str] = Header(None), +): + session = session_metadata_api.get_session(session_id, user_name=origin_remote_user) + model_name = session.inference_model + + def generate_stream(): + for response in stream_completion(session_id, request.query, model_name): + yield json.dumps( + {"text": response.message.content, "done": response.delta is None} + ) + "\n" + + # todo: write to history, start evals, rewrite question, log to mlfow once the response is done + return StreamingResponse(generate_stream(), media_type="application/x-ndjson") diff --git a/llm-service/app/services/llm_completion.py b/llm-service/app/services/llm_completion.py index 34541ee9..918ca2f9 100644 --- a/llm-service/app/services/llm_completion.py +++ b/llm-service/app/services/llm_completion.py @@ -36,8 +36,15 @@ # DATA. # import itertools +from typing import Generator -from llama_index.core.base.llms.types import ChatMessage, ChatResponse +from llama_index.core.base.llms.types import ( + ChatMessage, + ChatResponse, + ChatResponseGen, + CompletionResponse, + CompletionResponseGen, +) from llama_index.core.llms import LLM from . import models @@ -66,6 +73,24 @@ def completion(session_id: int, question: str, model_name: str) -> ChatResponse: return model.chat(messages) +def stream_completion( + session_id: int, question: str, model_name: str +) -> ChatResponseGen: + """ + Streamed version of the completion function. + Returns a generator that yields ChatResponse objects as they become available. 
+ """ + model = models.LLM.get(model_name) + chat_history = chat_history_manager.retrieve_chat_history(session_id)[:10] + messages = list( + itertools.chain.from_iterable( + map(lambda x: make_chat_messages(x), chat_history) + ) + ) + messages.append(ChatMessage.from_str(question, role="user")) + return model.stream_chat(messages) + + def hypothetical(question: str, configuration: QueryConfiguration) -> str: model: LLM = models.LLM.get(configuration.model_name) prompt: str = ( @@ -73,3 +98,18 @@ def hypothetical(question: str, configuration: QueryConfiguration) -> str: "Produce a brief document that would hypothetically answer this question." ) return model.complete(prompt).text + + +def stream_hypothetical( + question: str, configuration: QueryConfiguration +) -> CompletionResponseGen: + """ + Streamed version of the hypothetical function. + Returns a generator that yields CompletionResponse objects as they become available. + """ + model: LLM = models.LLM.get(configuration.model_name) + prompt: str = ( + f"You are an expert. You are asked: {question}. " + "Produce a brief document that would hypothetically answer this question." + ) + return model.stream_complete(prompt) From 46da524da890a5307c9225a269c2991a1e56390e Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 5 May 2025 09:17:33 -0600 Subject: [PATCH 02/41] simple poc for streaming --- ui/src/api/chatApi.ts | 42 +++++++++++++++++++ ui/src/api/utils.ts | 1 + .../FooterComponents/RagChatQueryInput.tsx | 14 ++++++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index fdb9abb1..2123fc0b 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -384,3 +384,45 @@ const feedbackMutation = async ({ { feedback }, ); }; + +interface ChatRequest { + query: string; + sessionId: string; +} + +interface ChatMutationOptions { + onChunk?: (chunk: string) => void; +} + +export function useStreamChatMutation(options?: ChatMutationOptions) { + return useMutation({ + mutationKey: [MutationKeys.streamChatMutation], + mutationFn: async ({ query, sessionId }: ChatRequest) => { + const res = await fetch( + `${llmServicePath}/sessions/${sessionId}/stream-completion`, + { + method: "POST", + body: JSON.stringify({ query }), + headers: { "Content-Type": "application/json" }, + }, + ); + if (!res.body) throw new Error("Error getting stream completion"); + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let fullResponse = ""; + let done = false; + + while (!done) { + const { value, done: doneReading } = await reader.read(); + done = doneReading; + // do we need the fallback? + const chunk = decoder.decode(value ?? 
new Uint8Array(), { + stream: true, + }); + fullResponse += chunk; + options?.onChunk?.(chunk); + } + return fullResponse; + }, + }); +} diff --git a/ui/src/api/utils.ts b/ui/src/api/utils.ts index a6c06b5a..ccdce209 100644 --- a/ui/src/api/utils.ts +++ b/ui/src/api/utils.ts @@ -80,6 +80,7 @@ export enum MutationKeys { "removeDataSourceFromProject" = "removeDataSourceFromProject", "updateAmpConfig" = "updateAmpConfig", "restartApplication" = "restartApplication", + "streamChatMutation" = "streamChatMutation", } export enum QueryKeys { diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index 51cc4a43..8c7959c6 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -40,7 +40,11 @@ import { Button, Flex, Input, InputRef, Switch, Tooltip } from "antd"; import { DatabaseFilled, SendOutlined } from "@ant-design/icons"; import { useContext, useEffect, useRef, useState } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; -import { createQueryConfiguration, useChatMutation } from "src/api/chatApi.ts"; +import { + createQueryConfiguration, + useChatMutation, + useStreamChatMutation, +} from "src/api/chatApi.ts"; import { useParams, useSearch } from "@tanstack/react-router"; import { cdlBlue600 } from "src/cuix/variables.ts"; @@ -62,6 +66,7 @@ const RagChatQueryInput = ({ const [userInput, setUserInput] = useState(""); const { sessionId } = useParams({ strict: false }); + const [, setResponse] = useState(""); const search: { question?: string } = useSearch({ strict: false, }); @@ -85,6 +90,12 @@ const RagChatQueryInput = ({ }, }); + const streamChatMutation = useStreamChatMutation({ + onChunk: (chunk) => { + setResponse((prev) => prev + chunk); + }, + }); + useEffect(() => { if (inputRef.current) { inputRef.current.focus(); @@ -97,6 +108,7 @@ const RagChatQueryInput = ({ } if (userInput.length > 0) { if (sessionId) { + streamChatMutation.mutate({ query: userInput, sessionId: sessionId }); chatMutation.mutate({ query: userInput, session_id: +sessionId, From 52904cb1bef0e09f4ec591244ee727374d233a4f Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 5 May 2025 09:18:22 -0600 Subject: [PATCH 03/41] remove usage from RagChatQueryInput.tsx --- .../FooterComponents/RagChatQueryInput.tsx | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index 8c7959c6..51cc4a43 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -40,11 +40,7 @@ import { Button, Flex, Input, InputRef, Switch, Tooltip } from "antd"; import { DatabaseFilled, SendOutlined } from "@ant-design/icons"; import { useContext, useEffect, useRef, useState } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; -import { - createQueryConfiguration, - useChatMutation, - useStreamChatMutation, -} from "src/api/chatApi.ts"; +import { createQueryConfiguration, useChatMutation } from "src/api/chatApi.ts"; import { useParams, useSearch } from "@tanstack/react-router"; import { cdlBlue600 } from "src/cuix/variables.ts"; @@ -66,7 +62,6 @@ const RagChatQueryInput = ({ const [userInput, setUserInput] = useState(""); const { sessionId } = useParams({ 
strict: false }); - const [, setResponse] = useState(""); const search: { question?: string } = useSearch({ strict: false, }); @@ -90,12 +85,6 @@ const RagChatQueryInput = ({ }, }); - const streamChatMutation = useStreamChatMutation({ - onChunk: (chunk) => { - setResponse((prev) => prev + chunk); - }, - }); - useEffect(() => { if (inputRef.current) { inputRef.current.focus(); @@ -108,7 +97,6 @@ const RagChatQueryInput = ({ } if (userInput.length > 0) { if (sessionId) { - streamChatMutation.mutate({ query: userInput, sessionId: sessionId }); chatMutation.mutate({ query: userInput, session_id: +sessionId, From 5ff5da80c23a4b5b9b8afd5714505251c5ee956f Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 5 May 2025 09:22:53 -0600 Subject: [PATCH 04/41] remove stream hypothetical --- llm-service/app/services/llm_completion.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/llm-service/app/services/llm_completion.py b/llm-service/app/services/llm_completion.py index 918ca2f9..787469cb 100644 --- a/llm-service/app/services/llm_completion.py +++ b/llm-service/app/services/llm_completion.py @@ -36,14 +36,11 @@ # DATA. # import itertools -from typing import Generator from llama_index.core.base.llms.types import ( ChatMessage, ChatResponse, ChatResponseGen, - CompletionResponse, - CompletionResponseGen, ) from llama_index.core.llms import LLM @@ -98,18 +95,3 @@ def hypothetical(question: str, configuration: QueryConfiguration) -> str: "Produce a brief document that would hypothetically answer this question." ) return model.complete(prompt).text - - -def stream_hypothetical( - question: str, configuration: QueryConfiguration -) -> CompletionResponseGen: - """ - Streamed version of the hypothetical function. - Returns a generator that yields CompletionResponse objects as they become available. - """ - model: LLM = models.LLM.get(configuration.model_name) - prompt: str = ( - f"You are an expert. You are asked: {question}. " - "Produce a brief document that would hypothetically answer this question." - ) - return model.stream_complete(prompt) From 96be99aa710e736ea7434964dc0f6ad27c990b3a Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 5 May 2025 09:29:16 -0600 Subject: [PATCH 05/41] remove unused import --- llm-service/app/routers/index/sessions/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 110d3e86..01563e45 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -46,7 +46,6 @@ from .... 
import exceptions from ....rag_types import RagPredictConfiguration -from ....services import models from ....services.chat import ( v2_chat, ) From 4e8e6f005a53bec025f994fb0f9dddbea589373e Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 5 May 2025 12:35:12 -0600 Subject: [PATCH 06/41] wip on doing something once the gen is done --- llm-service/app/routers/index/sessions/__init__.py | 8 +++++++- llm-service/app/services/llm_completion.py | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 01563e45..d6127b81 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -214,5 +214,11 @@ def generate_stream(): {"text": response.message.content, "done": response.delta is None} ) + "\n" + try: + stream = generate_stream() + finally: + print("DONE") + + # kick off evals with full response # todo: write to history, start evals, rewrite question, log to mlfow once the response is done - return StreamingResponse(generate_stream(), media_type="application/x-ndjson") + return StreamingResponse(stream, media_type="application/x-ndjson") diff --git a/llm-service/app/services/llm_completion.py b/llm-service/app/services/llm_completion.py index 787469cb..b532e3be 100644 --- a/llm-service/app/services/llm_completion.py +++ b/llm-service/app/services/llm_completion.py @@ -36,6 +36,7 @@ # DATA. # import itertools +from typing import Generator from llama_index.core.base.llms.types import ( ChatMessage, @@ -72,7 +73,7 @@ def completion(session_id: int, question: str, model_name: str) -> ChatResponse: def stream_completion( session_id: int, question: str, model_name: str -) -> ChatResponseGen: +) -> Generator[ChatResponse, None, None]: """ Streamed version of the completion function. Returns a generator that yields ChatResponse objects as they become available. 
@@ -85,7 +86,9 @@ def stream_completion( ) ) messages.append(ChatMessage.from_str(question, role="user")) - return model.stream_chat(messages) + + stream = model.stream_chat(messages) + return stream def hypothetical(question: str, configuration: QueryConfiguration) -> str: From 9cb719f99f66cd5330d3b78fd89f7f1ae5b58e58 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 5 May 2025 14:11:55 -0600 Subject: [PATCH 07/41] progress on generators --- .../app/routers/index/sessions/__init__.py | 21 ++++-- llm-service/app/services/chat.py | 74 ++++++++++++++++++- ui/src/api/chatApi.ts | 21 +++--- .../FooterComponents/RagChatQueryInput.tsx | 20 ++++- 4 files changed, 115 insertions(+), 21 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index d6127b81..c865a62b 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -48,6 +48,7 @@ from ....rag_types import RagPredictConfiguration from ....services.chat import ( v2_chat, + v3_chat, ) from ....services.chat_history.chat_history_manager import ( RagStudioChatMessage, @@ -202,23 +203,27 @@ def chat( @exceptions.propagates def stream_chat_completion( session_id: int, - request: StreamCompletionRequest, + request: RagStudioChatRequest, origin_remote_user: Optional[str] = Header(None), ): session = session_metadata_api.get_session(session_id, user_name=origin_remote_user) - model_name = session.inference_model + configuration = request.configuration or RagPredictConfiguration() def generate_stream(): - for response in stream_completion(session_id, request.query, model_name): + for response in v3_chat( + session, request.query, configuration, user_name=origin_remote_user + ): yield json.dumps( {"text": response.message.content, "done": response.delta is None} ) + "\n" - try: - stream = generate_stream() - finally: - print("DONE") + def full_response(): + yield json.dumps({"text": "Another one", "done": True}) + "\n" + + def combined_gen(): + yield from generate_stream() + yield from full_response() # kick off evals with full response # todo: write to history, start evals, rewrite question, log to mlfow once the response is done - return StreamingResponse(stream, media_type="application/x-ndjson") + return StreamingResponse(combined_gen(), media_type="application/x-ndjson") diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index 0eb385e8..6f9fbb10 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -38,10 +38,10 @@ import time import uuid from random import shuffle -from typing import List, Iterable, Optional +from typing import List, Iterable, Optional, Generator from fastapi import HTTPException -from llama_index.core.base.llms.types import MessageRole +from llama_index.core.base.llms.types import MessageRole, ChatResponse from llama_index.core.chat_engine.types import AgentChatResponse from pydantic import BaseModel @@ -67,6 +67,44 @@ class RagContext(BaseModel): content: str +def v3_chat( + session: Session, + query: str, + configuration: RagPredictConfiguration, + user_name: Optional[str], +) -> Generator[ChatResponse, None, RagStudioChatMessage]: + query_configuration = QueryConfiguration( + top_k=session.response_chunks, + model_name=session.inference_model, + rerank_model_name=session.rerank_model, + exclude_knowledge_base=configuration.exclude_knowledge_base, + use_question_condensing=configuration.use_question_condensing, + 
use_hyde=session.query_configuration.enable_hyde, + use_summary_filter=session.query_configuration.enable_summary_filter, + ) + + if configuration.exclude_knowledge_base or len(session.data_source_ids) == 0: + return stream_direct_llm_chat(session, query, user_name=user_name) + + total_data_sources_size: int = sum( + map( + lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, + session.data_source_ids, + ) + ) + if total_data_sources_size == 0: + return stream_direct_llm_chat(session, query, user_name) + + response_id = str(uuid.uuid4()) + + new_chat_message: RagStudioChatMessage = _run_chat( + session, response_id, query, query_configuration, user_name + ) + + chat_history_manager.append_to_history(session.id, [new_chat_message]) + return new_chat_message + + def v2_chat( session: Session, query: str, @@ -337,3 +375,35 @@ def direct_llm_chat( ) chat_history_manager.append_to_history(session.id, [new_chat_message]) return new_chat_message + + +def stream_direct_llm_chat( + session: Session, query: str, user_name: Optional[str] +) -> Generator[ChatResponse, None, RagStudioChatMessage]: + response_id = str(uuid.uuid4()) + record_direct_llm_mlflow_run(response_id, session, user_name) + + chat_response = llm_completion.stream_completion( + session.id, query, session.inference_model + ) + yield from chat_response + + assistant_message = "" + for response in chat_response: + assistant_message += response.message.content + + new_chat_message = RagStudioChatMessage( + id=response_id, + session_id=session.id, + source_nodes=[], + inference_model=session.inference_model, + evaluations=[], + rag_message=RagMessage( + user=query, + assistant=assistant_message, + ), + timestamp=time.time(), + condensed_question=None, + ) + chat_history_manager.append_to_history(session.id, [new_chat_message]) + return new_chat_message diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 2123fc0b..5828b81a 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -385,11 +385,6 @@ const feedbackMutation = async ({ ); }; -interface ChatRequest { - query: string; - sessionId: string; -} - interface ChatMutationOptions { onChunk?: (chunk: string) => void; } @@ -397,12 +392,16 @@ interface ChatMutationOptions { export function useStreamChatMutation(options?: ChatMutationOptions) { return useMutation({ mutationKey: [MutationKeys.streamChatMutation], - mutationFn: async ({ query, sessionId }: ChatRequest) => { + mutationFn: async ({ + query, + configuration, + session_id, + }: ChatMutationRequest) => { const res = await fetch( - `${llmServicePath}/sessions/${sessionId}/stream-completion`, + `${llmServicePath}/sessions/${session_id.toString()}/stream-completion`, { method: "POST", - body: JSON.stringify({ query }), + body: JSON.stringify({ query, configuration }), headers: { "Content-Type": "application/json" }, }, ); @@ -411,7 +410,6 @@ export function useStreamChatMutation(options?: ChatMutationOptions) { const decoder = new TextDecoder(); let fullResponse = ""; let done = false; - while (!done) { const { value, done: doneReading } = await reader.read(); done = doneReading; @@ -419,6 +417,11 @@ export function useStreamChatMutation(options?: ChatMutationOptions) { const chunk = decoder.decode(value ?? 
new Uint8Array(), { stream: true, }); + if (doneReading) { + console.log("HELLO"); + } else { + console.log("CHUNK", chunk); + } fullResponse += chunk; options?.onChunk?.(chunk); } diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index 51cc4a43..80146789 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -40,7 +40,11 @@ import { Button, Flex, Input, InputRef, Switch, Tooltip } from "antd"; import { DatabaseFilled, SendOutlined } from "@ant-design/icons"; import { useContext, useEffect, useRef, useState } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; -import { createQueryConfiguration, useChatMutation } from "src/api/chatApi.ts"; +import { + createQueryConfiguration, + useChatMutation, + useStreamChatMutation, +} from "src/api/chatApi.ts"; import { useParams, useSearch } from "@tanstack/react-router"; import { cdlBlue600 } from "src/cuix/variables.ts"; @@ -62,11 +66,12 @@ const RagChatQueryInput = ({ const [userInput, setUserInput] = useState(""); const { sessionId } = useParams({ strict: false }); + const [response, setResponse] = useState(""); const search: { question?: string } = useSearch({ strict: false, }); const inputRef = useRef(null); - + // console.log(response); const { data: sampleQuestions, isPending: sampleQuestionsIsPending, @@ -85,6 +90,12 @@ const RagChatQueryInput = ({ }, }); + const streamChatMutation = useStreamChatMutation({ + onChunk: (chunk) => { + setResponse((prev) => prev + chunk); + }, + }); + useEffect(() => { if (inputRef.current) { inputRef.current.focus(); @@ -97,6 +108,11 @@ const RagChatQueryInput = ({ } if (userInput.length > 0) { if (sessionId) { + streamChatMutation.mutate({ + query: userInput, + configuration: createQueryConfiguration(excludeKnowledgeBase), + session_id: +sessionId, + }); chatMutation.mutate({ query: userInput, session_id: +sessionId, From 8c25d448916b038fbc386de03b820c003dbbcc21 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 5 May 2025 16:47:34 -0600 Subject: [PATCH 08/41] go back to simple streaming only endpoint --- llm-service/app/routers/index/sessions/__init__.py | 4 ++-- llm-service/app/services/chat.py | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index c865a62b..a8d2d2a7 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -205,7 +205,7 @@ def stream_chat_completion( session_id: int, request: RagStudioChatRequest, origin_remote_user: Optional[str] = Header(None), -): +) -> StreamingResponse: session = session_metadata_api.get_session(session_id, user_name=origin_remote_user) configuration = request.configuration or RagPredictConfiguration() @@ -218,7 +218,7 @@ def generate_stream(): ) + "\n" def full_response(): - yield json.dumps({"text": "Another one", "done": True}) + "\n" + yield json.dumps({"text": "Done", "done": True}) + "\n" def combined_gen(): yield from generate_stream() diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index 6f9fbb10..c564c155 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -72,7 +72,7 @@ def v3_chat( query: str, configuration: RagPredictConfiguration, user_name: Optional[str], -) -> 
Generator[ChatResponse, None, RagStudioChatMessage]: +) -> Generator[ChatResponse, None, None]: query_configuration = QueryConfiguration( top_k=session.response_chunks, model_name=session.inference_model, @@ -379,18 +379,17 @@ def direct_llm_chat( def stream_direct_llm_chat( session: Session, query: str, user_name: Optional[str] -) -> Generator[ChatResponse, None, RagStudioChatMessage]: +) -> Generator[ChatResponse, None, None]: response_id = str(uuid.uuid4()) record_direct_llm_mlflow_run(response_id, session, user_name) chat_response = llm_completion.stream_completion( session.id, query, session.inference_model ) - yield from chat_response - assistant_message = "" for response in chat_response: assistant_message += response.message.content + yield response new_chat_message = RagStudioChatMessage( id=response_id, @@ -406,4 +405,3 @@ def stream_direct_llm_chat( condensed_question=None, ) chat_history_manager.append_to_history(session.id, [new_chat_message]) - return new_chat_message From e25408ddf496b8c79199da925a1fbe2052f35af9 Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 16:20:34 -0700 Subject: [PATCH 09/41] wip lastFile:llm-service/app/services/chat.py --- .../app/routers/index/sessions/__init__.py | 16 ++++++++++++++-- llm-service/app/services/chat.py | 5 ++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index a8d2d2a7..596aa184 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -40,7 +40,7 @@ import logging from typing import Optional -from fastapi import APIRouter, Header +from fastapi import APIRouter, Header, HTTPException from fastapi.responses import StreamingResponse from pydantic import BaseModel @@ -55,7 +55,6 @@ chat_history_manager, ) from ....services.chat_history.paginator import paginate -from ....services.llm_completion import stream_completion from ....services.metadata_apis import session_metadata_api from ....services.mlflow import rating_mlflow_log_metric, feedback_mlflow_log_table from ....services.session import rename_session @@ -103,6 +102,19 @@ def chat_history( ) +@router.get( + "/chat-history/{message_id}", + summary="Returns a specific chat messages for the provided session.", +) +@exceptions.propagates +def get_message_by_id(session_id: int, message_id: str) -> RagStudioChatMessage: + results: list[RagStudioChatMessage] = chat_history_manager.retrieve_chat_history(session_id=session_id) + for message in results: + if message.id == message_id: + return message + raise HTTPException(status_code=404, detail=f"Message with id {message_id} not found in session {session_id}") + + @router.delete( "/chat-history", summary="Deletes the chat history for the provided session." 
) diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index c564c155..f2e93cdf 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -386,9 +386,8 @@ def stream_direct_llm_chat( chat_response = llm_completion.stream_completion( session.id, query, session.inference_model ) - assistant_message = "" + response = "" for response in chat_response: - assistant_message += response.message.content yield response new_chat_message = RagStudioChatMessage( @@ -399,7 +398,7 @@ def stream_direct_llm_chat( evaluations=[], rag_message=RagMessage( user=query, - assistant=assistant_message, + assistant=response.message.content, ), timestamp=time.time(), condensed_question=None, From 1bd9f5fe2f761b1bedafb73175b7b3add7492680 Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 16:39:21 -0700 Subject: [PATCH 10/41] add response id on every chunk returned lastFile:llm-service/app/routers/index/sessions/__init__.py --- llm-service/app/routers/index/sessions/__init__.py | 2 +- llm-service/app/services/chat.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 596aa184..6e611fcb 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -226,7 +226,7 @@ def generate_stream(): session, request.query, configuration, user_name=origin_remote_user ): yield json.dumps( - {"text": response.message.content, "done": response.delta is None} + {"text": response.message.content, "response_id": response.additional_kwargs["response_id"], "done": response.delta is None} ) + "\n" def full_response(): diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index f2e93cdf..68181653 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -41,7 +41,7 @@ from typing import List, Iterable, Optional, Generator from fastapi import HTTPException -from llama_index.core.base.llms.types import MessageRole, ChatResponse +from llama_index.core.base.llms.types import MessageRole, ChatResponse, ChatMessage from llama_index.core.chat_engine.types import AgentChatResponse from pydantic import BaseModel @@ -386,8 +386,9 @@ def stream_direct_llm_chat( chat_response = llm_completion.stream_completion( session.id, query, session.inference_model ) - response = "" + response: ChatResponse = ChatResponse(message=ChatMessage(content=query)) for response in chat_response: + response.additional_kwargs["response_id"] = response_id yield response new_chat_message = RagStudioChatMessage( From 4e9b8ef21730c38e194f31fc488eeb50d23a451e Mon Sep 17 00:00:00 2001 From: jwatson Date: Mon, 5 May 2025 16:42:40 -0700 Subject: [PATCH 11/41] remove duplicate calls, but still not rendering --- ui/src/api/chatApi.ts | 10 +++++----- .../FooterComponents/RagChatQueryInput.tsx | 13 +++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 5828b81a..83efc4c9 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -417,11 +417,11 @@ export function useStreamChatMutation(options?: ChatMutationOptions) { const chunk = decoder.decode(value ?? 
new Uint8Array(), { stream: true, }); - if (doneReading) { - console.log("HELLO"); - } else { - console.log("CHUNK", chunk); - } + // if (doneReading) { + // console.log("HELLO"); + // } else { + // console.log("CHUNK", chunk); + // } fullResponse += chunk; options?.onChunk?.(chunk); } diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index 80146789..fe14d1a1 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -92,7 +92,8 @@ const RagChatQueryInput = ({ const streamChatMutation = useStreamChatMutation({ onChunk: (chunk) => { - setResponse((prev) => prev + chunk); + console.log("stream chunk", chunk); + setResponse(() => chunk); }, }); @@ -113,11 +114,11 @@ const RagChatQueryInput = ({ configuration: createQueryConfiguration(excludeKnowledgeBase), session_id: +sessionId, }); - chatMutation.mutate({ - query: userInput, - session_id: +sessionId, - configuration: createQueryConfiguration(excludeKnowledgeBase), - }); + // chatMutation.mutate({ + // query: userInput, + // session_id: +sessionId, + // configuration: createQueryConfiguration(excludeKnowledgeBase), + // }); } else { newSessionCallback(userInput); } From 3b915220160eb23a4a3357f888e655dff07a919a Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 11:31:07 -0600 Subject: [PATCH 12/41] getting there --- .../app/routers/index/sessions/__init__.py | 26 ++-- ui/package.json | 3 + ui/pnpm-lock.yaml | 88 ++++++++++++ ui/src/api/chatApi.ts | 128 +++++++++++++++++- ui/src/pages/RagChatTab/ChatLayout.tsx | 2 + .../ChatOutput/ChatMessages/ChatMessage.tsx | 2 + .../Loaders/PendingRagOutputSkeleton.tsx | 30 +++- .../FooterComponents/RagChatQueryInput.tsx | 26 ++-- .../pages/RagChatTab/State/RagChatContext.tsx | 2 + 9 files changed, 274 insertions(+), 33 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 6e611fcb..485270b9 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -108,11 +108,16 @@ def chat_history( ) @exceptions.propagates def get_message_by_id(session_id: int, message_id: str) -> RagStudioChatMessage: - results: list[RagStudioChatMessage] = chat_history_manager.retrieve_chat_history(session_id=session_id) + results: list[RagStudioChatMessage] = chat_history_manager.retrieve_chat_history( + session_id=session_id + ) for message in results: if message.id == message_id: return message - raise HTTPException(status_code=404, detail=f"Message with id {message_id} not found in session {session_id}") + raise HTTPException( + status_code=404, + detail=f"Message with id {message_id} not found in session {session_id}", + ) @router.delete( @@ -222,20 +227,15 @@ def stream_chat_completion( configuration = request.configuration or RagPredictConfiguration() def generate_stream(): + response_id: str = "" for response in v3_chat( session, request.query, configuration, user_name=origin_remote_user ): - yield json.dumps( - {"text": response.message.content, "response_id": response.additional_kwargs["response_id"], "done": response.delta is None} - ) + "\n" - - def full_response(): - yield json.dumps({"text": "Done", "done": True}) + "\n" - - def combined_gen(): - yield from generate_stream() - yield from full_response() + print(response.delta) + response_id = 
response.additional_kwargs["response_id"] + yield f"data: {response.delta}" + "\n\n" + yield f"data: {response_id}" + "\n\n" # kick off evals with full response # todo: write to history, start evals, rewrite question, log to mlfow once the response is done - return StreamingResponse(combined_gen(), media_type="application/x-ndjson") + return StreamingResponse(generate_stream(), media_type="text/event-stream") diff --git a/ui/package.json b/ui/package.json index 30038622..16dc2366 100644 --- a/ui/package.json +++ b/ui/package.json @@ -21,6 +21,7 @@ "@ant-design/icons": "^5.5.1", "@emotion/react": "^11.14.0", "@emotion/styled": "^11.14.0", + "@microsoft/fetch-event-source": "^2.0.1", "@mui/material": "^6.4.3", "@mui/x-charts": "^7.26.0", "@tanstack/react-query": "^5.59.20", @@ -29,6 +30,7 @@ "antd": "^5.24.6", "date-fns": "^4.1.0", "lodash": "^4.17.21", + "ndjson": "^2.0.0", "react": "^18.3.1", "react-dom": "^18.3.1", "react-intersection-observer": "^9.16.0", @@ -48,6 +50,7 @@ "@testing-library/user-event": "^14.5.2", "@types/eslint__js": "^8.42.3", "@types/lodash": "^4.17.13", + "@types/ndjson": "^2.0.4", "@types/node": "^22.9.0", "@types/react": "^18.3.12", "@types/react-dom": "^18.3.1", diff --git a/ui/pnpm-lock.yaml b/ui/pnpm-lock.yaml index 0a9c6b26..a29c8bc6 100644 --- a/ui/pnpm-lock.yaml +++ b/ui/pnpm-lock.yaml @@ -17,6 +17,9 @@ importers: '@emotion/styled': specifier: ^11.14.0 version: 11.14.0(@emotion/react@11.14.0(@types/react@18.3.12)(react@18.3.1))(@types/react@18.3.12)(react@18.3.1) + '@microsoft/fetch-event-source': + specifier: ^2.0.1 + version: 2.0.1 '@mui/material': specifier: ^6.4.3 version: 6.4.3(@emotion/react@11.14.0(@types/react@18.3.12)(react@18.3.1))(@emotion/styled@11.14.0(@emotion/react@11.14.0(@types/react@18.3.12)(react@18.3.1))(@types/react@18.3.12)(react@18.3.1))(@types/react@18.3.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -41,6 +44,9 @@ importers: lodash: specifier: ^4.17.21 version: 4.17.21 + ndjson: + specifier: ^2.0.0 + version: 2.0.0 react: specifier: ^18.3.1 version: 18.3.1 @@ -93,6 +99,9 @@ importers: '@types/lodash': specifier: ^4.17.13 version: 4.17.13 + '@types/ndjson': + specifier: ^2.0.4 + version: 2.0.4 '@types/node': specifier: ^22.9.0 version: 22.9.0 @@ -768,6 +777,9 @@ packages: '@jridgewell/trace-mapping@0.3.25': resolution: {integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==} + '@microsoft/fetch-event-source@2.0.1': + resolution: {integrity: sha512-W6CLUJ2eBMw3Rec70qrsEW0jOm/3twwJv21mrmj2yORiaVmVYGS4sSS5yUwvQc1ZlDLYGPnClVWmUUMagKNsfA==} + '@mui/core-downloads-tracker@6.4.3': resolution: {integrity: sha512-hlyOzo2ObarllAOeT1ZSAusADE5NZNencUeIvXrdQ1Na+FL1lcznhbxfV5He1KqGiuR8Az3xtCUcYKwMVGFdzg==} @@ -1439,6 +1451,9 @@ packages: '@types/ms@0.7.34': resolution: {integrity: sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==} + '@types/ndjson@2.0.4': + resolution: {integrity: sha512-ajAl7AjhFstF6waORYNSS49GL5iBKisqJlgvXuprXFKCX9fto4ordlNU3+XMgkMddgeR0WoQQBmKUk0v0dJ4pw==} + '@types/node@22.9.0': resolution: {integrity: sha512-vuyHg81vvWA1Z1ELfvLko2c8f34gyA0zaic0+Rllc5lbCnbSyuvb2Oxpm6TAUAC/2xZN3QGqxBNggD1nNR2AfQ==} @@ -1468,6 +1483,9 @@ packages: '@types/swagger-ui-react@5.18.0': resolution: {integrity: sha512-c2M9adVG7t28t1pq19K9Jt20VLQf0P/fwJwnfcmsVVsdkwCWhRmbKDu+tIs0/NGwJ/7GY8lBx+iKZxuDI5gDbw==} + '@types/through@0.0.33': + resolution: {integrity: 
sha512-HsJ+z3QuETzP3cswwtzt2vEIiHBk/dCcHGhbmG5X3ecnwFD/lPrMpliGXxSCg03L9AhrdwA4Oz/qfspkDW+xGQ==} + '@types/trusted-types@2.0.7': resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==} @@ -2663,6 +2681,9 @@ packages: json-stable-stringify-without-jsonify@1.0.1: resolution: {integrity: sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==} + json-stringify-safe@5.0.1: + resolution: {integrity: sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==} + json2mq@0.2.0: resolution: {integrity: sha512-SzoRg7ux5DWTII9J2qkrZrqV1gt+rTaoufMxEzXbS26Uid0NwaJd123HcoB80TgubEppxxIGdNxCx50fEoEWQA==} @@ -2919,6 +2940,9 @@ packages: resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} engines: {node: '>=16 || 14 >=14.17'} + minimist@1.2.8: + resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} @@ -2930,6 +2954,11 @@ packages: natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} + ndjson@2.0.0: + resolution: {integrity: sha512-nGl7LRGrzugTtaFcJMhLbpzJM6XdivmbkdlaGcrk/LXg2KL/YBC6z1g70xh0/al+oFuVFP8N8kiWRucmeEH/qQ==} + engines: {node: '>=10'} + hasBin: true + neotraverse@0.6.18: resolution: {integrity: sha512-Z4SmBUweYa09+o6pG+eASabEpP6QkQ70yHj351pQoEXIs8uHbaU2DWVmzBANKgflPa47A50PtB2+NgRpQvr7vA==} engines: {node: '>= 10'} @@ -3495,6 +3524,10 @@ packages: resolution: {integrity: sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==} engines: {node: '>=0.10.0'} + readable-stream@3.6.2: + resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} + engines: {node: '>= 6'} + readdirp@3.6.0: resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} engines: {node: '>=8.10.0'} @@ -3697,6 +3730,9 @@ packages: space-separated-tokens@2.0.2: resolution: {integrity: sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==} + split2@3.2.2: + resolution: {integrity: sha512-9NThjpgZnifTkJpzTZ7Eue85S49QwpNhZTq6GRJwObb6jnLFNGB7Qm73V5HewTROPyxD0C29xqmaI68bQtV+hg==} + sprintf-js@1.0.3: resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} @@ -3735,6 +3771,9 @@ packages: resolution: {integrity: sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==} engines: {node: '>= 0.4'} + string_decoder@1.3.0: + resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} + stringify-entities@4.0.4: resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==} @@ -3797,6 +3836,9 @@ packages: resolution: {integrity: sha512-B71/4oyj61iNH0KeCamLuE2rmKuTO5byTOSVwECM5FA7TiAiAW+UqTKZ9ERueC4qvgSttUhdmq1mXC3kJqGX7A==} engines: {node: '>=12.22'} + through2@4.0.2: + resolution: {integrity: sha512-iOqSav00cVxEEICeD7TjLB1sueEL+81Wpzp2bY17uZjZN0pWZPuo4suZ/61VujxmqSGFfgOcNuTZ85QJwNZQpw==} + tiny-invariant@1.3.3: resolution: 
{integrity: sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==} @@ -3989,6 +4031,9 @@ packages: peerDependencies: react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + util-deprecate@1.0.2: + resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + uuid@11.0.5: resolution: {integrity: sha512-508e6IcKLrhxKdBbcA2b4KQZlLVp2+J5UwQ6F7Drckkc5N9ZJwFa4TgWtsww9UG8fGHbm6gbV19TdM5pQ4GaIA==} hasBin: true @@ -4719,6 +4764,8 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.0 + '@microsoft/fetch-event-source@2.0.1': {} + '@mui/core-downloads-tracker@6.4.3': {} '@mui/material@6.4.3(@emotion/react@11.14.0(@types/react@18.3.12)(react@18.3.1))(@emotion/styled@11.14.0(@emotion/react@11.14.0(@types/react@18.3.12)(react@18.3.1))(@types/react@18.3.12)(react@18.3.1))(@types/react@18.3.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': @@ -5676,6 +5723,11 @@ snapshots: '@types/ms@0.7.34': {} + '@types/ndjson@2.0.4': + dependencies: + '@types/node': 22.9.0 + '@types/through': 0.0.33 + '@types/node@22.9.0': dependencies: undici-types: 6.19.8 @@ -5707,6 +5759,10 @@ snapshots: dependencies: '@types/react': 18.3.12 + '@types/through@0.0.33': + dependencies: + '@types/node': 22.9.0 + '@types/trusted-types@2.0.7': optional: true @@ -7114,6 +7170,8 @@ snapshots: json-stable-stringify-without-jsonify@1.0.1: {} + json-stringify-safe@5.0.1: {} + json2mq@0.2.0: dependencies: string-convert: 0.2.1 @@ -7592,12 +7650,22 @@ snapshots: dependencies: brace-expansion: 2.0.1 + minimist@1.2.8: {} + ms@2.1.3: {} nanoid@3.3.11: {} natural-compare@1.4.0: {} + ndjson@2.0.0: + dependencies: + json-stringify-safe: 5.0.1 + minimist: 1.2.8 + readable-stream: 3.6.2 + split2: 3.2.2 + through2: 4.0.2 + neotraverse@0.6.18: {} no-case@3.0.4: @@ -8247,6 +8315,12 @@ snapshots: dependencies: loose-envify: 1.4.0 + readable-stream@3.6.2: + dependencies: + inherits: 2.0.4 + string_decoder: 1.3.0 + util-deprecate: 1.0.2 + readdirp@3.6.0: dependencies: picomatch: 2.3.1 @@ -8495,6 +8569,10 @@ snapshots: space-separated-tokens@2.0.2: {} + split2@3.2.2: + dependencies: + readable-stream: 3.6.2 + sprintf-js@1.0.3: {} stackback@0.0.2: {} @@ -8550,6 +8628,10 @@ snapshots: define-properties: 1.2.1 es-object-atoms: 1.0.0 + string_decoder@1.3.0: + dependencies: + safe-buffer: 5.2.1 + stringify-entities@4.0.4: dependencies: character-entities-html4: 2.1.0 @@ -8658,6 +8740,10 @@ snapshots: throttle-debounce@5.0.2: {} + through2@4.0.2: + dependencies: + readable-stream: 3.6.2 + tiny-invariant@1.3.3: {} tiny-warning@1.0.3: {} @@ -8858,6 +8944,8 @@ snapshots: dependencies: react: 18.3.1 + util-deprecate@1.0.2: {} + uuid@11.0.5: {} vfile-message@4.0.2: diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 83efc4c9..4e3c1c78 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -52,6 +52,11 @@ import { useQueryClient, } from "@tanstack/react-query"; import { suggestedQuestionKey } from "src/api/ragQueryApi.ts"; +import { + EventStreamContentType, + fetchEventSource, +} from "@microsoft/fetch-event-source"; +import { Dispatch, SetStateAction } from "react"; export interface SourceNode { node_id: string; @@ -389,6 +394,12 @@ interface ChatMutationOptions { onChunk?: (chunk: string) => void; } +export interface ChatMutationResponse { + text: string; + response_id: string; + done: boolean; +} + export function useStreamChatMutation(options?: ChatMutationOptions) { return useMutation({ mutationKey: 
[MutationKeys.streamChatMutation], @@ -417,15 +428,128 @@ export function useStreamChatMutation(options?: ChatMutationOptions) { const chunk = decoder.decode(value ?? new Uint8Array(), { stream: true, }); + const parsedChunk = JSON.parse(chunk) as ChatMutationResponse; // if (doneReading) { // console.log("HELLO"); // } else { // console.log("CHUNK", chunk); // } - fullResponse += chunk; - options?.onChunk?.(chunk); + console.log(parsedChunk); + fullResponse += parsedChunk.text; + options?.onChunk?.(parsedChunk.text); } return fullResponse; }, }); } + +export const useChatMutationV2 = ({ + onSuccess, + onError, + onChunk, +}: UseMutationType & { onChunk: (msg: string) => void }) => { + const queryClient = useQueryClient(); + return useMutation({ + mutationKey: [MutationKeys.chatMutation], + mutationFn: (request: ChatMutationRequest) => + chatMutationV2(request, onChunk), + onMutate: (variables) => { + queryClient.setQueryData>( + chatHistoryQueryKey({ + session_id: variables.session_id, + }), + (cachedData) => + appendPlaceholderToChatHistory(variables.query, cachedData), + ); + }, + onSuccess: (data, variables) => { + // queryClient.setQueryData>( + // chatHistoryQueryKey({ + // session_id: variables.request.session_id, + // }), + // (cachedData) => replacePlaceholderInChatHistory(data, cachedData), + // ); + queryClient + .invalidateQueries({ + queryKey: suggestedQuestionKey(variables.session_id), + }) + .catch((error: unknown) => { + console.error(error); + }); + }, + onError: (error: Error, variables) => { + const uuid = crypto.randomUUID(); + const errorMessage: ChatMessageType = { + id: `error-${uuid}`, + session_id: variables.session_id, + source_nodes: [], + rag_message: { + user: variables.query, + assistant: error.message, + }, + evaluations: [], + timestamp: Date.now(), + }; + queryClient.setQueryData>( + chatHistoryQueryKey({ + session_id: variables.session_id, + offset: 0, + }), + (cachedData) => + replacePlaceholderInChatHistory(errorMessage, cachedData), + ); + + onError?.(error); + }, + }); +}; + +const chatMutationV2 = async ( + request: ChatMutationRequest, + onChunk: (chunk: string) => void, +): Promise => { + const ctrl = new AbortController(); + await fetchEventSource( + `${llmServicePath}/sessions/${request.session_id.toString()}/stream-completion`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + query: request.query, + configuration: request.configuration, + }), + signal: ctrl.signal, + onmessage(msg) { + onChunk(msg.data); + }, + onclose() { + console.log("Connection closed"); + }, + onerror(err) { + console.error("Error", err); + ctrl.abort(); + }, + async onopen(response) { + if ( + response.ok && + response.headers.get("content-type")?.includes(EventStreamContentType) + ) { + await Promise.resolve(); + console.log("all good"); + return; // everything's good + } else if ( + response.status >= 400 && + response.status < 500 && + response.status !== 429 + ) { + // client-side errors are usually non-retriable: + throw new Error(); + } else { + throw new Error(); + } + }, + }, + ); +}; diff --git a/ui/src/pages/RagChatTab/ChatLayout.tsx b/ui/src/pages/RagChatTab/ChatLayout.tsx index 0042f44b..ffb5247e 100644 --- a/ui/src/pages/RagChatTab/ChatLayout.tsx +++ b/ui/src/pages/RagChatTab/ChatLayout.tsx @@ -73,6 +73,7 @@ function ChatLayout() { const { data: dataSources, status: dataSourcesStatus } = useGetDataSourcesForProject(+projectId); const [excludeKnowledgeBase, setExcludeKnowledgeBase] = useState(false); + 
const [streamedChat, setStreamedChat] = useState(""); const { status: chatHistoryStatus, data: chatHistory, @@ -108,6 +109,7 @@ function ChatLayout() { isFetching, isFetchingPreviousPage, }, + streamedChatState: [streamedChat, setStreamedChat], dataSourceSize, dataSourcesQuery: { dataSources: dataSources ?? [], diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx index bf4fab20..988b8b52 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx @@ -50,6 +50,8 @@ import Markdown from "react-markdown"; import "../tableMarkdown.css"; import { ExclamationCircleTwoTone } from "@ant-design/icons"; +import { useContext } from "react"; +import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; const isError = (data: ChatMessageType) => { return data.id.startsWith("error-"); diff --git a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx index b39f23f1..975dee79 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx @@ -36,19 +36,37 @@ * DATA. ******************************************************************************/ -import { Divider, Row, Skeleton } from "antd"; +import { Divider, Row, Skeleton, Typography } from "antd"; import UserQuestion from "pages/RagChatTab/ChatOutput/ChatMessages/UserQuestion.tsx"; +import { useContext } from "react"; +import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; +import Markdown from "react-markdown"; +import Remark from "remark-gfm"; const PendingRagOutputSkeleton = ({ question }: { question: string }) => { + const { + streamedChatState: [streamedChat], + } = useContext(RagChatContext); + console.log(streamedChat); return (
- - - - - + {streamedChat ? ( + + + {streamedChat} + + + ) : ( + + + + )}
diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index fe14d1a1..65264e54 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -39,10 +39,13 @@ import { Button, Flex, Input, InputRef, Switch, Tooltip } from "antd"; import { DatabaseFilled, SendOutlined } from "@ant-design/icons"; import { useContext, useEffect, useRef, useState } from "react"; +import ndjson from "ndjson"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; import { + ChatMutationResponse, createQueryConfiguration, useChatMutation, + useChatMutationV2, useStreamChatMutation, } from "src/api/chatApi.ts"; import { useParams, useSearch } from "@tanstack/react-router"; @@ -51,6 +54,11 @@ import { cdlBlue600 } from "src/cuix/variables.ts"; import type { SwitchChangeEventHandler } from "antd/lib/switch"; import { useSuggestQuestions } from "src/api/ragQueryApi.ts"; import SuggestedQuestionsFooter from "pages/RagChatTab/FooterComponents/SuggestedQuestionsFooter.tsx"; +import { llmServicePath } from "src/api/utils.ts"; +import { + EventStreamContentType, + fetchEventSource, +} from "@microsoft/fetch-event-source"; const RagChatQueryInput = ({ newSessionCallback, @@ -62,16 +70,15 @@ const RagChatQueryInput = ({ chatHistoryQuery: { flatChatHistory }, dataSourceSize, dataSourcesQuery: { dataSourcesStatus }, + streamedChatState: [, setStreamedChat], } = useContext(RagChatContext); const [userInput, setUserInput] = useState(""); const { sessionId } = useParams({ strict: false }); - const [response, setResponse] = useState(""); const search: { question?: string } = useSearch({ strict: false, }); const inputRef = useRef(null); - // console.log(response); const { data: sampleQuestions, isPending: sampleQuestionsIsPending, @@ -90,10 +97,10 @@ const RagChatQueryInput = ({ }, }); - const streamChatMutation = useStreamChatMutation({ + const streamChatMutation = useChatMutationV2({ onChunk: (chunk) => { - console.log("stream chunk", chunk); - setResponse(() => chunk); + console.log("chunk", chunk); + setStreamedChat((prev) => prev + chunk); }, }); @@ -103,7 +110,7 @@ const RagChatQueryInput = ({ } }, [inputRef.current, flatChatHistory.length]); - const handleChat = (userInput: string) => { + const handleChat = async (userInput: string) => { if (userInput.trim().length <= 0) { return; } @@ -111,14 +118,9 @@ const RagChatQueryInput = ({ if (sessionId) { streamChatMutation.mutate({ query: userInput, - configuration: createQueryConfiguration(excludeKnowledgeBase), session_id: +sessionId, + configuration: createQueryConfiguration(excludeKnowledgeBase), }); - // chatMutation.mutate({ - // query: userInput, - // session_id: +sessionId, - // configuration: createQueryConfiguration(excludeKnowledgeBase), - // }); } else { newSessionCallback(userInput); } diff --git a/ui/src/pages/RagChatTab/State/RagChatContext.tsx b/ui/src/pages/RagChatTab/State/RagChatContext.tsx index c61ce7a8..9df858aa 100644 --- a/ui/src/pages/RagChatTab/State/RagChatContext.tsx +++ b/ui/src/pages/RagChatTab/State/RagChatContext.tsx @@ -59,6 +59,7 @@ export interface RagChatContextType { InfiniteQueryObserverResult> >; }; + streamedChatState: [string, Dispatch>]; dataSourcesQuery: { dataSources: DataSourceType[]; dataSourcesStatus?: "error" | "success" | "pending"; @@ -79,6 +80,7 @@ export const RagChatContext = createContext({ {} as InfiniteQueryObserverResult>, ), }, + 
streamedChatState: ["", () => null], dataSourcesQuery: { dataSources: [], dataSourcesStatus: undefined }, dataSourceSize: null, excludeKnowledgeBaseState: [false, () => null], From df0203af500693e001f5723e0d369b9c68716e57 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 6 May 2025 10:36:45 -0700 Subject: [PATCH 13/41] Consolidate response_id generation --- llm-service/app/services/chat.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index 68181653..2d5c7b62 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -83,8 +83,10 @@ def v3_chat( use_summary_filter=session.query_configuration.enable_summary_filter, ) + response_id = str(uuid.uuid4()) + if configuration.exclude_knowledge_base or len(session.data_source_ids) == 0: - return stream_direct_llm_chat(session, query, user_name=user_name) + return stream_direct_llm_chat(session, response_id, query, user_name) total_data_sources_size: int = sum( map( @@ -93,9 +95,7 @@ def v3_chat( ) ) if total_data_sources_size == 0: - return stream_direct_llm_chat(session, query, user_name) - - response_id = str(uuid.uuid4()) + return stream_direct_llm_chat(session, response_id, query, user_name) new_chat_message: RagStudioChatMessage = _run_chat( session, response_id, query, query_configuration, user_name @@ -121,8 +121,10 @@ def v2_chat( use_summary_filter=session.query_configuration.enable_summary_filter, ) + response_id = str(uuid.uuid4()) + if configuration.exclude_knowledge_base or len(session.data_source_ids) == 0: - return direct_llm_chat(session, query, user_name=user_name) + return direct_llm_chat(session, response_id, query, user_name) total_data_sources_size: int = sum( map( @@ -131,9 +133,7 @@ def v2_chat( ) ) if total_data_sources_size == 0: - return direct_llm_chat(session, query, user_name) - - response_id = str(uuid.uuid4()) + return direct_llm_chat(session, response_id, query, user_name) new_chat_message: RagStudioChatMessage = _run_chat( session, response_id, query, query_configuration, user_name @@ -352,9 +352,8 @@ def process_response(response: str | None) -> list[str]: def direct_llm_chat( - session: Session, query: str, user_name: Optional[str] + session: Session, response_id: str, query: str, user_name: Optional[str] ) -> RagStudioChatMessage: - response_id = str(uuid.uuid4()) record_direct_llm_mlflow_run(response_id, session, user_name) chat_response = llm_completion.completion( @@ -378,9 +377,8 @@ def direct_llm_chat( def stream_direct_llm_chat( - session: Session, query: str, user_name: Optional[str] + session: Session, response_id: str, query: str, user_name: Optional[str] ) -> Generator[ChatResponse, None, None]: - response_id = str(uuid.uuid4()) record_direct_llm_mlflow_run(response_id, session, user_name) chat_response = llm_completion.stream_completion( From cb81801db28ee3bb855d6f3292946f26bc90ab5f Mon Sep 17 00:00:00 2001 From: jwatson Date: Tue, 6 May 2025 11:16:46 -0700 Subject: [PATCH 14/41] wip lastFile:ui/src/api/chatApi.ts --- .../app/routers/index/sessions/__init__.py | 6 ++--- ui/src/api/chatApi.ts | 27 +++++++++++++++---- .../Loaders/PendingRagOutputSkeleton.tsx | 2 +- .../FooterComponents/RagChatQueryInput.tsx | 10 +------ 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 485270b9..42426c24 100644 --- 
a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -231,10 +231,10 @@ def generate_stream(): for response in v3_chat( session, request.query, configuration, user_name=origin_remote_user ): - print(response.delta) response_id = response.additional_kwargs["response_id"] - yield f"data: {response.delta}" + "\n\n" - yield f"data: {response_id}" + "\n\n" + json_delta = json.dumps({ "text": response.delta }) + yield f"data: {json_delta}" + "\n\n" + yield f'data: {"response_id" : {response_id}}\n\n' # kick off evals with full response # todo: write to history, start evals, rewrite question, log to mlfow once the response is done diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 4e3c1c78..e60aa4fd 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -53,10 +53,10 @@ import { } from "@tanstack/react-query"; import { suggestedQuestionKey } from "src/api/ragQueryApi.ts"; import { + EventSourceMessage, EventStreamContentType, fetchEventSource, } from "@microsoft/fetch-event-source"; -import { Dispatch, SetStateAction } from "react"; export interface SourceNode { node_id: string; @@ -434,7 +434,7 @@ export function useStreamChatMutation(options?: ChatMutationOptions) { // } else { // console.log("CHUNK", chunk); // } - console.log(parsedChunk); + // console.log(parsedChunk); fullResponse += parsedChunk.text; options?.onChunk?.(parsedChunk.text); } @@ -462,7 +462,11 @@ export const useChatMutationV2 = ({ appendPlaceholderToChatHistory(variables.query, cachedData), ); }, + onSettled: (data, error, variables) => { + console.log(`onSettled is here! with response id: ${data}`); + }, onSuccess: (data, variables) => { + console.log(`onSuccess is here! with response id: ${data}`); // queryClient.setQueryData>( // chatHistoryQueryKey({ // session_id: variables.request.session_id, @@ -507,8 +511,9 @@ export const useChatMutationV2 = ({ const chatMutationV2 = async ( request: ChatMutationRequest, onChunk: (chunk: string) => void, -): Promise => { +): Promise => { const ctrl = new AbortController(); + let responseId = ""; await fetchEventSource( `${llmServicePath}/sessions/${request.session_id.toString()}/stream-completion`, { @@ -521,8 +526,19 @@ const chatMutationV2 = async ( configuration: request.configuration, }), signal: ctrl.signal, - onmessage(msg) { - onChunk(msg.data); + onmessage(msg: EventSourceMessage) { + const data = JSON.parse(msg.data) as { + text?: string; + response_id?: string; + }; + + console.log(`data: "${msg.data}"`); + if (data.text) { + onChunk(data.text); + } + if (data.response_id) { + responseId = data.response_id; + } }, onclose() { console.log("Connection closed"); @@ -552,4 +568,5 @@ const chatMutationV2 = async ( }, }, ); + return responseId; }; diff --git a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx index 975dee79..9d824104 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx @@ -47,7 +47,7 @@ const PendingRagOutputSkeleton = ({ question }: { question: string }) => { const { streamedChatState: [streamedChat], } = useContext(RagChatContext); - console.log(streamedChat); + // console.log(streamedChat); return (
diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index 65264e54..84719803 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -39,14 +39,11 @@ import { Button, Flex, Input, InputRef, Switch, Tooltip } from "antd"; import { DatabaseFilled, SendOutlined } from "@ant-design/icons"; import { useContext, useEffect, useRef, useState } from "react"; -import ndjson from "ndjson"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; import { - ChatMutationResponse, createQueryConfiguration, useChatMutation, useChatMutationV2, - useStreamChatMutation, } from "src/api/chatApi.ts"; import { useParams, useSearch } from "@tanstack/react-router"; import { cdlBlue600 } from "src/cuix/variables.ts"; @@ -54,11 +51,6 @@ import { cdlBlue600 } from "src/cuix/variables.ts"; import type { SwitchChangeEventHandler } from "antd/lib/switch"; import { useSuggestQuestions } from "src/api/ragQueryApi.ts"; import SuggestedQuestionsFooter from "pages/RagChatTab/FooterComponents/SuggestedQuestionsFooter.tsx"; -import { llmServicePath } from "src/api/utils.ts"; -import { - EventStreamContentType, - fetchEventSource, -} from "@microsoft/fetch-event-source"; const RagChatQueryInput = ({ newSessionCallback, @@ -99,7 +91,7 @@ const RagChatQueryInput = ({ const streamChatMutation = useChatMutationV2({ onChunk: (chunk) => { - console.log("chunk", chunk); + // console.log("chunk", chunk); setStreamedChat((prev) => prev + chunk); }, }); From 469d14b4180d71492e0b981cf03273fac2c5ba45 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 6 May 2025 11:28:11 -0700 Subject: [PATCH 15/41] drop databases lastFile:ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx --- ui/src/api/chatApi.ts | 5 ---- .../ChatOutput/ChatMessages/ChatMessage.tsx | 2 -- .../Loaders/PendingRagOutputSkeleton.tsx | 27 ++++++++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index e60aa4fd..6bc2e1b6 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -444,7 +444,6 @@ export function useStreamChatMutation(options?: ChatMutationOptions) { } export const useChatMutationV2 = ({ - onSuccess, onError, onChunk, }: UseMutationType & { onChunk: (msg: string) => void }) => { @@ -462,9 +461,6 @@ export const useChatMutationV2 = ({ appendPlaceholderToChatHistory(variables.query, cachedData), ); }, - onSettled: (data, error, variables) => { - console.log(`onSettled is here! with response id: ${data}`); - }, onSuccess: (data, variables) => { console.log(`onSuccess is here! 
with response id: ${data}`); // queryClient.setQueryData>( @@ -532,7 +528,6 @@ const chatMutationV2 = async ( response_id?: string; }; - console.log(`data: "${msg.data}"`); if (data.text) { onChunk(data.text); } diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx index 988b8b52..bf4fab20 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx @@ -50,8 +50,6 @@ import Markdown from "react-markdown"; import "../tableMarkdown.css"; import { ExclamationCircleTwoTone } from "@ant-design/icons"; -import { useContext } from "react"; -import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; const isError = (data: ChatMessageType) => { return data.id.startsWith("error-"); diff --git a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx index 9d824104..8d7795eb 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx @@ -36,7 +36,7 @@ * DATA. ******************************************************************************/ -import { Divider, Row, Skeleton, Typography } from "antd"; +import { Divider, Flex, Row, Skeleton, Typography } from "antd"; import UserQuestion from "pages/RagChatTab/ChatOutput/ChatMessages/UserQuestion.tsx"; import { useContext } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; @@ -53,15 +53,22 @@ const PendingRagOutputSkeleton = ({ question }: { question: string }) => {
{streamedChat ? ( - - - {streamedChat} - - + + + + {streamedChat} + + + ) : ( From 9c5a00838de6d6a9cb37934952ad1813334a469b Mon Sep 17 00:00:00 2001 From: Baasit Sharief Date: Tue, 6 May 2025 11:42:10 -0700 Subject: [PATCH 16/41] mob next [ci-skip] [ci skip] [skip ci] lastFile:ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx --- llm-service/app/routers/index/sessions/__init__.py | 2 +- .../Loaders/PendingRagOutputSkeleton.tsx | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 42426c24..cf414ff3 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -234,7 +234,7 @@ def generate_stream(): response_id = response.additional_kwargs["response_id"] json_delta = json.dumps({ "text": response.delta }) yield f"data: {json_delta}" + "\n\n" - yield f'data: {"response_id" : {response_id}}\n\n' + yield f'data: {{"response_id" : {response_id}}}\n\n' # kick off evals with full response # todo: write to history, start evals, rewrite question, log to mlfow once the response is done diff --git a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx index 8d7795eb..d5d8a575 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx @@ -42,6 +42,8 @@ import { useContext } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; import Markdown from "react-markdown"; import Remark from "remark-gfm"; +import Images from "src/components/images/Images.ts"; +import { cdlBlue500 } from "src/cuix/variables.ts"; const PendingRagOutputSkeleton = ({ question }: { question: string }) => { const { @@ -59,6 +61,18 @@ const PendingRagOutputSkeleton = ({ question }: { question: string }) => { justify="space-between" gap={8} > +
+ +
Date: Tue, 6 May 2025 11:44:47 -0700 Subject: [PATCH 17/41] mob next [ci-skip] [ci skip] [skip ci] lastFile:llm-service/app/routers/index/sessions/__init__.py --- llm-service/app/routers/index/sessions/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index cf414ff3..10be6383 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -234,7 +234,7 @@ def generate_stream(): response_id = response.additional_kwargs["response_id"] json_delta = json.dumps({ "text": response.delta }) yield f"data: {json_delta}" + "\n\n" - yield f'data: {{"response_id" : {response_id}}}\n\n' + yield f'data: {{"response_id" : "{response_id}"}}\n\n' # kick off evals with full response # todo: write to history, start evals, rewrite question, log to mlfow once the response is done From 603ae739befe5f34af8958d011281977246eb2e2 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 12:46:07 -0600 Subject: [PATCH 18/41] small refactor --- ui/src/api/chatApi.ts | 49 +----------- .../ChatOutput/ChatMessages/ChatMessage.tsx | 80 ++++++++++--------- .../Loaders/PendingRagOutputSkeleton.tsx | 72 ++++++----------- 3 files changed, 67 insertions(+), 134 deletions(-) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 6bc2e1b6..675aa998 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -108,7 +108,7 @@ export interface ChatResponseFeedback { rating: boolean; } -const placeholderChatResponseId = "placeholder"; +export const placeholderChatResponseId = "placeholder"; export const isPlaceholder = (chatMessage: ChatMessageType): boolean => { return chatMessage.id === placeholderChatResponseId; @@ -390,59 +390,12 @@ const feedbackMutation = async ({ ); }; -interface ChatMutationOptions { - onChunk?: (chunk: string) => void; -} - export interface ChatMutationResponse { text: string; response_id: string; done: boolean; } -export function useStreamChatMutation(options?: ChatMutationOptions) { - return useMutation({ - mutationKey: [MutationKeys.streamChatMutation], - mutationFn: async ({ - query, - configuration, - session_id, - }: ChatMutationRequest) => { - const res = await fetch( - `${llmServicePath}/sessions/${session_id.toString()}/stream-completion`, - { - method: "POST", - body: JSON.stringify({ query, configuration }), - headers: { "Content-Type": "application/json" }, - }, - ); - if (!res.body) throw new Error("Error getting stream completion"); - const reader = res.body.getReader(); - const decoder = new TextDecoder(); - let fullResponse = ""; - let done = false; - while (!done) { - const { value, done: doneReading } = await reader.read(); - done = doneReading; - // do we need the fallback? - const chunk = decoder.decode(value ?? 
new Uint8Array(), { - stream: true, - }); - const parsedChunk = JSON.parse(chunk) as ChatMutationResponse; - // if (doneReading) { - // console.log("HELLO"); - // } else { - // console.log("CHUNK", chunk); - // } - // console.log(parsedChunk); - fullResponse += parsedChunk.text; - options?.onChunk?.(parsedChunk.text); - } - return fullResponse; - }, - }); -} - export const useChatMutationV2 = ({ onError, onChunk, diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx index bf4fab20..f4e5fa79 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx @@ -55,44 +55,7 @@ const isError = (data: ChatMessageType) => { return data.id.startsWith("error-"); }; -const ChatMessage = ({ data }: { data: ChatMessageType }) => { - if (isError(data)) { - return ( -
-
- - -
- -
- - - - - -
- -
-
- ); - } - - if (isPlaceholder(data)) { - return ; - } - +export const ChatMessageBody = ({ data }: { data: ChatMessageType }) => { return (
{data.rag_message.user ? ( @@ -153,4 +116,45 @@ const ChatMessage = ({ data }: { data: ChatMessageType }) => { ); }; +const ChatMessage = ({ data }: { data: ChatMessageType }) => { + if (isError(data)) { + return ( +
+
+ + +
+ +
+ + + + + +
+ +
+
+ ); + } + + if (isPlaceholder(data)) { + return ; + } + + return ; +}; + export default ChatMessage; diff --git a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx index d5d8a575..e3395d4f 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx @@ -36,61 +36,37 @@ * DATA. ******************************************************************************/ -import { Divider, Flex, Row, Skeleton, Typography } from "antd"; -import UserQuestion from "pages/RagChatTab/ChatOutput/ChatMessages/UserQuestion.tsx"; +import { Row, Skeleton } from "antd"; import { useContext } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; -import Markdown from "react-markdown"; -import Remark from "remark-gfm"; -import Images from "src/components/images/Images.ts"; -import { cdlBlue500 } from "src/cuix/variables.ts"; +import { ChatMessageBody } from "pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx"; +import { ChatMessageType, placeholderChatResponseId } from "src/api/chatApi.ts"; const PendingRagOutputSkeleton = ({ question }: { question: string }) => { const { streamedChatState: [streamedChat], } = useContext(RagChatContext); - // console.log(streamedChat); - return ( -
-
- - {streamedChat ? ( - -
- -
- - - {streamedChat} - - -
- ) : ( - - - - )} -
- -
+ + const streamedMessage: ChatMessageType | undefined = streamedChat + ? { + id: placeholderChatResponseId, + session_id: 0, + source_nodes: [], + rag_message: { + user: question, + assistant: streamedChat, + }, + evaluations: [], + timestamp: Date.now(), + } + : undefined; + + return streamedMessage ? ( + + ) : ( + + + ); }; From 8984ddbe6eacef5c43612b6eb3cb577aa73120d4 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 13:16:22 -0600 Subject: [PATCH 19/41] remove deps --- ui/package.json | 2 -- ui/pnpm-lock.yaml | 80 ----------------------------------------------- 2 files changed, 82 deletions(-) diff --git a/ui/package.json b/ui/package.json index 16dc2366..e6dead99 100644 --- a/ui/package.json +++ b/ui/package.json @@ -30,7 +30,6 @@ "antd": "^5.24.6", "date-fns": "^4.1.0", "lodash": "^4.17.21", - "ndjson": "^2.0.0", "react": "^18.3.1", "react-dom": "^18.3.1", "react-intersection-observer": "^9.16.0", @@ -50,7 +49,6 @@ "@testing-library/user-event": "^14.5.2", "@types/eslint__js": "^8.42.3", "@types/lodash": "^4.17.13", - "@types/ndjson": "^2.0.4", "@types/node": "^22.9.0", "@types/react": "^18.3.12", "@types/react-dom": "^18.3.1", diff --git a/ui/pnpm-lock.yaml b/ui/pnpm-lock.yaml index a29c8bc6..92a15259 100644 --- a/ui/pnpm-lock.yaml +++ b/ui/pnpm-lock.yaml @@ -44,9 +44,6 @@ importers: lodash: specifier: ^4.17.21 version: 4.17.21 - ndjson: - specifier: ^2.0.0 - version: 2.0.0 react: specifier: ^18.3.1 version: 18.3.1 @@ -99,9 +96,6 @@ importers: '@types/lodash': specifier: ^4.17.13 version: 4.17.13 - '@types/ndjson': - specifier: ^2.0.4 - version: 2.0.4 '@types/node': specifier: ^22.9.0 version: 22.9.0 @@ -1451,9 +1445,6 @@ packages: '@types/ms@0.7.34': resolution: {integrity: sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==} - '@types/ndjson@2.0.4': - resolution: {integrity: sha512-ajAl7AjhFstF6waORYNSS49GL5iBKisqJlgvXuprXFKCX9fto4ordlNU3+XMgkMddgeR0WoQQBmKUk0v0dJ4pw==} - '@types/node@22.9.0': resolution: {integrity: sha512-vuyHg81vvWA1Z1ELfvLko2c8f34gyA0zaic0+Rllc5lbCnbSyuvb2Oxpm6TAUAC/2xZN3QGqxBNggD1nNR2AfQ==} @@ -1483,9 +1474,6 @@ packages: '@types/swagger-ui-react@5.18.0': resolution: {integrity: sha512-c2M9adVG7t28t1pq19K9Jt20VLQf0P/fwJwnfcmsVVsdkwCWhRmbKDu+tIs0/NGwJ/7GY8lBx+iKZxuDI5gDbw==} - '@types/through@0.0.33': - resolution: {integrity: sha512-HsJ+z3QuETzP3cswwtzt2vEIiHBk/dCcHGhbmG5X3ecnwFD/lPrMpliGXxSCg03L9AhrdwA4Oz/qfspkDW+xGQ==} - '@types/trusted-types@2.0.7': resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==} @@ -2681,9 +2669,6 @@ packages: json-stable-stringify-without-jsonify@1.0.1: resolution: {integrity: sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==} - json-stringify-safe@5.0.1: - resolution: {integrity: sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==} - json2mq@0.2.0: resolution: {integrity: sha512-SzoRg7ux5DWTII9J2qkrZrqV1gt+rTaoufMxEzXbS26Uid0NwaJd123HcoB80TgubEppxxIGdNxCx50fEoEWQA==} @@ -2940,9 +2925,6 @@ packages: resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} engines: {node: '>=16 || 14 >=14.17'} - minimist@1.2.8: - resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} - ms@2.1.3: resolution: {integrity: 
sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} @@ -2954,11 +2936,6 @@ packages: natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} - ndjson@2.0.0: - resolution: {integrity: sha512-nGl7LRGrzugTtaFcJMhLbpzJM6XdivmbkdlaGcrk/LXg2KL/YBC6z1g70xh0/al+oFuVFP8N8kiWRucmeEH/qQ==} - engines: {node: '>=10'} - hasBin: true - neotraverse@0.6.18: resolution: {integrity: sha512-Z4SmBUweYa09+o6pG+eASabEpP6QkQ70yHj351pQoEXIs8uHbaU2DWVmzBANKgflPa47A50PtB2+NgRpQvr7vA==} engines: {node: '>= 10'} @@ -3524,10 +3501,6 @@ packages: resolution: {integrity: sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==} engines: {node: '>=0.10.0'} - readable-stream@3.6.2: - resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} - engines: {node: '>= 6'} - readdirp@3.6.0: resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} engines: {node: '>=8.10.0'} @@ -3730,9 +3703,6 @@ packages: space-separated-tokens@2.0.2: resolution: {integrity: sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==} - split2@3.2.2: - resolution: {integrity: sha512-9NThjpgZnifTkJpzTZ7Eue85S49QwpNhZTq6GRJwObb6jnLFNGB7Qm73V5HewTROPyxD0C29xqmaI68bQtV+hg==} - sprintf-js@1.0.3: resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} @@ -3771,9 +3741,6 @@ packages: resolution: {integrity: sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==} engines: {node: '>= 0.4'} - string_decoder@1.3.0: - resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} - stringify-entities@4.0.4: resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==} @@ -3836,9 +3803,6 @@ packages: resolution: {integrity: sha512-B71/4oyj61iNH0KeCamLuE2rmKuTO5byTOSVwECM5FA7TiAiAW+UqTKZ9ERueC4qvgSttUhdmq1mXC3kJqGX7A==} engines: {node: '>=12.22'} - through2@4.0.2: - resolution: {integrity: sha512-iOqSav00cVxEEICeD7TjLB1sueEL+81Wpzp2bY17uZjZN0pWZPuo4suZ/61VujxmqSGFfgOcNuTZ85QJwNZQpw==} - tiny-invariant@1.3.3: resolution: {integrity: sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==} @@ -4031,9 +3995,6 @@ packages: peerDependencies: react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 - util-deprecate@1.0.2: - resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} - uuid@11.0.5: resolution: {integrity: sha512-508e6IcKLrhxKdBbcA2b4KQZlLVp2+J5UwQ6F7Drckkc5N9ZJwFa4TgWtsww9UG8fGHbm6gbV19TdM5pQ4GaIA==} hasBin: true @@ -5723,11 +5684,6 @@ snapshots: '@types/ms@0.7.34': {} - '@types/ndjson@2.0.4': - dependencies: - '@types/node': 22.9.0 - '@types/through': 0.0.33 - '@types/node@22.9.0': dependencies: undici-types: 6.19.8 @@ -5759,10 +5715,6 @@ snapshots: dependencies: '@types/react': 18.3.12 - '@types/through@0.0.33': - dependencies: - '@types/node': 22.9.0 - '@types/trusted-types@2.0.7': optional: true @@ -7170,8 +7122,6 @@ snapshots: json-stable-stringify-without-jsonify@1.0.1: {} - json-stringify-safe@5.0.1: {} - json2mq@0.2.0: dependencies: string-convert: 0.2.1 @@ -7650,22 +7600,12 @@ snapshots: dependencies: 
brace-expansion: 2.0.1 - minimist@1.2.8: {} - ms@2.1.3: {} nanoid@3.3.11: {} natural-compare@1.4.0: {} - ndjson@2.0.0: - dependencies: - json-stringify-safe: 5.0.1 - minimist: 1.2.8 - readable-stream: 3.6.2 - split2: 3.2.2 - through2: 4.0.2 - neotraverse@0.6.18: {} no-case@3.0.4: @@ -8315,12 +8255,6 @@ snapshots: dependencies: loose-envify: 1.4.0 - readable-stream@3.6.2: - dependencies: - inherits: 2.0.4 - string_decoder: 1.3.0 - util-deprecate: 1.0.2 - readdirp@3.6.0: dependencies: picomatch: 2.3.1 @@ -8569,10 +8503,6 @@ snapshots: space-separated-tokens@2.0.2: {} - split2@3.2.2: - dependencies: - readable-stream: 3.6.2 - sprintf-js@1.0.3: {} stackback@0.0.2: {} @@ -8628,10 +8558,6 @@ snapshots: define-properties: 1.2.1 es-object-atoms: 1.0.0 - string_decoder@1.3.0: - dependencies: - safe-buffer: 5.2.1 - stringify-entities@4.0.4: dependencies: character-entities-html4: 2.1.0 @@ -8740,10 +8666,6 @@ snapshots: throttle-debounce@5.0.2: {} - through2@4.0.2: - dependencies: - readable-stream: 3.6.2 - tiny-invariant@1.3.3: {} tiny-warning@1.0.3: {} @@ -8944,8 +8866,6 @@ snapshots: dependencies: react: 18.3.1 - util-deprecate@1.0.2: {} - uuid@11.0.5: {} vfile-message@4.0.2: From cfae208099884f8d2ab968b84a80cc3d75482099 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 14:03:18 -0600 Subject: [PATCH 20/41] things are getting close --- ui/src/api/chatApi.ts | 49 ++++++++++--------- .../ChatMessages/ChatMessageController.tsx | 12 +++-- .../Loaders/PendingRagOutputSkeleton.tsx | 4 +- .../FooterComponents/RagChatQueryInput.tsx | 18 +++---- 4 files changed, 45 insertions(+), 38 deletions(-) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 675aa998..414fdaf0 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -57,6 +57,7 @@ import { EventStreamContentType, fetchEventSource, } from "@microsoft/fetch-event-source"; +import messageQueue from "src/utils/messageQueue.ts"; export interface SourceNode { node_id: string; @@ -393,11 +394,11 @@ const feedbackMutation = async ({ export interface ChatMutationResponse { text: string; response_id: string; - done: boolean; } export const useChatMutationV2 = ({ onError, + onSuccess, onChunk, }: UseMutationType & { onChunk: (msg: string) => void }) => { const queryClient = useQueryClient(); @@ -414,20 +415,30 @@ export const useChatMutationV2 = ({ appendPlaceholderToChatHistory(variables.query, cachedData), ); }, - onSuccess: (data, variables) => { - console.log(`onSuccess is here! 
with response id: ${data}`); - // queryClient.setQueryData>( - // chatHistoryQueryKey({ - // session_id: variables.request.session_id, - // }), - // (cachedData) => replacePlaceholderInChatHistory(data, cachedData), - // ); - queryClient - .invalidateQueries({ - queryKey: suggestedQuestionKey(variables.session_id), + onSuccess: (messageId, variables) => { + fetch( + `${llmServicePath}/sessions/${variables.session_id.toString()}/chat-history/${messageId}`, + ) + .then(async (res) => { + const message = (await res.json()) as ChatMessageType; + queryClient.setQueryData>( + chatHistoryQueryKey({ + session_id: variables.session_id, + }), + (cachedData) => + replacePlaceholderInChatHistory(message, cachedData), + ); + queryClient + .invalidateQueries({ + queryKey: suggestedQuestionKey(variables.session_id), + }) + .catch((error: unknown) => { + console.error(error); + }); + onSuccess?.(message); }) - .catch((error: unknown) => { - console.error(error); + .catch(() => { + messageQueue.error("An error occurred fetching the chat message"); }); }, onError: (error: Error, variables) => { @@ -476,10 +487,7 @@ const chatMutationV2 = async ( }), signal: ctrl.signal, onmessage(msg: EventSourceMessage) { - const data = JSON.parse(msg.data) as { - text?: string; - response_id?: string; - }; + const data = JSON.parse(msg.data) as ChatMutationResponse; if (data.text) { onChunk(data.text); @@ -488,9 +496,6 @@ const chatMutationV2 = async ( responseId = data.response_id; } }, - onclose() { - console.log("Connection closed"); - }, onerror(err) { console.error("Error", err); ctrl.abort(); @@ -501,8 +506,6 @@ const chatMutationV2 = async ( response.headers.get("content-type")?.includes(EventStreamContentType) ) { await Promise.resolve(); - console.log("all good"); - return; // everything's good } else if ( response.status >= 400 && response.status < 500 && diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx index 5ddaccc9..82d47f59 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx @@ -144,7 +144,7 @@ const ChatMessageController = () => { ) { setTimeout(() => { if (bottomElement.current) { - bottomElement.current.scrollIntoView({ behavior: "auto" }); + bottomElement.current.scrollTop = 0; } }, 50); } @@ -181,18 +181,24 @@ const ChatMessageController = () => {
{isFetchingPreviousPage && } {flatChatHistory.map((historyMessage, index) => { + const isLast = index === flatChatHistory.length - 1; // trigger fetching on second to la`st item if (index === 2) { return (
+ {isLast &&
}
); } - return ; + return ( +
+ {isLast &&
} + +
+ ); })} -
); }; diff --git a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx index e3395d4f..ecbb5ac3 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx @@ -62,7 +62,9 @@ const PendingRagOutputSkeleton = ({ question }: { question: string }) => { : undefined; return streamedMessage ? ( - +
+ +
) : ( diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index 84719803..e73b5481 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -42,7 +42,6 @@ import { useContext, useEffect, useRef, useState } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; import { createQueryConfiguration, - useChatMutation, useChatMutationV2, } from "src/api/chatApi.ts"; import { useParams, useSearch } from "@tanstack/react-router"; @@ -83,17 +82,14 @@ const RagChatQueryInput = ({ !search.question, ); - const chatMutation = useChatMutation({ - onSuccess: () => { - setUserInput(""); - }, - }); - const streamChatMutation = useChatMutationV2({ onChunk: (chunk) => { - // console.log("chunk", chunk); setStreamedChat((prev) => prev + chunk); }, + onSuccess: () => { + setUserInput(""); + setStreamedChat(""); + }, }); useEffect(() => { @@ -102,7 +98,7 @@ const RagChatQueryInput = ({ } }, [inputRef.current, flatChatHistory.length]); - const handleChat = async (userInput: string) => { + const handleChat = (userInput: string) => { if (userInput.trim().length <= 0) { return; } @@ -167,7 +163,7 @@ const RagChatQueryInput = ({ /> } - disabled={chatMutation.isPending} + disabled={streamChatMutation.isPending} />
); } - + if (isLast) { + console.log(historyMessage.id); + } return ( -
- {isLast &&
} +
); diff --git a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx index ecbb5ac3..105ae901 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Loaders/PendingRagOutputSkeleton.tsx @@ -36,40 +36,29 @@ * DATA. ******************************************************************************/ -import { Row, Skeleton } from "antd"; import { useContext } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; -import { ChatMessageBody } from "pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx"; import { ChatMessageType, placeholderChatResponseId } from "src/api/chatApi.ts"; +import { ChatMessageBody } from "pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageBody.tsx"; const PendingRagOutputSkeleton = ({ question }: { question: string }) => { const { streamedChatState: [streamedChat], } = useContext(RagChatContext); - const streamedMessage: ChatMessageType | undefined = streamedChat - ? { - id: placeholderChatResponseId, - session_id: 0, - source_nodes: [], - rag_message: { - user: question, - assistant: streamedChat, - }, - evaluations: [], - timestamp: Date.now(), - } - : undefined; + const streamedMessage: ChatMessageType = { + id: placeholderChatResponseId, + session_id: 0, + source_nodes: [], + rag_message: { + user: question, + assistant: streamedChat, + }, + evaluations: [], + timestamp: Date.now(), + }; - return streamedMessage ? ( -
- -
- ) : ( - - - - ); + return ; }; export default PendingRagOutputSkeleton; diff --git a/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx b/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx index 02e1799c..51467897 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx @@ -36,15 +36,23 @@ * DATA. ******************************************************************************/ -import { Flex, Typography } from "antd"; +import { Flex, Skeleton, Typography } from "antd"; import { SourceCard } from "pages/RagChatTab/ChatOutput/Sources/SourceCard.tsx"; -import { ChatMessageType } from "src/api/chatApi.ts"; +import { ChatMessageType, isPlaceholder } from "src/api/chatApi.ts"; import { WarningTwoTone } from "@ant-design/icons"; import { cdlOrange050, cdlOrange500 } from "src/cuix/variables.ts"; import { useGetModelById } from "src/api/modelsApi.ts"; import { useContext } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; +const SkeletonNode = () => { + return ( + + ); +}; const SourceNodes = ({ data }: { data: ChatMessageType }) => { const { data: inferenceModel } = useGetModelById(data.inference_model); const { activeSession } = useContext(RagChatContext); @@ -53,6 +61,17 @@ const SourceNodes = ({ data }: { data: ChatMessageType }) => { )); + if (isPlaceholder(data)) { + return ( + + + + + + + ); + } + if ( nodes.length === 0 && activeSession && From 8c8f1a82230283c31d4250a225f2dcb90e4407e5 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 16:55:12 -0600 Subject: [PATCH 28/41] only show loading nodes if kb --- .../ChatOutput/ChatMessages/ChatMessageController.tsx | 2 -- ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx index 21a0ccd5..fb1c5a29 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx @@ -138,10 +138,8 @@ const ChatMessageController = () => { flatChatHistory.length > 0 && isPlaceholder(flatChatHistory[flatChatHistory.length - 1]) ) { - console.log("placeholder useeffect"); bottomElement.current.scrollIntoView({ behavior: "smooth" }); } else { - console.log("initial useeffect"); bottomElement.current.scrollIntoView({ behavior: "auto" }); } } diff --git a/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx b/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx index 51467897..8f355a90 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx @@ -61,7 +61,11 @@ const SourceNodes = ({ data }: { data: ChatMessageType }) => { )); - if (isPlaceholder(data)) { + if ( + isPlaceholder(data) && + activeSession && + activeSession.dataSourceIds.length > 0 + ) { return ( From a06c03068f52255590f3eb72044f460bc65de734 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 16:56:42 -0600 Subject: [PATCH 29/41] remove unused --- ui/src/api/chatApi.ts | 69 ------------------------------------------- 1 file changed, 69 deletions(-) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 414fdaf0..1a8c2c67 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -259,75 +259,6 @@ export 
const replacePlaceholderInChatHistory = ( }; }; -export const useChatMutation = ({ - onSuccess, - onError, -}: UseMutationType) => { - const queryClient = useQueryClient(); - return useMutation({ - mutationKey: [MutationKeys.chatMutation], - mutationFn: chatMutation, - onMutate: (variables) => { - queryClient.setQueryData>( - chatHistoryQueryKey({ - session_id: variables.session_id, - }), - (cachedData) => - appendPlaceholderToChatHistory(variables.query, cachedData), - ); - }, - onSuccess: (data, variables) => { - queryClient.setQueryData>( - chatHistoryQueryKey({ - session_id: variables.session_id, - }), - (cachedData) => replacePlaceholderInChatHistory(data, cachedData), - ); - queryClient - .invalidateQueries({ - queryKey: suggestedQuestionKey(variables.session_id), - }) - .catch((error: unknown) => { - console.error(error); - }); - onSuccess?.(data); - }, - onError: (error: Error, variables) => { - const uuid = crypto.randomUUID(); - const errorMessage: ChatMessageType = { - id: `error-${uuid}`, - session_id: variables.session_id, - source_nodes: [], - rag_message: { - user: variables.query, - assistant: error.message, - }, - evaluations: [], - timestamp: Date.now(), - }; - queryClient.setQueryData>( - chatHistoryQueryKey({ - session_id: variables.session_id, - offset: 0, - }), - (cachedData) => - replacePlaceholderInChatHistory(errorMessage, cachedData), - ); - - onError?.(error); - }, - }); -}; - -const chatMutation = async ( - request: ChatMutationRequest, -): Promise => { - return await postRequest( - `${llmServicePath}/sessions/${request.session_id.toString()}/chat`, - request, - ); -}; - export const createQueryConfiguration = ( excludeKnowledgeBase: boolean, ): QueryConfiguration => { From 4e0fc1f7603bd81e5efc4d37db312e4f5e63d153 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 17:01:40 -0600 Subject: [PATCH 30/41] removing active loading state --- ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx b/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx index 8f355a90..92eff03f 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Sources/SourceNodes.tsx @@ -46,12 +46,7 @@ import { useContext } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; const SkeletonNode = () => { - return ( - - ); + return ; }; const SourceNodes = ({ data }: { data: ChatMessageType }) => { const { data: inferenceModel } = useGetModelById(data.inference_model); From 9b4087b5de38d974b527e4293036ee84f6f69069 Mon Sep 17 00:00:00 2001 From: jwatson Date: Tue, 6 May 2025 16:09:09 -0700 Subject: [PATCH 31/41] fix mypy issues --- llm-service/app/routers/index/sessions/__init__.py | 4 ++-- llm-service/app/services/chat.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 10be6383..9712b6ff 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -38,7 +38,7 @@ import base64 import json import logging -from typing import Optional +from typing import Optional, Generator from fastapi import APIRouter, Header, HTTPException from fastapi.responses import StreamingResponse @@ -226,7 +226,7 @@ def stream_chat_completion( session = 
session_metadata_api.get_session(session_id, user_name=origin_remote_user) configuration = request.configuration or RagPredictConfiguration() - def generate_stream(): + def generate_stream() -> Generator[str, None, None]: response_id: str = "" for response in v3_chat( session, request.query, configuration, user_name=origin_remote_user diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index e68f6da2..059365bb 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -1,4 +1,4 @@ -# ############################################################################## +# ########################################################################## # CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) # (C) Cloudera, Inc. 2024 # All rights reserved. @@ -161,12 +161,13 @@ def _run_streaming_chat( ) response: ChatResponse = ChatResponse(message=ChatMessage(content=query)) - for response in streaming_chat_response.chat_stream: - response.additional_kwargs["response_id"] = response_id - yield response + if streaming_chat_response.chat_stream: + for response in streaming_chat_response.chat_stream: + response.additional_kwargs["response_id"] = response_id + yield response chat_response = AgentChatResponse( - response=response.message.content, + response=response.message.content or "", sources=streaming_chat_response.sources, source_nodes=streaming_chat_response.source_nodes, ) @@ -201,6 +202,7 @@ def _run_streaming_chat( ) + def _run_chat( session: Session, response_id: str, @@ -455,7 +457,7 @@ def stream_direct_llm_chat( evaluations=[], rag_message=RagMessage( user=query, - assistant=response.message.content, + assistant=response.message.content or "", ), timestamp=time.time(), condensed_question=None, From a66d48961581aedbaf914dd931ef93d11f83808e Mon Sep 17 00:00:00 2001 From: jwatson Date: Tue, 6 May 2025 16:12:34 -0700 Subject: [PATCH 32/41] ruff --- llm-service/app/services/llm_completion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llm-service/app/services/llm_completion.py b/llm-service/app/services/llm_completion.py index b532e3be..922ea586 100644 --- a/llm-service/app/services/llm_completion.py +++ b/llm-service/app/services/llm_completion.py @@ -41,7 +41,6 @@ from llama_index.core.base.llms.types import ( ChatMessage, ChatResponse, - ChatResponseGen, ) from llama_index.core.llms import LLM From feacd5af5b828d4d9b38ef42d70f38ac5bb6172e Mon Sep 17 00:00:00 2001 From: actions-user Date: Tue, 6 May 2025 23:16:54 +0000 Subject: [PATCH 33/41] Update release version to dev-testing --- scripts/release_version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release_version.txt b/scripts/release_version.txt index 114c45f6..32150ea1 100644 --- a/scripts/release_version.txt +++ b/scripts/release_version.txt @@ -1 +1 @@ -export RELEASE_TAG=1.17.0 +export RELEASE_TAG=dev-testing From 87fd117217f5d8302a229fa4ef457a961d37be8b Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 17:26:07 -0600 Subject: [PATCH 34/41] handle file not found error for summaries when local --- .../app/ai/indexing/summary_indexer.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/llm-service/app/ai/indexing/summary_indexer.py b/llm-service/app/ai/indexing/summary_indexer.py index 9189f504..7d1290df 100644 --- a/llm-service/app/ai/indexing/summary_indexer.py +++ b/llm-service/app/ai/indexing/summary_indexer.py @@ -218,13 +218,17 @@ def create_storage_context( @classmethod def 
get_all_data_source_summaries(cls) -> dict[str, str]: root_dir = cls.__persist_root_dir() - # if not os.path.exists(root_dir): - # return {} - storage_context = SummaryIndexer.create_storage_context( - persist_dir=root_dir, - vector_store=SimpleVectorStore(), - ) - indices = load_indices_from_storage(storage_context=storage_context, index_ids=None, + try: + storage_context = SummaryIndexer.create_storage_context( + persist_dir=root_dir, + vector_store=SimpleVectorStore(), + ) + except FileNotFoundError: + # If the directory doesn't exist, we don't have any summaries. + return {} + indices = load_indices_from_storage( + storage_context=storage_context, + index_ids=None, **{ "llm": models.LLM.get_noop(), "response_synthesizer": models.LLM.get_noop(), @@ -234,11 +238,13 @@ def get_all_data_source_summaries(cls) -> dict[str, str]: "summary_query": "None", "data_source_id": 0, }, - ) + ) if len(indices) == 0: return {} - global_summary_store: DocumentSummaryIndex = cast(DocumentSummaryIndex, indices[0]) + global_summary_store: DocumentSummaryIndex = cast( + DocumentSummaryIndex, indices[0] + ) summary_ids = global_summary_store.index_struct.doc_id_to_summary_id.values() nodes = global_summary_store.docstore.get_nodes(list(summary_ids)) From 3cecc224a3b54d757d98f376e7b77bb213860640 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Tue, 6 May 2025 17:27:07 -0600 Subject: [PATCH 35/41] remove log --- .../ChatOutput/ChatMessages/ChatMessageController.tsx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx index fb1c5a29..9ea76106 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx @@ -177,7 +177,7 @@ const ChatMessageController = () => { {isFetchingPreviousPage && } {flatChatHistory.map((historyMessage, index) => { const isLast = index === flatChatHistory.length - 1; - // trigger fetching on second to la`st item + // trigger fetching on second to last item if (index === 2) { return (
@@ -186,9 +186,7 @@ const ChatMessageController = () => {
); } - if (isLast) { - console.log(historyMessage.id); - } + return (
Date: Wed, 7 May 2025 08:52:38 -0600 Subject: [PATCH 36/41] renaming --- ui/src/api/chatApi.ts | 6 +++--- .../ChatOutput/ChatMessages/ChatMessageController.tsx | 4 ++-- .../ChatOutput/Placeholders/SuggestedQuestionsCards.tsx | 4 ++-- .../pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 1a8c2c67..83b4a461 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -327,7 +327,7 @@ export interface ChatMutationResponse { response_id: string; } -export const useChatMutationV2 = ({ +export const useStreamingChatMutation = ({ onError, onSuccess, onChunk, @@ -336,7 +336,7 @@ export const useChatMutationV2 = ({ return useMutation({ mutationKey: [MutationKeys.chatMutation], mutationFn: (request: ChatMutationRequest) => - chatMutationV2(request, onChunk), + streamChatMutation(request, onChunk), onMutate: (variables) => { queryClient.setQueryData>( chatHistoryQueryKey({ @@ -399,7 +399,7 @@ export const useChatMutationV2 = ({ }); }; -const chatMutationV2 = async ( +const streamChatMutation = async ( request: ChatMutationRequest, onChunk: (chunk: string) => void, ): Promise => { diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx index 9ea76106..7797c545 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageController.tsx @@ -50,7 +50,7 @@ import messageQueue from "src/utils/messageQueue.ts"; import { createQueryConfiguration, isPlaceholder, - useChatMutationV2, + useStreamingChatMutation, } from "src/api/chatApi.ts"; import { useRenameNameMutation } from "src/api/sessionApi.ts"; import NoDataSourcesState from "pages/RagChatTab/ChatOutput/Placeholders/NoDataSourcesState.tsx"; @@ -79,7 +79,7 @@ const ChatMessageController = () => { }, }); - const { mutate: chatMutation } = useChatMutationV2({ + const { mutate: chatMutation } = useStreamingChatMutation({ onChunk: (chunk) => { setStreamedChat((prev) => prev + chunk); }, diff --git a/ui/src/pages/RagChatTab/ChatOutput/Placeholders/SuggestedQuestionsCards.tsx b/ui/src/pages/RagChatTab/ChatOutput/Placeholders/SuggestedQuestionsCards.tsx index b678d9a4..84694a3d 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/Placeholders/SuggestedQuestionsCards.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/Placeholders/SuggestedQuestionsCards.tsx @@ -42,7 +42,7 @@ import { useContext } from "react"; import { useSuggestQuestions } from "src/api/ragQueryApi.ts"; import { createQueryConfiguration, - useChatMutationV2, + useStreamingChatMutation, } from "src/api/chatApi.ts"; import useCreateSessionAndRedirect from "pages/RagChatTab/ChatOutput/hooks/useCreateSessionAndRedirect"; @@ -93,7 +93,7 @@ const SuggestedQuestionsCards = () => { const createSessionAndRedirect = useCreateSessionAndRedirect(); const { mutate: chatMutation, isPending: askRagIsPending } = - useChatMutationV2({ + useStreamingChatMutation({ onChunk: (chunk) => { setStreamedChat((prev) => prev + chunk); }, diff --git a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx index e73b5481..2d51f4d8 100644 --- a/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx +++ b/ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx @@ -42,7 +42,7 @@ import { useContext, useEffect, useRef, 
useState } from "react"; import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; import { createQueryConfiguration, - useChatMutationV2, + useStreamingChatMutation, } from "src/api/chatApi.ts"; import { useParams, useSearch } from "@tanstack/react-router"; import { cdlBlue600 } from "src/cuix/variables.ts"; @@ -82,7 +82,7 @@ const RagChatQueryInput = ({ !search.question, ); - const streamChatMutation = useChatMutationV2({ + const streamChatMutation = useStreamingChatMutation({ onChunk: (chunk) => { setStreamedChat((prev) => prev + chunk); }, From 6612fa5a97557836a6a77fb8c2cd77fe1fe7a344 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Wed, 7 May 2025 10:34:57 -0600 Subject: [PATCH 37/41] better error handling --- .../app/routers/index/sessions/__init__.py | 19 ++-- llm-service/app/services/chat.py | 1 - ui/src/api/chatApi.ts | 87 ++++++++++++------- ui/src/api/modelsApi.ts | 4 +- 4 files changed, 67 insertions(+), 44 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 9712b6ff..d4521dbe 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -228,13 +228,18 @@ def stream_chat_completion( def generate_stream() -> Generator[str, None, None]: response_id: str = "" - for response in v3_chat( - session, request.query, configuration, user_name=origin_remote_user - ): - response_id = response.additional_kwargs["response_id"] - json_delta = json.dumps({ "text": response.delta }) - yield f"data: {json_delta}" + "\n\n" - yield f'data: {{"response_id" : "{response_id}"}}\n\n' + try: + for response in v3_chat( + session, request.query, configuration, user_name=origin_remote_user + ): + print(response) + response_id = response.additional_kwargs["response_id"] + json_delta = json.dumps({"text": response.delta}) + yield f"data: {json_delta}" + "\n\n" + yield f'data: {{"response_id" : "{response_id}"}}\n\n' + except Exception as e: + logger.exception("Failed to stream chat completion") + yield f'data: {{"error" : "{e}"}}\n\n' # kick off evals with full response # todo: write to history, start evals, rewrite question, log to mlfow once the response is done diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index 059365bb..1744ea3a 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -202,7 +202,6 @@ def _run_streaming_chat( ) - def _run_chat( session: Session, response_id: str, diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 83b4a461..50938b02 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -57,7 +57,6 @@ import { EventStreamContentType, fetchEventSource, } from "@microsoft/fetch-event-source"; -import messageQueue from "src/utils/messageQueue.ts"; export interface SourceNode { node_id: string; @@ -323,20 +322,53 @@ const feedbackMutation = async ({ }; export interface ChatMutationResponse { - text: string; - response_id: string; + text?: string; + response_id?: string; + error?: string; } +const errorChatMessage = (variables: ChatMutationRequest, error: Error) => { + const uuid = crypto.randomUUID(); + const errorMessage: ChatMessageType = { + id: `error-${uuid}`, + session_id: variables.session_id, + source_nodes: [], + rag_message: { + user: variables.query, + assistant: error.message, + }, + evaluations: [], + timestamp: Date.now(), + }; + return errorMessage; +}; + export const useStreamingChatMutation = ({ onError, onSuccess, 
onChunk, }: UseMutationType & { onChunk: (msg: string) => void }) => { const queryClient = useQueryClient(); + const handleError = (variables: ChatMutationRequest, error: Error) => { + const errorMessage = errorChatMessage(variables, error); + queryClient.setQueryData>( + chatHistoryQueryKey({ + session_id: variables.session_id, + offset: 0, + }), + (cachedData) => replacePlaceholderInChatHistory(errorMessage, cachedData), + ); + }; return useMutation({ mutationKey: [MutationKeys.chatMutation], - mutationFn: (request: ChatMutationRequest) => - streamChatMutation(request, onChunk), + mutationFn: (request: ChatMutationRequest) => { + const convertError = (errorMessage: string) => { + const error = new Error(errorMessage); + handleError(request, error); + onError?.(error); + }; + return streamChatMutation(request, onChunk, convertError); + }, onMutate: (variables) => { queryClient.setQueryData>( chatHistoryQueryKey({ @@ -347,6 +379,9 @@ export const useStreamingChatMutation = ({ ); }, onSuccess: (messageId, variables) => { + if (!messageId) { + return; + } fetch( `${llmServicePath}/sessions/${variables.session_id.toString()}/chat-history/${messageId}`, ) @@ -368,32 +403,13 @@ export const useStreamingChatMutation = ({ }); onSuccess?.(message); }) - .catch(() => { - messageQueue.error("An error occurred fetching the chat message"); + .catch((error: unknown) => { + handleError(variables, error as Error); + onError?.(error as Error); }); }, onError: (error: Error, variables) => { - const uuid = crypto.randomUUID(); - const errorMessage: ChatMessageType = { - id: `error-${uuid}`, - session_id: variables.session_id, - source_nodes: [], - rag_message: { - user: variables.query, - assistant: error.message, - }, - evaluations: [], - timestamp: Date.now(), - }; - queryClient.setQueryData>( - chatHistoryQueryKey({ - session_id: variables.session_id, - offset: 0, - }), - (cachedData) => - replacePlaceholderInChatHistory(errorMessage, cachedData), - ); - + handleError(variables, error); onError?.(error); }, }); @@ -402,6 +418,7 @@ export const useStreamingChatMutation = ({ const streamChatMutation = async ( request: ChatMutationRequest, onChunk: (chunk: string) => void, + onError: (error: string) => void, ): Promise => { const ctrl = new AbortController(); let responseId = ""; @@ -420,6 +437,11 @@ const streamChatMutation = async ( onmessage(msg: EventSourceMessage) { const data = JSON.parse(msg.data) as ChatMutationResponse; + if (data.error) { + ctrl.abort(); + onError(data.error); + } + if (data.text) { onChunk(data.text); } @@ -427,9 +449,9 @@ const streamChatMutation = async ( responseId = data.response_id; } }, - onerror(err) { - console.error("Error", err); + onerror(err: unknown) { ctrl.abort(); + onError(String(err)); }, async onopen(response) { if ( @@ -442,10 +464,9 @@ const streamChatMutation = async ( response.status < 500 && response.status !== 429 ) { - // client-side errors are usually non-retriable: - throw new Error(); + onError("An error occurred: " + response.statusText); } else { - throw new Error(); + onError("An error occurred: " + response.statusText); } }, }, diff --git a/ui/src/api/modelsApi.ts b/ui/src/api/modelsApi.ts index e36c8f8c..70759e4f 100644 --- a/ui/src/api/modelsApi.ts +++ b/ui/src/api/modelsApi.ts @@ -57,13 +57,11 @@ export const useGetModelById = (model_id?: string) => { return useQuery({ queryKey: [QueryKeys.getModelById, { model_id }], queryFn: async () => { - if (!model_id) { - return undefined; - } const llmModels = await getLlmModels(); return 
llmModels.find((model) => model.model_id === model_id); }, staleTime: 1000 * 60 * 60, + enabled: !!model_id, }); }; From 65ca8310ad3b55a7a1b3ac68ff336119006dd120 Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Wed, 7 May 2025 10:43:10 -0600 Subject: [PATCH 38/41] bump bedrock to use max tokens of 1024 --- llm-service/app/services/models/llm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llm-service/app/services/models/llm.py b/llm-service/app/services/models/llm.py index a270c841..e09ec4a1 100644 --- a/llm-service/app/services/models/llm.py +++ b/llm-service/app/services/models/llm.py @@ -79,6 +79,7 @@ def get(cls, model_name: Optional[str] = None) -> llms.LLM: model=model_name, messages_to_prompt=messages_to_prompt, completion_to_prompt=completion_to_prompt, + max_tokens=1024, ) @staticmethod From 14ad9aec4c976c80fe984beb65e35bfafad23d7f Mon Sep 17 00:00:00 2001 From: jwatson Date: Wed, 7 May 2025 10:06:35 -0700 Subject: [PATCH 39/41] python refactoring lastFile:llm-service/app/routers/index/sessions/__init__.py --- .../app/routers/index/chat/__init__.py | 2 +- .../app/routers/index/sessions/__init__.py | 4 +- llm-service/app/services/chat.py | 464 ------------------ llm-service/app/services/chat/__init__.py | 38 ++ llm-service/app/services/chat/chat.py | 171 +++++++ .../app/services/chat/streaming_chat.py | 212 ++++++++ .../app/services/chat/suggested_questions.py | 158 ++++++ llm-service/app/services/chat/utils.py | 101 ++++ llm-service/app/services/query/querier.py | 2 +- llm-service/app/services/session.py | 7 +- llm-service/app/tests/services/test_chat.py | 2 +- 11 files changed, 690 insertions(+), 471 deletions(-) delete mode 100644 llm-service/app/services/chat.py create mode 100644 llm-service/app/services/chat/__init__.py create mode 100644 llm-service/app/services/chat/chat.py create mode 100644 llm-service/app/services/chat/streaming_chat.py create mode 100644 llm-service/app/services/chat/suggested_questions.py create mode 100644 llm-service/app/services/chat/utils.py diff --git a/llm-service/app/routers/index/chat/__init__.py b/llm-service/app/routers/index/chat/__init__.py index 2302d5af..e85cdad9 100644 --- a/llm-service/app/routers/index/chat/__init__.py +++ b/llm-service/app/routers/index/chat/__init__.py @@ -42,7 +42,7 @@ from pydantic import BaseModel from app import exceptions -from app.services.chat import generate_suggested_questions +from app.services.chat.suggested_questions import generate_suggested_questions logger = logging.getLogger(__name__) router = APIRouter(prefix="/chat", tags=["Chat"]) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index d4521dbe..bcbe03bb 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -44,11 +44,11 @@ from fastapi.responses import StreamingResponse from pydantic import BaseModel +from app.services.chat.streaming_chat import v3_chat from .... 
import exceptions from ....rag_types import RagPredictConfiguration -from ....services.chat import ( +from ....services.chat.chat import ( v2_chat, - v3_chat, ) from ....services.chat_history.chat_history_manager import ( RagStudioChatMessage, diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py deleted file mode 100644 index 1744ea3a..00000000 --- a/llm-service/app/services/chat.py +++ /dev/null @@ -1,464 +0,0 @@ -# ########################################################################## -# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) -# (C) Cloudera, Inc. 2024 -# All rights reserved. -# -# Applicable Open Source License: Apache 2.0 -# -# NOTE: Cloudera open source products are modular software products -# made up of hundreds of individual components, each of which was -# individually copyrighted. Each Cloudera open source product is a -# collective work under U.S. Copyright Law. Your license to use the -# collective work is as provided in your written agreement with -# Cloudera. Used apart from the collective work, this file is -# licensed for your use pursuant to the open source license -# identified above. -# -# This code is provided to you pursuant a written agreement with -# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute -# this code. If you do not have a written agreement with Cloudera nor -# with an authorized and properly licensed third party, you do not -# have any rights to access nor to use this code. -# -# Absent a written agreement with Cloudera, Inc. (“Cloudera”) to the -# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY -# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED -# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO -# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND -# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, -# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS -# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE -# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR -# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES -# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF -# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF -# DATA. -# ############################################################################## -import time -import uuid -from random import shuffle -from typing import List, Iterable, Optional, Generator - -from fastapi import HTTPException -from llama_index.core.base.llms.types import MessageRole, ChatResponse, ChatMessage -from llama_index.core.chat_engine.types import AgentChatResponse -from pydantic import BaseModel - -from . 
import evaluators, llm_completion -from .chat_history.chat_history_manager import ( - RagPredictSourceNode, - Evaluation, - RagMessage, - RagStudioChatMessage, - chat_history_manager, -) -from .metadata_apis import session_metadata_api -from .metadata_apis.session_metadata_api import Session -from .mlflow import record_rag_mlflow_run, record_direct_llm_mlflow_run -from .query import querier -from .query.query_configuration import QueryConfiguration -from ..ai.vector_stores.vector_store_factory import VectorStoreFactory -from ..rag_types import RagPredictConfiguration - - -class RagContext(BaseModel): - role: MessageRole - content: str - - -def v3_chat( - session: Session, - query: str, - configuration: RagPredictConfiguration, - user_name: Optional[str], -) -> Generator[ChatResponse, None, None]: - query_configuration = QueryConfiguration( - top_k=session.response_chunks, - model_name=session.inference_model, - rerank_model_name=session.rerank_model, - exclude_knowledge_base=configuration.exclude_knowledge_base, - use_question_condensing=configuration.use_question_condensing, - use_hyde=session.query_configuration.enable_hyde, - use_summary_filter=session.query_configuration.enable_summary_filter, - ) - - response_id = str(uuid.uuid4()) - - if configuration.exclude_knowledge_base or len(session.data_source_ids) == 0: - return stream_direct_llm_chat(session, response_id, query, user_name) - - total_data_sources_size: int = sum( - map( - lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, - session.data_source_ids, - ) - ) - if total_data_sources_size == 0: - return stream_direct_llm_chat(session, response_id, query, user_name) - - return _run_streaming_chat( - session, response_id, query, query_configuration, user_name - ) - - -def v2_chat( - session: Session, - query: str, - configuration: RagPredictConfiguration, - user_name: Optional[str], -) -> RagStudioChatMessage: - query_configuration = QueryConfiguration( - top_k=session.response_chunks, - model_name=session.inference_model, - rerank_model_name=session.rerank_model, - exclude_knowledge_base=configuration.exclude_knowledge_base, - use_question_condensing=configuration.use_question_condensing, - use_hyde=session.query_configuration.enable_hyde, - use_summary_filter=session.query_configuration.enable_summary_filter, - ) - - response_id = str(uuid.uuid4()) - - if configuration.exclude_knowledge_base or len(session.data_source_ids) == 0: - return direct_llm_chat(session, response_id, query, user_name) - - total_data_sources_size: int = sum( - map( - lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, - session.data_source_ids, - ) - ) - if total_data_sources_size == 0: - return direct_llm_chat(session, response_id, query, user_name) - - new_chat_message: RagStudioChatMessage = _run_chat( - session, response_id, query, query_configuration, user_name - ) - - chat_history_manager.append_to_history(session.id, [new_chat_message]) - return new_chat_message - - -def _run_streaming_chat( - session: Session, - response_id: str, - query: str, - query_configuration: QueryConfiguration, - user_name: Optional[str], -) -> Generator[ChatResponse, None, None]: - if len(session.data_source_ids) != 1: - raise HTTPException( - status_code=400, detail="Only one datasource is supported for chat." 
- ) - - data_source_id: int = session.data_source_ids[0] - streaming_chat_response, condensed_question = querier.streaming_query( - data_source_id, - query, - query_configuration, - retrieve_chat_history(session.id), - ) - - response: ChatResponse = ChatResponse(message=ChatMessage(content=query)) - if streaming_chat_response.chat_stream: - for response in streaming_chat_response.chat_stream: - response.additional_kwargs["response_id"] = response_id - yield response - - chat_response = AgentChatResponse( - response=response.message.content or "", - sources=streaming_chat_response.sources, - source_nodes=streaming_chat_response.source_nodes, - ) - - if condensed_question and (condensed_question.strip() == query.strip()): - condensed_question = None - relevance, faithfulness = evaluators.evaluate_response( - query, chat_response, session.inference_model - ) - response_source_nodes = format_source_nodes(chat_response, data_source_id) - new_chat_message = RagStudioChatMessage( - id=response_id, - session_id=session.id, - source_nodes=response_source_nodes, - inference_model=session.inference_model, - rag_message=RagMessage( - user=query, - assistant=chat_response.response, - ), - evaluations=[ - Evaluation(name="relevance", value=relevance), - Evaluation(name="faithfulness", value=faithfulness), - ], - timestamp=time.time(), - condensed_question=condensed_question, - ) - - chat_history_manager.append_to_history(session.id, [new_chat_message]) - - record_rag_mlflow_run( - new_chat_message, query_configuration, response_id, session, user_name - ) - - -def _run_chat( - session: Session, - response_id: str, - query: str, - query_configuration: QueryConfiguration, - user_name: Optional[str], -) -> RagStudioChatMessage: - if len(session.data_source_ids) != 1: - raise HTTPException( - status_code=400, detail="Only one datasource is supported for chat." 
- ) - - data_source_id: int = session.data_source_ids[0] - response, condensed_question = querier.query( - data_source_id, - query, - query_configuration, - retrieve_chat_history(session.id), - ) - if condensed_question and (condensed_question.strip() == query.strip()): - condensed_question = None - relevance, faithfulness = evaluators.evaluate_response( - query, response, session.inference_model - ) - response_source_nodes = format_source_nodes(response, data_source_id) - new_chat_message = RagStudioChatMessage( - id=response_id, - session_id=session.id, - source_nodes=response_source_nodes, - inference_model=session.inference_model, - rag_message=RagMessage( - user=query, - assistant=response.response, - ), - evaluations=[ - Evaluation(name="relevance", value=relevance), - Evaluation(name="faithfulness", value=faithfulness), - ], - timestamp=time.time(), - condensed_question=condensed_question, - ) - - record_rag_mlflow_run( - new_chat_message, query_configuration, response_id, session, user_name - ) - return new_chat_message - - -def retrieve_chat_history(session_id: int) -> List[RagContext]: - chat_history = chat_history_manager.retrieve_chat_history(session_id)[:10] - history: List[RagContext] = [] - for message in chat_history: - history.append( - RagContext(role=MessageRole.USER, content=message.rag_message.user) - ) - history.append( - RagContext( - role=MessageRole.ASSISTANT, content=message.rag_message.assistant - ) - ) - return history - - -def format_source_nodes( - response: AgentChatResponse, data_source_id: int -) -> List[RagPredictSourceNode]: - response_source_nodes = [] - for source_node in response.source_nodes: - doc_id = source_node.node.metadata.get("document_id", source_node.node.node_id) - response_source_nodes.append( - RagPredictSourceNode( - node_id=source_node.node.node_id, - doc_id=doc_id, - source_file_name=source_node.node.metadata["file_name"], - score=source_node.score or 0.0, - dataSourceId=data_source_id, - ) - ) - response_source_nodes = sorted( - response_source_nodes, key=lambda x: x.score, reverse=True - ) - return response_source_nodes - - -SAMPLE_QUESTIONS = [ - "What is Cloudera, and how does it support organizations in managing big data?", - "What are the key components of the Cloudera Data Platform (CDP), and how do they work together?", - "How does Cloudera enable hybrid and multi-cloud data management for enterprises?", - "What are the primary use cases for Cloudera's platform in industries such as finance, healthcare, and retail?", - "How does Cloudera ensure data security and compliance with regulations like GDPR, HIPAA, and CCPA?", - "What is the role of Apache Hadoop and Apache Spark in Cloudera's ecosystem, and how do they contribute to data processing?", - "How does Cloudera's platform support machine learning and artificial intelligence workflows?", - "What are the differences between Cloudera Data Platform (CDP) Public Cloud and CDP Private Cloud?", - "How does Cloudera's platform handle data ingestion, storage, and real-time analytics at scale?", - "What tools and features does Cloudera provide for data governance, lineage, and cataloging?,", -] - - -def generate_dummy_suggested_questions() -> List[str]: - questions = SAMPLE_QUESTIONS.copy() - shuffle(questions) - return questions[:4] - - -def _generate_suggested_questions_direct_llm(session: Session) -> List[str]: - chat_history = retrieve_chat_history(session.id) - if not chat_history: - return generate_dummy_suggested_questions() - query_str = ( - " Give me a list of possible 
follow-up questions." - " Each question should be on a new line." - " There should be no more than four (4) questions." - " Each question should be no longer than fifteen (15) words." - " The response should be a bulleted list, using an asterisk (*) to denote the bullet item." - " Do not start like this - `Here are four questions that I can answer based on the context information`" - " Only return the list." - ) - chat_response = llm_completion.completion( - session.id, query_str, session.inference_model - ) - suggested_questions = process_response(chat_response.message.content) - return suggested_questions - - -def generate_suggested_questions( - session_id: Optional[int], - user_name: Optional[str] = None, -) -> List[str]: - if session_id is None: - return generate_dummy_suggested_questions() - session = session_metadata_api.get_session(session_id, user_name) - if len(session.data_source_ids) == 0: - return _generate_suggested_questions_direct_llm(session) - if len(session.data_source_ids) != 1: - raise HTTPException( - status_code=400, - detail="Only one datasource is supported for question suggestion.", - ) - data_source_id = session.data_source_ids[0] - - total_data_sources_size: int = sum( - map( - lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, - session.data_source_ids, - ) - ) - if total_data_sources_size == 0: - return _generate_suggested_questions_direct_llm(session) - # raise HTTPException(status_code=404, detail="Knowledge base not found.") - - chat_history = retrieve_chat_history(session_id) - if total_data_sources_size == 0: - suggested_questions = [] - else: - query_str = ( - "Give me a list of questions that you can answer." - " Each question should be on a new line." - " There should be no more than four (4) questions." - " Each question should be no longer than fifteen (15) words." - " The response should be a bulleted list, using an asterisk (*) to denote the bullet item." - " Do not return questions based on the metadata of the document. Only the content." - " Do not start like this - `Here are four questions that I can answer based on the context information`" - " Only return the list." - ) - if chat_history: - query_str = ( - query_str - + ( - "I will provide a response from my last question to help with generating new questions." - " Consider returning questions that are relevant to the response" - " They might be follow up questions or questions that are related to the response." 
- " Here is the last response received:\n" - ) - + chat_history[-1].content - ) - response, _ = querier.query( - data_source_id, - query_str, - QueryConfiguration( - top_k=session.response_chunks, - model_name=session.inference_model, - rerank_model_name=None, - exclude_knowledge_base=False, - use_question_condensing=False, - use_hyde=False, - use_postprocessor=False, - ), - [], - ) - suggested_questions = process_response(response.response) - return suggested_questions - - -def process_response(response: str | None) -> list[str]: - if response is None: - return [] - - sentences: Iterable[str] = response.splitlines() - sentences = map(lambda x: x.strip(), sentences) - sentences = map(lambda x: x.removeprefix("*").strip(), sentences) - sentences = map(lambda x: x.removeprefix("-").strip(), sentences) - sentences = map(lambda x: x.strip("*"), sentences) - sentences = filter(lambda x: len(x.split()) <= 60, sentences) - sentences = filter(lambda x: x != "Empty Response", sentences) - sentences = filter(lambda x: x != "", sentences) - return list(sentences)[:5] - - -def direct_llm_chat( - session: Session, response_id: str, query: str, user_name: Optional[str] -) -> RagStudioChatMessage: - record_direct_llm_mlflow_run(response_id, session, user_name) - - chat_response = llm_completion.completion( - session.id, query, session.inference_model - ) - new_chat_message = RagStudioChatMessage( - id=response_id, - session_id=session.id, - source_nodes=[], - inference_model=session.inference_model, - evaluations=[], - rag_message=RagMessage( - user=query, - assistant=str(chat_response.message.content), - ), - timestamp=time.time(), - condensed_question=None, - ) - chat_history_manager.append_to_history(session.id, [new_chat_message]) - return new_chat_message - - -def stream_direct_llm_chat( - session: Session, response_id: str, query: str, user_name: Optional[str] -) -> Generator[ChatResponse, None, None]: - record_direct_llm_mlflow_run(response_id, session, user_name) - - chat_response = llm_completion.stream_completion( - session.id, query, session.inference_model - ) - response: ChatResponse = ChatResponse(message=ChatMessage(content=query)) - for response in chat_response: - response.additional_kwargs["response_id"] = response_id - yield response - - new_chat_message = RagStudioChatMessage( - id=response_id, - session_id=session.id, - source_nodes=[], - inference_model=session.inference_model, - evaluations=[], - rag_message=RagMessage( - user=query, - assistant=response.message.content or "", - ), - timestamp=time.time(), - condensed_question=None, - ) - chat_history_manager.append_to_history(session.id, [new_chat_message]) diff --git a/llm-service/app/services/chat/__init__.py b/llm-service/app/services/chat/__init__.py new file mode 100644 index 00000000..9c598784 --- /dev/null +++ b/llm-service/app/services/chat/__init__.py @@ -0,0 +1,38 @@ +# +# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) +# (C) Cloudera, Inc. 2025 +# All rights reserved. +# +# Applicable Open Source License: Apache 2.0 +# +# NOTE: Cloudera open source products are modular software products +# made up of hundreds of individual components, each of which was +# individually copyrighted. Each Cloudera open source product is a +# collective work under U.S. Copyright Law. Your license to use the +# collective work is as provided in your written agreement with +# Cloudera. Used apart from the collective work, this file is +# licensed for your use pursuant to the open source license +# identified above. 
+# +# This code is provided to you pursuant a written agreement with +# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute +# this code. If you do not have a written agreement with Cloudera nor +# with an authorized and properly licensed third party, you do not +# have any rights to access nor to use this code. +# +# Absent a written agreement with Cloudera, Inc. ("Cloudera") to the +# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY +# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED +# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO +# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, +# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS +# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE +# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR +# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES +# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF +# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF +# DATA. +# + diff --git a/llm-service/app/services/chat/chat.py b/llm-service/app/services/chat/chat.py new file mode 100644 index 00000000..b8c0b08f --- /dev/null +++ b/llm-service/app/services/chat/chat.py @@ -0,0 +1,171 @@ +# +# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) +# (C) Cloudera, Inc. 2025 +# All rights reserved. +# +# Applicable Open Source License: Apache 2.0 +# +# NOTE: Cloudera open source products are modular software products +# made up of hundreds of individual components, each of which was +# individually copyrighted. Each Cloudera open source product is a +# collective work under U.S. Copyright Law. Your license to use the +# collective work is as provided in your written agreement with +# Cloudera. Used apart from the collective work, this file is +# licensed for your use pursuant to the open source license +# identified above. +# +# This code is provided to you pursuant a written agreement with +# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute +# this code. If you do not have a written agreement with Cloudera nor +# with an authorized and properly licensed third party, you do not +# have any rights to access nor to use this code. +# +# Absent a written agreement with Cloudera, Inc. ("Cloudera") to the +# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY +# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED +# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO +# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, +# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS +# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE +# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR +# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES +# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF +# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF +# DATA. 
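# A sketch of how the /chat route elsewhere in this patch drives the blocking
# path defined below (names such as session_id, request, and origin_remote_user
# are taken from the sessions router): resolve the session, fall back to a
# default configuration, and let v2_chat persist the resulting
# RagStudioChatMessage to the session's chat history.
session = session_metadata_api.get_session(session_id, user_name=origin_remote_user)
configuration = request.configuration or RagPredictConfiguration()
message = v2_chat(session, request.query, configuration, user_name=origin_remote_user)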
+# + +import time +import uuid +from typing import Optional + +from fastapi import HTTPException + +from app.services import evaluators, llm_completion +from app.services.chat.utils import retrieve_chat_history, format_source_nodes +from app.services.chat_history.chat_history_manager import ( + Evaluation, + RagMessage, + RagStudioChatMessage, + chat_history_manager, +) +from app.services.metadata_apis.session_metadata_api import Session +from app.services.mlflow import record_rag_mlflow_run, record_direct_llm_mlflow_run +from app.services.query import querier +from app.services.query.query_configuration import QueryConfiguration +from app.ai.vector_stores.vector_store_factory import VectorStoreFactory +from app.rag_types import RagPredictConfiguration + + +def v2_chat( + session: Session, + query: str, + configuration: RagPredictConfiguration, + user_name: Optional[str], +) -> RagStudioChatMessage: + query_configuration = QueryConfiguration( + top_k=session.response_chunks, + model_name=session.inference_model, + rerank_model_name=session.rerank_model, + exclude_knowledge_base=configuration.exclude_knowledge_base, + use_question_condensing=configuration.use_question_condensing, + use_hyde=session.query_configuration.enable_hyde, + use_summary_filter=session.query_configuration.enable_summary_filter, + ) + + response_id = str(uuid.uuid4()) + + if configuration.exclude_knowledge_base or len(session.data_source_ids) == 0: + return direct_llm_chat(session, response_id, query, user_name) + + total_data_sources_size: int = sum( + map( + lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, + session.data_source_ids, + ) + ) + if total_data_sources_size == 0: + return direct_llm_chat(session, response_id, query, user_name) + + new_chat_message: RagStudioChatMessage = _run_chat( + session, response_id, query, query_configuration, user_name + ) + + chat_history_manager.append_to_history(session.id, [new_chat_message]) + return new_chat_message + + +def _run_chat( + session: Session, + response_id: str, + query: str, + query_configuration: QueryConfiguration, + user_name: Optional[str], +) -> RagStudioChatMessage: + if len(session.data_source_ids) != 1: + raise HTTPException( + status_code=400, detail="Only one datasource is supported for chat." 
+ ) + + data_source_id: int = session.data_source_ids[0] + response, condensed_question = querier.query( + data_source_id, + query, + query_configuration, + retrieve_chat_history(session.id), + ) + if condensed_question and (condensed_question.strip() == query.strip()): + condensed_question = None + relevance, faithfulness = evaluators.evaluate_response( + query, response, session.inference_model + ) + response_source_nodes = format_source_nodes(response, data_source_id) + new_chat_message = RagStudioChatMessage( + id=response_id, + session_id=session.id, + source_nodes=response_source_nodes, + inference_model=session.inference_model, + rag_message=RagMessage( + user=query, + assistant=response.response, + ), + evaluations=[ + Evaluation(name="relevance", value=relevance), + Evaluation(name="faithfulness", value=faithfulness), + ], + timestamp=time.time(), + condensed_question=condensed_question, + ) + + record_rag_mlflow_run( + new_chat_message, query_configuration, response_id, session, user_name + ) + return new_chat_message + + +def direct_llm_chat( + session: Session, response_id: str, query: str, user_name: Optional[str] +) -> RagStudioChatMessage: + record_direct_llm_mlflow_run(response_id, session, user_name) + + chat_response = llm_completion.completion( + session.id, query, session.inference_model + ) + new_chat_message = RagStudioChatMessage( + id=response_id, + session_id=session.id, + source_nodes=[], + inference_model=session.inference_model, + evaluations=[], + rag_message=RagMessage( + user=query, + assistant=str(chat_response.message.content), + ), + timestamp=time.time(), + condensed_question=None, + ) + chat_history_manager.append_to_history(session.id, [new_chat_message]) + return new_chat_message + + diff --git a/llm-service/app/services/chat/streaming_chat.py b/llm-service/app/services/chat/streaming_chat.py new file mode 100644 index 00000000..6b9fe975 --- /dev/null +++ b/llm-service/app/services/chat/streaming_chat.py @@ -0,0 +1,212 @@ +# +# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) +# (C) Cloudera, Inc. 2025 +# All rights reserved. +# +# Applicable Open Source License: Apache 2.0 +# +# NOTE: Cloudera open source products are modular software products +# made up of hundreds of individual components, each of which was +# individually copyrighted. Each Cloudera open source product is a +# collective work under U.S. Copyright Law. Your license to use the +# collective work is as provided in your written agreement with +# Cloudera. Used apart from the collective work, this file is +# licensed for your use pursuant to the open source license +# identified above. +# +# This code is provided to you pursuant a written agreement with +# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute +# this code. If you do not have a written agreement with Cloudera nor +# with an authorized and properly licensed third party, you do not +# have any rights to access nor to use this code. +# +# Absent a written agreement with Cloudera, Inc. 
("Cloudera") to the +# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY +# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED +# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO +# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, +# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS +# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE +# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR +# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES +# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF +# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF +# DATA. +# + +# +# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) +# (C) Cloudera, Inc. 2025 +# All rights reserved. +# +# Applicable Open Source License: Apache 2.0 +# +# +# This code is provided to you pursuant a written agreement with +# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute +# this code. If you do not have a written agreement with Cloudera nor +# with an authorized and properly licensed third party, you do not +# have any rights to access nor to use this code. +# +# Absent a written agreement with Cloudera, Inc. ("Cloudera") to the +# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY +# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED +# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO +# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, +# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS +# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE +# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR +# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES +# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF +# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF +# DATA. 
+# + +import time +import uuid +from typing import Optional, Generator + +from fastapi import HTTPException +from llama_index.core.base.llms.types import ChatResponse, ChatMessage +from llama_index.core.chat_engine.types import AgentChatResponse + +from app.ai.vector_stores.vector_store_factory import VectorStoreFactory +from app.rag_types import RagPredictConfiguration +from app.services import evaluators, llm_completion +from app.services.chat.utils import retrieve_chat_history, format_source_nodes +from app.services.chat_history.chat_history_manager import RagStudioChatMessage, RagMessage, Evaluation, \ + chat_history_manager +from app.services.metadata_apis.session_metadata_api import Session +from app.services.mlflow import record_rag_mlflow_run, record_direct_llm_mlflow_run +from app.services.query import querier +from app.services.query.query_configuration import QueryConfiguration + + +def v3_chat( + session: Session, + query: str, + configuration: RagPredictConfiguration, + user_name: Optional[str], +) -> Generator[ChatResponse, None, None]: + query_configuration = QueryConfiguration( + top_k=session.response_chunks, + model_name=session.inference_model, + rerank_model_name=session.rerank_model, + exclude_knowledge_base=configuration.exclude_knowledge_base, + use_question_condensing=configuration.use_question_condensing, + use_hyde=session.query_configuration.enable_hyde, + use_summary_filter=session.query_configuration.enable_summary_filter, + ) + + response_id = str(uuid.uuid4()) + + if configuration.exclude_knowledge_base or len(session.data_source_ids) == 0: + return _stream_direct_llm_chat(session, response_id, query, user_name) + + total_data_sources_size: int = sum( + map( + lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, + session.data_source_ids, + ) + ) + if total_data_sources_size == 0: + return _stream_direct_llm_chat(session, response_id, query, user_name) + + return _run_streaming_chat( + session, response_id, query, query_configuration, user_name + ) + + +def _run_streaming_chat( + session: Session, + response_id: str, + query: str, + query_configuration: QueryConfiguration, + user_name: Optional[str], +) -> Generator[ChatResponse, None, None]: + if len(session.data_source_ids) != 1: + raise HTTPException( + status_code=400, detail="Only one datasource is supported for chat." 
+ ) + + data_source_id: int = session.data_source_ids[0] + streaming_chat_response, condensed_question = querier.streaming_query( + data_source_id, + query, + query_configuration, + retrieve_chat_history(session.id), + ) + + response: ChatResponse = ChatResponse(message=ChatMessage(content=query)) + if streaming_chat_response.chat_stream: + for response in streaming_chat_response.chat_stream: + response.additional_kwargs["response_id"] = response_id + yield response + + chat_response = AgentChatResponse( + response=response.message.content or "", + sources=streaming_chat_response.sources, + source_nodes=streaming_chat_response.source_nodes, + ) + + if condensed_question and (condensed_question.strip() == query.strip()): + condensed_question = None + relevance, faithfulness = evaluators.evaluate_response( + query, chat_response, session.inference_model + ) + response_source_nodes = format_source_nodes(chat_response, data_source_id) + new_chat_message = RagStudioChatMessage( + id=response_id, + session_id=session.id, + source_nodes=response_source_nodes, + inference_model=session.inference_model, + rag_message=RagMessage( + user=query, + assistant=chat_response.response, + ), + evaluations=[ + Evaluation(name="relevance", value=relevance), + Evaluation(name="faithfulness", value=faithfulness), + ], + timestamp=time.time(), + condensed_question=condensed_question, + ) + + chat_history_manager.append_to_history(session.id, [new_chat_message]) + + record_rag_mlflow_run( + new_chat_message, query_configuration, response_id, session, user_name + ) + + +def _stream_direct_llm_chat( + session: Session, response_id: str, query: str, user_name: Optional[str] +) -> Generator[ChatResponse, None, None]: + record_direct_llm_mlflow_run(response_id, session, user_name) + + chat_response = llm_completion.stream_completion( + session.id, query, session.inference_model + ) + response: ChatResponse = ChatResponse(message=ChatMessage(content=query)) + for response in chat_response: + response.additional_kwargs["response_id"] = response_id + yield response + + new_chat_message = RagStudioChatMessage( + id=response_id, + session_id=session.id, + source_nodes=[], + inference_model=session.inference_model, + evaluations=[], + rag_message=RagMessage( + user=query, + assistant=response.message.content or "", + ), + timestamp=time.time(), + condensed_question=None, + ) + chat_history_manager.append_to_history(session.id, [new_chat_message]) diff --git a/llm-service/app/services/chat/suggested_questions.py b/llm-service/app/services/chat/suggested_questions.py new file mode 100644 index 00000000..bdd409e6 --- /dev/null +++ b/llm-service/app/services/chat/suggested_questions.py @@ -0,0 +1,158 @@ +# +# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) +# (C) Cloudera, Inc. 2025 +# All rights reserved. +# +# Applicable Open Source License: Apache 2.0 +# +# NOTE: Cloudera open source products are modular software products +# made up of hundreds of individual components, each of which was +# individually copyrighted. Each Cloudera open source product is a +# collective work under U.S. Copyright Law. Your license to use the +# collective work is as provided in your written agreement with +# Cloudera. Used apart from the collective work, this file is +# licensed for your use pursuant to the open source license +# identified above. +# +# This code is provided to you pursuant a written agreement with +# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute +# this code. 
If you do not have a written agreement with Cloudera nor +# with an authorized and properly licensed third party, you do not +# have any rights to access nor to use this code. +# +# Absent a written agreement with Cloudera, Inc. ("Cloudera") to the +# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY +# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED +# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO +# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, +# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS +# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE +# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR +# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES +# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF +# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF +# DATA. +# + +from random import shuffle +from typing import List, Optional + +from fastapi import HTTPException + +from app.ai.vector_stores.vector_store_factory import VectorStoreFactory +from app.services import llm_completion +from app.services.chat.utils import retrieve_chat_history, process_response +from app.services.metadata_apis import session_metadata_api +from app.services.metadata_apis.session_metadata_api import Session +from app.services.query import querier +from app.services.query.query_configuration import QueryConfiguration + +SAMPLE_QUESTIONS = [ + "What is Cloudera, and how does it support organizations in managing big data?", + "What are the key components of the Cloudera Data Platform (CDP), and how do they work together?", + "How does Cloudera enable hybrid and multi-cloud data management for enterprises?", + "What are the primary use cases for Cloudera's platform in industries such as finance, healthcare, and retail?", + "How does Cloudera ensure data security and compliance with regulations like GDPR, HIPAA, and CCPA?", + "What is the role of Apache Hadoop and Apache Spark in Cloudera's ecosystem, and how do they contribute to data processing?", + "How does Cloudera's platform support machine learning and artificial intelligence workflows?", + "What are the differences between Cloudera Data Platform (CDP) Public Cloud and CDP Private Cloud?", + "How does Cloudera's platform handle data ingestion, storage, and real-time analytics at scale?", + "What tools and features does Cloudera provide for data governance, lineage, and cataloging?,", +] + + +def generate_dummy_suggested_questions() -> List[str]: + questions = SAMPLE_QUESTIONS.copy() + shuffle(questions) + return questions[:4] + + +def _generate_suggested_questions_direct_llm(session: Session) -> List[str]: + chat_history = retrieve_chat_history(session.id) + if not chat_history: + return generate_dummy_suggested_questions() + query_str = ( + " Give me a list of possible follow-up questions." + " Each question should be on a new line." + " There should be no more than four (4) questions." + " Each question should be no longer than fifteen (15) words." + " The response should be a bulleted list, using an asterisk (*) to denote the bullet item." + " Do not start like this - `Here are four questions that I can answer based on the context information`" + " Only return the list." 
+ ) + chat_response = llm_completion.completion( + session.id, query_str, session.inference_model + ) + suggested_questions = process_response(chat_response.message.content) + return suggested_questions + + +def generate_suggested_questions( + session_id: Optional[int], + user_name: Optional[str] = None, +) -> List[str]: + if session_id is None: + return generate_dummy_suggested_questions() + session = session_metadata_api.get_session(session_id, user_name) + if len(session.data_source_ids) == 0: + return _generate_suggested_questions_direct_llm(session) + if len(session.data_source_ids) != 1: + raise HTTPException( + status_code=400, + detail="Only one datasource is supported for question suggestion.", + ) + data_source_id = session.data_source_ids[0] + + total_data_sources_size: int = sum( + map( + lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, + session.data_source_ids, + ) + ) + if total_data_sources_size == 0: + return _generate_suggested_questions_direct_llm(session) + # raise HTTPException(status_code=404, detail="Knowledge base not found.") + + chat_history = retrieve_chat_history(session_id) + if total_data_sources_size == 0: + suggested_questions = [] + else: + query_str = ( + "Give me a list of questions that you can answer." + " Each question should be on a new line." + " There should be no more than four (4) questions." + " Each question should be no longer than fifteen (15) words." + " The response should be a bulleted list, using an asterisk (*) to denote the bullet item." + " Do not return questions based on the metadata of the document. Only the content." + " Do not start like this - `Here are four questions that I can answer based on the context information`" + " Only return the list." + ) + if chat_history: + query_str = ( + query_str + + ( + "I will provide a response from my last question to help with generating new questions." + " Consider returning questions that are relevant to the response" + " They might be follow up questions or questions that are related to the response." + " Here is the last response received:\n" + ) + + chat_history[-1].content + ) + response, _ = querier.query( + data_source_id, + query_str, + QueryConfiguration( + top_k=session.response_chunks, + model_name=session.inference_model, + rerank_model_name=None, + exclude_knowledge_base=False, + use_question_condensing=False, + use_hyde=False, + use_postprocessor=False, + ), + [], + ) + suggested_questions = process_response(response.response) + return suggested_questions diff --git a/llm-service/app/services/chat/utils.py b/llm-service/app/services/chat/utils.py new file mode 100644 index 00000000..f626be38 --- /dev/null +++ b/llm-service/app/services/chat/utils.py @@ -0,0 +1,101 @@ +# +# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) +# (C) Cloudera, Inc. 2025 +# All rights reserved. +# +# Applicable Open Source License: Apache 2.0 +# +# NOTE: Cloudera open source products are modular software products +# made up of hundreds of individual components, each of which was +# individually copyrighted. Each Cloudera open source product is a +# collective work under U.S. Copyright Law. Your license to use the +# collective work is as provided in your written agreement with +# Cloudera. Used apart from the collective work, this file is +# licensed for your use pursuant to the open source license +# identified above. +# +# This code is provided to you pursuant a written agreement with +# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute +# this code. 
If you do not have a written agreement with Cloudera nor +# with an authorized and properly licensed third party, you do not +# have any rights to access nor to use this code. +# +# Absent a written agreement with Cloudera, Inc. ("Cloudera") to the +# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY +# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED +# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO +# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, +# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS +# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE +# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR +# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES +# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF +# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF +# DATA. +# + +from typing import List, Iterable + +from llama_index.core.base.llms.types import MessageRole +from llama_index.core.chat_engine.types import AgentChatResponse +from pydantic import BaseModel + +from app.services.chat_history.chat_history_manager import chat_history_manager, RagPredictSourceNode + + +class RagContext(BaseModel): + role: MessageRole + content: str + + +def retrieve_chat_history(session_id: int) -> List[RagContext]: + chat_history = chat_history_manager.retrieve_chat_history(session_id)[:10] + history: List[RagContext] = [] + for message in chat_history: + history.append( + RagContext(role=MessageRole.USER, content=message.rag_message.user) + ) + history.append( + RagContext( + role=MessageRole.ASSISTANT, content=message.rag_message.assistant + ) + ) + return history + + +def format_source_nodes( + response: AgentChatResponse, data_source_id: int +) -> List[RagPredictSourceNode]: + response_source_nodes = [] + for source_node in response.source_nodes: + doc_id = source_node.node.metadata.get("document_id", source_node.node.node_id) + response_source_nodes.append( + RagPredictSourceNode( + node_id=source_node.node.node_id, + doc_id=doc_id, + source_file_name=source_node.node.metadata["file_name"], + score=source_node.score or 0.0, + dataSourceId=data_source_id, + ) + ) + response_source_nodes = sorted( + response_source_nodes, key=lambda x: x.score, reverse=True + ) + return response_source_nodes + + +def process_response(response: str | None) -> list[str]: + if response is None: + return [] + + sentences: Iterable[str] = response.splitlines() + sentences = map(lambda x: x.strip(), sentences) + sentences = map(lambda x: x.removeprefix("*").strip(), sentences) + sentences = map(lambda x: x.removeprefix("-").strip(), sentences) + sentences = map(lambda x: x.strip("*"), sentences) + sentences = filter(lambda x: len(x.split()) <= 60, sentences) + sentences = filter(lambda x: x != "Empty Response", sentences) + sentences = filter(lambda x: x != "", sentences) + return list(sentences)[:5] diff --git a/llm-service/app/services/query/querier.py b/llm-service/app/services/query/querier.py index a713e069..717e8e74 100644 --- a/llm-service/app/services/query/querier.py +++ b/llm-service/app/services/query/querier.py @@ -32,7 +32,7 @@ import typing if typing.TYPE_CHECKING: - from ..chat import RagContext + from ..chat.utils import RagContext import logging from typing import List, Optional diff --git 
a/llm-service/app/services/session.py b/llm-service/app/services/session.py index 274780ca..7020dfd1 100644 --- a/llm-service/app/services/session.py +++ b/llm-service/app/services/session.py @@ -40,7 +40,10 @@ from fastapi import HTTPException from . import models -from .chat_history.chat_history_manager import chat_history_manager +from .chat_history.chat_history_manager import ( + chat_history_manager, + RagStudioChatMessage, +) from .metadata_apis import session_metadata_api RENAME_SESSION_PROMPT_TEMPLATE = """ @@ -78,7 +81,7 @@ def rename_session(session_id: int, user_name: Optional[str]) -> str: - chat_history = chat_history_manager.retrieve_chat_history(session_id=session_id) + chat_history: list[RagStudioChatMessage] = chat_history_manager.retrieve_chat_history(session_id=session_id) if not chat_history: raise HTTPException(status_code=400, detail="No chat history found") first_interaction = chat_history[0].rag_message diff --git a/llm-service/app/tests/services/test_chat.py b/llm-service/app/tests/services/test_chat.py index 32e4ae14..ce0136f5 100644 --- a/llm-service/app/tests/services/test_chat.py +++ b/llm-service/app/tests/services/test_chat.py @@ -40,7 +40,7 @@ from hypothesis import example, given from hypothesis import strategies as st -from app.services.chat import process_response +from app.services.chat.utils import process_response @st.composite From dd36cc0c35780bee7bc6754ee3a9c441fb1c8bad Mon Sep 17 00:00:00 2001 From: Baasit Sharief Date: Wed, 7 May 2025 10:18:51 -0700 Subject: [PATCH 40/41] mob next [ci-skip] [ci skip] [skip ci] lastFile:llm-service/app/routers/index/sessions/__init__.py --- .../app/routers/index/sessions/__init__.py | 8 ++--- llm-service/app/services/chat/chat.py | 2 +- .../app/services/chat/streaming_chat.py | 32 +------------------ 3 files changed, 6 insertions(+), 36 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index bcbe03bb..7b350e63 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -44,11 +44,11 @@ from fastapi.responses import StreamingResponse from pydantic import BaseModel -from app.services.chat.streaming_chat import v3_chat +from app.services.chat.streaming_chat import stream_chat from .... 
import exceptions from ....rag_types import RagPredictConfiguration from ....services.chat.chat import ( - v2_chat, + chat as run_chat, ) from ....services.chat_history.chat_history_manager import ( RagStudioChatMessage, @@ -211,7 +211,7 @@ def chat( session = session_metadata_api.get_session(session_id, user_name=origin_remote_user) configuration = request.configuration or RagPredictConfiguration() - return v2_chat(session, request.query, configuration, user_name=origin_remote_user) + return run_chat(session, request.query, configuration, user_name=origin_remote_user) @router.post( @@ -229,7 +229,7 @@ def stream_chat_completion( def generate_stream() -> Generator[str, None, None]: response_id: str = "" try: - for response in v3_chat( + for response in stream_chat( session, request.query, configuration, user_name=origin_remote_user ): print(response) diff --git a/llm-service/app/services/chat/chat.py b/llm-service/app/services/chat/chat.py index b8c0b08f..0256d4ae 100644 --- a/llm-service/app/services/chat/chat.py +++ b/llm-service/app/services/chat/chat.py @@ -58,7 +58,7 @@ from app.rag_types import RagPredictConfiguration -def v2_chat( +def chat( session: Session, query: str, configuration: RagPredictConfiguration, diff --git a/llm-service/app/services/chat/streaming_chat.py b/llm-service/app/services/chat/streaming_chat.py index 6b9fe975..5cdad4c5 100644 --- a/llm-service/app/services/chat/streaming_chat.py +++ b/llm-service/app/services/chat/streaming_chat.py @@ -36,36 +36,6 @@ # DATA. # -# -# CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP) -# (C) Cloudera, Inc. 2025 -# All rights reserved. -# -# Applicable Open Source License: Apache 2.0 -# -# -# This code is provided to you pursuant a written agreement with -# (i) Cloudera, Inc. or (ii) a third-party authorized to distribute -# this code. If you do not have a written agreement with Cloudera nor -# with an authorized and properly licensed third party, you do not -# have any rights to access nor to use this code. -# -# Absent a written agreement with Cloudera, Inc. ("Cloudera") to the -# contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY -# KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED -# WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO -# IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND -# FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU, -# AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS -# ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE -# OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR -# CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES -# RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF -# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF -# DATA. 
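# A hedged Python sketch of a client for the stream-completion endpoint touched
# above; the base URL, port, and session id are placeholders, and the payload
# fields assumed here ("text", "error") are the ones the UI stream handler
# consumes.
import json

import requests

url = "http://localhost:8000/sessions/42/stream-completion"  # placeholder host/id
with requests.post(url, json={"query": "What is RAG Studio?"}, stream=True) as resp:
    resp.raise_for_status()
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw or not raw.startswith("data: "):
            continue
        event = json.loads(raw[len("data: "):])
        if "error" in event:
            raise RuntimeError(event["error"])
        print(event.get("text", ""), end="", flush=True)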
-# - import time import uuid from typing import Optional, Generator @@ -86,7 +56,7 @@ from app.services.query.query_configuration import QueryConfiguration -def v3_chat( +def stream_chat( session: Session, query: str, configuration: RagPredictConfiguration, From cd960a1342a38c0450556f1f868c99e73181104f Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Wed, 7 May 2025 11:21:27 -0600 Subject: [PATCH 41/41] nits --- llm-service/app/routers/index/sessions/__init__.py | 2 -- llm-service/app/services/chat/streaming_chat.py | 8 ++++++-- ui/src/api/chatApi.ts | 5 ++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 7b350e63..4223500c 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -241,6 +241,4 @@ def generate_stream() -> Generator[str, None, None]: logger.exception("Failed to stream chat completion") yield f'data: {{"error" : "{e}"}}\n\n' - # kick off evals with full response - # todo: write to history, start evals, rewrite question, log to mlfow once the response is done return StreamingResponse(generate_stream(), media_type="text/event-stream") diff --git a/llm-service/app/services/chat/streaming_chat.py b/llm-service/app/services/chat/streaming_chat.py index 5cdad4c5..ff173448 100644 --- a/llm-service/app/services/chat/streaming_chat.py +++ b/llm-service/app/services/chat/streaming_chat.py @@ -48,8 +48,12 @@ from app.rag_types import RagPredictConfiguration from app.services import evaluators, llm_completion from app.services.chat.utils import retrieve_chat_history, format_source_nodes -from app.services.chat_history.chat_history_manager import RagStudioChatMessage, RagMessage, Evaluation, \ - chat_history_manager +from app.services.chat_history.chat_history_manager import ( + RagStudioChatMessage, + RagMessage, + Evaluation, + chat_history_manager, +) from app.services.metadata_apis.session_metadata_api import Session from app.services.mlflow import record_rag_mlflow_run, record_direct_llm_mlflow_run from app.services.query import querier diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 50938b02..7411d2f2 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -37,6 +37,7 @@ ******************************************************************************/ import { + commonHeaders, getRequest, llmServicePath, MutationKeys, @@ -426,9 +427,7 @@ const streamChatMutation = async ( `${llmServicePath}/sessions/${request.session_id.toString()}/stream-completion`, { method: "POST", - headers: { - "Content-Type": "application/json", - }, + headers: commonHeaders, body: JSON.stringify({ query: request.query, configuration: request.configuration,