Token Level Log (#3238)

yuhongsun96 · web-flow · commit 413891f14309 · 2024-11-23T18:41:50.000-08:00
diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
@@ -422,6 +422,9 @@
 LOG_DANSWER_MODEL_INTERACTIONS = (
     os.environ.get("LOG_DANSWER_MODEL_INTERACTIONS", "").lower() == "true"
 )
+LOG_INDIVIDUAL_MODEL_TOKENS = (
+    os.environ.get("LOG_INDIVIDUAL_MODEL_TOKENS", "").lower() == "true"
+)
 # If set to `true` will enable additional logs about Vespa query performance
 # (time spent on finding the right docs + time spent fetching summaries from disk)
 LOG_VESPA_TIMING_INFORMATION = (
diff --git a/backend/danswer/llm/answering/stream_processing/answer_response_handler.py b/backend/danswer/llm/answering/stream_processing/answer_response_handler.py
@@ -13,6 +13,9 @@
     QuotesProcessor,
 )
 from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping
+from danswer.utils.logger import setup_logger
+
+logger = setup_logger()
 
 
 class AnswerResponseHandler(abc.ABC):
@@ -48,6 +51,9 @@ def __init__(
         self.processed_text = ""
         self.citations: list[CitationInfo] = []
 
+        # TODO: remove this debug log once the citation issue is resolved
+        logger.debug(f"Document to ranking map {self.doc_id_to_rank_map}")
+
     def handle_response_part(
         self,
         response_item: BaseMessage | None,
diff --git a/backend/danswer/llm/interfaces.py b/backend/danswer/llm/interfaces.py
@@ -9,6 +9,7 @@
 
 from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
 from danswer.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS
+from danswer.configs.app_configs import LOG_INDIVIDUAL_MODEL_TOKENS
 from danswer.utils.logger import setup_logger
 
 
@@ -117,10 +118,19 @@ def stream(
         self._precall(prompt)
         # TODO add a postcall to log model outputs independent of concrete class
         # implementation
-        return self._stream_implementation(
+        messages = self._stream_implementation(
             prompt, tools, tool_choice, structured_response_format
         )
 
+        tokens = []
+        for message in messages:
+            if LOG_INDIVIDUAL_MODEL_TOKENS:
+                tokens.append(message.content)
+            yield message
+
+        if LOG_INDIVIDUAL_MODEL_TOKENS and tokens:
+            logger.debug(f"Model Tokens: {tokens}")
+
     @abc.abstractmethod
     def _stream_implementation(
         self,
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
@@ -83,6 +83,7 @@ services:
       - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging
       # Log all of Danswer prompts and interactions with the LLM
       - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
+      - LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
       # If set to `true` will enable additional logs about Vespa query performance
       # (time spent on finding the right docs + time spent fetching summaries from disk)
       - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
@@ -204,6 +205,7 @@ services:
       - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging
       # Log all of Danswer prompts and interactions with the LLM
       - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
+      - LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
       - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
 
       # Analytics Configs