
Commit 070697d

fix: leave room for new tokens when truncating
1 parent d895af0 commit 070697d

File tree

1 file changed: +10 -1 lines changed


api/chatbot/agent.py

Lines changed: 10 additions & 1 deletion
@@ -6,6 +6,7 @@
 from langchain_core.messages import BaseMessage, SystemMessage, trim_messages
 from langchain_core.prompts import ChatPromptTemplate
 from langgraph.graph import END, START, MessagesState, StateGraph
+from loguru import logger
 
 from chatbot.safety import create_hazard_classifier, hazard_categories
 
@@ -29,7 +30,15 @@ def create_agent(
     if hasattr(chat_model, "get_num_tokens_from_messages"):
         token_counter = chat_model.get_num_tokens_from_messages
     else:
+        logger.warning(
+            "Could not get token counter function from chat model, will truncate messages by message count. This may lead to context overflow."
+        )
         token_counter = len
+    if max_tokens is None:
+        raise ValueError("`None` passed as `max_tokens` which is not allowed")
+
+    # leave 0.2 for new tokens
+    _max_tokens = int(max_tokens * 0.8)
 
     hazard_classifier = None
     if safety_model is not None:
@@ -94,7 +103,7 @@ async def chatbot(state: MessagesState) -> MessagesState:
         windowed_messages: list[BaseMessage] = trim_messages(
             all_messages,
             token_counter=token_counter,
-            max_tokens=max_tokens,
+            max_tokens=_max_tokens,
             start_on="human",  # This means that the first message should be from the user after trimming.
         )
 