Skip to content

Commit bf291d0

Browse files
authored
Fix missing json (#3177)
* initial steps
* k
* remove logs
* k
* k
1 parent 8309f4a commit bf291d0

File tree

3 files changed

+15
-5
lines changed

3 files changed

+15
-5
lines changed

backend/danswer/llm/answering/stream_processing/quotes_processing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,16 +231,16 @@ def process_token(
231231

232232
model_previous = self.model_output
233233
self.model_output += token
234-
235234
if not self.found_answer_start:
236235
m = answer_pattern.search(self.model_output)
237236
if m:
238237
self.found_answer_start = True
239238

240239
# Prevent heavy cases of hallucinations
241-
if self.is_json_prompt and len(self.model_output) > 70:
242-
logger.warning("LLM did not produce json as prompted")
240+
if self.is_json_prompt and len(self.model_output) > 400:
243241
self.found_answer_end = True
242+
logger.warning("LLM did not produce json as prompted")
243+
logger.debug("Model output thus far:", self.model_output)
244244
return
245245

246246
remaining = self.model_output[m.end() :]

backend/danswer/utils/text_processing.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
import string
55
from urllib.parse import quote
66

7+
from danswer.utils.logger import setup_logger
8+
9+
10+
logger = setup_logger(__name__)
711

812
ESCAPE_SEQUENCE_RE = re.compile(
913
r"""
@@ -77,7 +81,8 @@ def extract_embedded_json(s: str) -> dict:
7781
last_brace_index = s.rfind("}")
7882

7983
if first_brace_index == -1 or last_brace_index == -1:
80-
raise ValueError("No valid json found")
84+
logger.warning("No valid json found, assuming answer is entire string")
85+
return {"answer": s, "quotes": []}
8186

8287
json_str = s[first_brace_index : last_brace_index + 1]
8388
try:

backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,8 +324,13 @@ def test_lengthy_prefixed_json_with_quotes() -> None:
324324
assert quotes[0] == "Document"
325325

326326

327-
def test_prefixed_json_with_quotes() -> None:
327+
def test_json_with_lengthy_prefix_and_quotes() -> None:
328328
tokens = [
329+
"*** Based on the provided documents, there does not appear to be any information ",
330+
"directly relevant to answering which documents are my favorite. ",
331+
"The documents seem to be focused on describing the Danswer product ",
332+
"and its features/use cases. Since I do not have personal preferences ",
333+
"for documents, I will provide a general response:\n\n",
329334
"```",
330335
"json",
331336
"\n",

0 commit comments

Comments
 (0)