Skip to content

Commit 8c8b5bc

Browse files
fix: Display whisper-hash in case of errors (#180)
Signed-off-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com>
1 parent f194a2e commit 8c8b5bc

File tree

3 files changed

+41
-27
lines changed

3 files changed

+41
-27
lines changed

src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,8 @@ class WhispererEnv:
3939
Can be used to alter behaviour at runtime.
4040
4141
Attributes:
42-
POLL_INTERVAL: Time in seconds to wait before polling
43-
LLMWhisperer's status API. Defaults to 30s
44-
MAX_POLLS: Total number of times to poll the status API.
45-
Set to -1 to poll indefinitely. Defaults to -1
46-
STATUS_RETRIES: Number of times to retry calling LLLMWhisperer's
47-
status API on failure during polling. Defaults to 5.
42+
WAIT_TIMEOUT: Timeout for the extraction in seconds. Defaults to 900s
43+
LOG_LEVEL: Logging level for the client library. Defaults to INFO
4844
"""
4945

5046
WAIT_TIMEOUT = "ADAPTER_LLMW_WAIT_TIMEOUT"
@@ -108,6 +104,6 @@ class WhispererDefaults:
108104
URL_IN_POST = False
109105
TAG = "default"
110106
TEXT_ONLY = False
111-
WAIT_TIMEOUT = int(os.getenv(WhispererEnv.WAIT_TIMEOUT, 300))
107+
WAIT_TIMEOUT = int(os.getenv(WhispererEnv.WAIT_TIMEOUT, 900))
112108
WAIT_FOR_COMPLETION = True
113109
LOGGING_LEVEL = os.getenv(WhispererEnv.LOG_LEVEL, "INFO")

src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,38 +60,41 @@ def test_connection_request(
6060
"Unable to connect to LLMWhisperer service, please check the URL",
6161
actual_err=e,
6262
status_code=503,
63-
)
63+
) from e
6464
except Timeout as e:
6565
msg = "Request to LLMWhisperer has timed out"
6666
logger.error(f"{msg}: {e}")
67-
raise ExtractorError(msg, actual_err=e, status_code=504)
67+
raise ExtractorError(msg, actual_err=e, status_code=504) from e
6868
except HTTPError as e:
6969
logger.error(f"Adapter error: {e}")
7070
default_err = "Error while calling the LLMWhisperer service"
7171
msg = AdapterUtils.get_msg_from_request_exc(
7272
err=e, message_key="message", default_err=default_err
7373
)
74-
raise ExtractorError(msg, status_code=e.response.status_code, actual_err=e)
74+
raise ExtractorError(
75+
msg, status_code=e.response.status_code, actual_err=e
76+
) from e
7577

7678
@staticmethod
7779
def make_request(
7880
config: dict[str, Any],
7981
headers: dict[str, Any] | None = None,
8082
params: dict[str, Any] | None = None,
81-
data: Any | None = None,
83+
data: BytesIO | None = None,
8284
type: str = "whisper",
8385
) -> Response:
8486
"""Makes a request to LLMWhisperer service.
8587
8688
Args:
87-
request_method (HTTPMethod): HTTPMethod to call. Can be GET or POST
88-
request_endpoint (str): LLMWhisperer endpoint to hit
89+
config (dict[str, Any]): LLMWhisperer config to use
8990
headers (Optional[dict[str, Any]], optional): Headers to pass.
9091
Defaults to None.
9192
params (Optional[dict[str, Any]], optional): Query params to pass.
9293
Defaults to None.
93-
data (Optional[Any], optional): Data to pass in case of POST.
94+
data (Optional[BytesIO], optional): Data to pass in case of POST.
9495
Defaults to None.
96+
type (str, optional): Type of request / endpoint in LLMWhisperer.
97+
Defaults to "whisper".
9598
9699
Returns:
97100
Response: Response from the request
@@ -110,11 +113,19 @@ def make_request(
110113
if type == "whisper":
111114
response = client.whisper(**params, stream=data)
112115
if response["status_code"] == 200:
116+
logger.debug(
117+
"Successfully extracted for whisper hash: "
118+
f"{response.get(X2TextConstants.WHISPER_HASH_V2, '')}"
119+
)
113120
response["extraction"][X2TextConstants.WHISPER_HASH_V2] = (
114121
response.get(X2TextConstants.WHISPER_HASH_V2, "")
115122
)
116123
return response["extraction"]
117124
else:
125+
response["message"] += (
126+
". Whisper hash: "
127+
f"{response.get(X2TextConstants.WHISPER_HASH_V2, '')}"
128+
)
118129
raise ExtractorError(
119130
response["message"],
120131
response["status_code"],
@@ -130,18 +141,18 @@ def make_request(
130141
"Unable to connect to LLMWhisperer service, please check the URL",
131142
actual_err=e,
132143
status_code=503,
133-
)
144+
) from e
134145
except Timeout as e:
135146
msg = "Request to LLMWhisperer has timed out"
136147
logger.error(f"{msg}: {e}")
137-
raise ExtractorError(msg, actual_err=e, status_code=504)
148+
raise ExtractorError(msg, actual_err=e, status_code=504) from e
138149
except LLMWhispererClientException as e:
139150
logger.error(f"LLM Whisperer error: {e}")
140151
raise ExtractorError(
141152
message=f"LLM Whisperer error: {e}",
142153
actual_err=e,
143154
status_code=500,
144-
)
155+
) from e
145156

146157
return response
147158

@@ -251,7 +262,7 @@ def send_whisper_request(
251262
response["line_metadata"] = highlight_data
252263
except OSError as e:
253264
logger.error(f"OS error while reading {input_file_path}: {e}")
254-
raise ExtractorError(str(e))
265+
raise ExtractorError(str(e)) from e
255266
return response
256267

257268
@staticmethod
@@ -261,10 +272,12 @@ def make_highlight_data_request(
261272
"""Makes a call to get highlight data from LLMWhisperer.
262273
263274
Args:
275+
config (dict[str, Any]): LLMWhisperer config to use
264276
whisper_hash (str): Identifier of the extraction
277+
enable_highlight (bool): Whether to enable highlight
265278
266279
Returns:
267-
str: Extracted contents from the file
280+
dict[Any, Any]: Highlight data
268281
"""
269282
logger.info(f"Extracting async for whisper hash: {whisper_hash}")
270283

@@ -307,14 +320,17 @@ def write_output_to_file(
307320
output_file_path: Path,
308321
fs: FileStorage = FileStorage(provider=FileStorageProvider.LOCAL),
309322
) -> None:
310-
"""Writes the extracted text and metadata to the specified output file
323+
"""Write LLMW outputs to file.
324+
325+
Writes the extracted text and metadata to the specified output file
311326
and metadata file.
312327
313328
Args:
314329
output_json (dict): The dictionary containing the extracted data,
315330
with "text" as the key for the main content.
316331
output_file_path (Path): The file path where the extracted text
317332
should be written.
333+
fs (FileStorage): File storage instance to use for writing
318334
319335
Raises:
320336
ExtractorError: If there is an error while writing the output file.
@@ -330,7 +346,7 @@ def write_output_to_file(
330346
)
331347
except Exception as e:
332348
logger.error(f"Error while writing {output_file_path}: {e}")
333-
raise ExtractorError(str(e))
349+
raise ExtractorError(str(e)) from e
334350
try:
335351
# Define the directory of the output file and metadata paths
336352
output_dir = output_file_path.parent

src/unstract/sdk/prompt.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,13 @@ def __init__(
2121
prompt_port: str,
2222
is_public_call: bool = False,
2323
) -> None:
24-
"""Args:
25-
tool (AbstractTool): Instance of AbstractTool
26-
prompt_host (str): Host of platform service
27-
prompt_host (str): Port of platform service
24+
"""Class to interact with prompt-service.
25+
26+
Args:
27+
tool (AbstractTool): Instance of AbstractTool
28+
prompt_host (str): Host of platform service
29+
prompt_port (str): Port of platform service
30+
is_public_call (bool): Whether the call is public. Defaults to False
2831
"""
2932
self.tool = tool
3033
self.base_url = SdkHelper.get_platform_base_url(prompt_host, prompt_port)
@@ -113,8 +116,7 @@ def _post_call(
113116
params: dict[str, str] | None = None,
114117
headers: dict[str, str] | None = None,
115118
) -> dict[str, Any]:
116-
"""Invokes and communicates to prompt service to fetch response for the
117-
prompt.
119+
"""Communicates with the prompt service to fetch the response for the prompt.
118120
119121
Args:
120122
url_path (str): URL path to the service endpoint

0 commit comments

Comments
 (0)