Skip to content

Commit ad9c474

Browse files
Improve code quality and bump version to 2.4.1
Code Quality Improvements:
- Fix sample.env: Update webhook URL to httpbin.org for consistency
- Add test tolerance constants for better maintainability:
  * COORDINATE_TOLERANCE = 2
  * PERCENTAGE_TOLERANCE = 0.05
  * PAGE_HEIGHT_TOLERANCE = 5
  * OCR_SIMILARITY_THRESHOLD = 0.90
- Improve error message consistency with "API error:" prefix
- Add response truncation (500 chars) to prevent log pollution
- Use constants in all test assertions for easier maintenance

Version Bump:
- Update version from 2.4.0 to 2.4.1
- Patch version bump reflects bug fixes and reliability improvements
- No breaking changes, backward-compatible enhancements

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 56f5f14 commit ad9c474

File tree

4 files changed

+21
-13
lines changed

4 files changed

+21
-13
lines changed

sample.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1
 LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
 LLMWHISPERER_LOG_LEVEL=DEBUG
 LLMWHISPERER_API_KEY=
-WEBHOOK_TEST_URL=https://webhook.site/7c69ca19-5853-4cd1-a4c5-03570b63dda4
+WEBHOOK_TEST_URL=https://httpbin.org/post

src/unstract/llmwhisperer/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
-__version__ = "2.4.0"
+__version__ = "2.4.1"
 
 from .client_v2 import LLMWhispererClientV2 # noqa: F401
 
src/unstract/llmwhisperer/client_v2.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -447,13 +447,15 @@ def whisper_status(self, whisper_hash: str) -> Any:
447447
response = s.send(prepared, timeout=self.api_timeout)
448448
if response.status_code != 200:
449449
if not (response.text or "").strip():
450-
self.logger.error(f"Empty response body from API, status code: {response.status_code}")
451-
raise LLMWhispererClientException("Empty response body from API", response.status_code)
450+
self.logger.error(f"API error - empty response body, status code: {response.status_code}")
451+
raise LLMWhispererClientException("API error: empty response body", response.status_code)
452452
try:
453453
err = json.loads(response.text)
454454
except json.JSONDecodeError as e:
455-
self.logger.error(f"JSON decode error: {e}; Response text: {response.text!r}")
456-
raise LLMWhispererClientException(f"Non-JSON response: {response.text}", response.status_code) from e
455+
# Truncate response text if too long to avoid log pollution
456+
response_preview = response.text[:500] + "..." if len(response.text) > 500 else response.text
457+
self.logger.error(f"API error - JSON decode failed: {e}; Response preview: {response_preview!r}")
458+
raise LLMWhispererClientException(f"API error: non-JSON response - {response_preview}", response.status_code) from e
457459
raise LLMWhispererClientException(err, response.status_code)
458460
message = json.loads(response.text)
459461
message["status_code"] = response.status_code

tests/integration/client_v2_test.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111

1212
logger = logging.getLogger(__name__)
1313

14+
# Test tolerance constants for better maintainability
15+
COORDINATE_TOLERANCE = 2
16+
PERCENTAGE_TOLERANCE = 0.05
17+
PAGE_HEIGHT_TOLERANCE = 5
18+
OCR_SIMILARITY_THRESHOLD = 0.90
19+
1420

1521
def test_get_usage_info(client_v2: LLMWhispererClientV2) -> None:
1622
usage_info = client_v2.get_usage_info()
@@ -103,12 +109,12 @@ def test_highlight(client_v2: LLMWhispererClientV2, data_dir: str, input_file: s
103109

104110
# Assert line 2 data
105111
line2 = highlight_data["2"]
106-
assert line2["base_y"] == pytest.approx(155, abs=2)
107-
assert line2["base_y_percent"] == pytest.approx(4.8927, abs=0.05) # Using approx for float comparison
108-
assert line2["height"] == pytest.approx(51, abs=2)
109-
assert line2["height_percent"] == pytest.approx(1.6098, abs=0.05) # Using approx for float comparison
112+
assert line2["base_y"] == pytest.approx(155, abs=COORDINATE_TOLERANCE)
113+
assert line2["base_y_percent"] == pytest.approx(4.8927, abs=PERCENTAGE_TOLERANCE)
114+
assert line2["height"] == pytest.approx(51, abs=COORDINATE_TOLERANCE)
115+
assert line2["height_percent"] == pytest.approx(1.6098, abs=PERCENTAGE_TOLERANCE)
110116
assert line2["page"] == 0
111-
assert line2["page_height"] == pytest.approx(3168, abs=5)
117+
assert line2["page_height"] == pytest.approx(3168, abs=PAGE_HEIGHT_TOLERANCE)
112118

113119

114120
@pytest.mark.parametrize(
@@ -237,13 +243,13 @@ def assert_extracted_text(file_path: str, whisper_result: dict, mode: str, outpu
237243
assert whisper_result["status_code"] == 200
238244

239245
# For OCR based processing
240-
threshold = 0.90
246+
threshold = OCR_SIMILARITY_THRESHOLD
241247

242248
# For text based processing
243249
if mode == "native_text" and output_mode == "text":
244250
threshold = 0.99
245251
elif mode == "low_cost":
246-
threshold = 0.90
252+
threshold = OCR_SIMILARITY_THRESHOLD
247253
extracted_text = whisper_result["extraction"]["result_text"]
248254
similarity = SequenceMatcher(None, extracted_text, exp).ratio()
249255

0 commit comments

Comments
 (0)