Improve code quality and bump version to 2.4.1

muhammad-ali-e · claude · muhammad-ali-e · commit ad9c4740a288 · 2025-07-10T09:58:39.000+05:30
Code Quality Improvements:
- Fix sample.env: Update webhook URL to httpbin.org for consistency
- Add test tolerance constants for better maintainability:
  * COORDINATE_TOLERANCE = 2
  * PERCENTAGE_TOLERANCE = 0.05
  * PAGE_HEIGHT_TOLERANCE = 5
  * OCR_SIMILARITY_THRESHOLD = 0.90
- Improve error message consistency with "API error:" prefix
- Add response truncation (500 chars) to prevent log pollution
- Use constants in all test assertions for easier maintenance

Version Bump:
- Update version from 2.4.0 to 2.4.1
- Patch version bump reflects bug fixes and reliability improvements
- No breaking changes, backward-compatible enhancements

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/sample.env b/sample.env
@@ -2,4 +2,4 @@ LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1
 LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
 LLMWHISPERER_LOG_LEVEL=DEBUG
 LLMWHISPERER_API_KEY=
-WEBHOOK_TEST_URL=https://webhook.site/7c69ca19-5853-4cd1-a4c5-03570b63dda4
+WEBHOOK_TEST_URL=https://httpbin.org/post
diff --git a/src/unstract/llmwhisperer/__init__.py b/src/unstract/llmwhisperer/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "2.4.0"
+__version__ = "2.4.1"
 
 from .client_v2 import LLMWhispererClientV2  # noqa: F401
 
diff --git a/src/unstract/llmwhisperer/client_v2.py b/src/unstract/llmwhisperer/client_v2.py
@@ -447,13 +447,15 @@ def whisper_status(self, whisper_hash: str) -> Any:
         response = s.send(prepared, timeout=self.api_timeout)
         if response.status_code != 200:
             if not (response.text or "").strip():
-                self.logger.error(f"Empty response body from API, status code: {response.status_code}")
-                raise LLMWhispererClientException("Empty response body from API", response.status_code)
+                self.logger.error(f"API error - empty response body, status code: {response.status_code}")
+                raise LLMWhispererClientException("API error: empty response body", response.status_code)
             try:
                 err = json.loads(response.text)
             except json.JSONDecodeError as e:
-                self.logger.error(f"JSON decode error: {e}; Response text: {response.text!r}")
-                raise LLMWhispererClientException(f"Non-JSON response: {response.text}", response.status_code) from e
+                # Truncate response text if too long to avoid log pollution
+                response_preview = response.text[:500] + "..." if len(response.text) > 500 else response.text
+                self.logger.error(f"API error - JSON decode failed: {e}; Response preview: {response_preview!r}")
+                raise LLMWhispererClientException(f"API error: non-JSON response - {response_preview}", response.status_code) from e
             raise LLMWhispererClientException(err, response.status_code)
         message = json.loads(response.text)
         message["status_code"] = response.status_code
diff --git a/tests/integration/client_v2_test.py b/tests/integration/client_v2_test.py
@@ -11,6 +11,12 @@
 
 logger = logging.getLogger(__name__)
 
+# Test tolerance constants for better maintainability
+COORDINATE_TOLERANCE = 2
+PERCENTAGE_TOLERANCE = 0.05
+PAGE_HEIGHT_TOLERANCE = 5
+OCR_SIMILARITY_THRESHOLD = 0.90
+
 
 def test_get_usage_info(client_v2: LLMWhispererClientV2) -> None:
     usage_info = client_v2.get_usage_info()
@@ -103,12 +109,12 @@ def test_highlight(client_v2: LLMWhispererClientV2, data_dir: str, input_file: s
 
     # Assert line 2 data
     line2 = highlight_data["2"]
-    assert line2["base_y"] == pytest.approx(155, abs=2)
-    assert line2["base_y_percent"] == pytest.approx(4.8927, abs=0.05)  # Using approx for float comparison
-    assert line2["height"] == pytest.approx(51, abs=2)
-    assert line2["height_percent"] == pytest.approx(1.6098, abs=0.05)  # Using approx for float comparison
+    assert line2["base_y"] == pytest.approx(155, abs=COORDINATE_TOLERANCE)
+    assert line2["base_y_percent"] == pytest.approx(4.8927, abs=PERCENTAGE_TOLERANCE)
+    assert line2["height"] == pytest.approx(51, abs=COORDINATE_TOLERANCE)
+    assert line2["height_percent"] == pytest.approx(1.6098, abs=PERCENTAGE_TOLERANCE)
     assert line2["page"] == 0
-    assert line2["page_height"] == pytest.approx(3168, abs=5)
+    assert line2["page_height"] == pytest.approx(3168, abs=PAGE_HEIGHT_TOLERANCE)
 
 
 @pytest.mark.parametrize(
@@ -237,13 +243,13 @@ def assert_extracted_text(file_path: str, whisper_result: dict, mode: str, outpu
     assert whisper_result["status_code"] == 200
 
     # For OCR based processing
-    threshold = 0.90
+    threshold = OCR_SIMILARITY_THRESHOLD
 
     # For text based processing
     if mode == "native_text" and output_mode == "text":
         threshold = 0.99
     elif mode == "low_cost":
-        threshold = 0.90
+        threshold = OCR_SIMILARITY_THRESHOLD
     extracted_text = whisper_result["extraction"]["result_text"]
     similarity = SequenceMatcher(None, extracted_text, exp).ratio()
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,4 +1,4 @@` |
| `1` | | `-__version__ = "2.4.0"` |
| | `1` | `+__version__ = "2.4.1"` |
| `2` | `2` | |
| `3` | `3` | `from .client_v2 import LLMWhispererClientV2  # noqa: F401` |
| `4` | `4` | |