11
11
12
12
logger = logging .getLogger (__name__ )
13
13
14
+ # Shared test tolerances: absolute tolerances passed to pytest.approx, and the minimum similarity ratio for extracted text
15
+ COORDINATE_TOLERANCE = 2
16
+ PERCENTAGE_TOLERANCE = 0.05
17
+ PAGE_HEIGHT_TOLERANCE = 5
18
+ OCR_SIMILARITY_THRESHOLD = 0.90
19
+
14
20
15
21
def test_get_usage_info (client_v2 : LLMWhispererClientV2 ) -> None :
16
22
usage_info = client_v2 .get_usage_info ()
@@ -28,6 +34,7 @@ def test_get_usage_info(client_v2: LLMWhispererClientV2) -> None:
28
34
"overage_page_count" ,
29
35
"subscription_plan" ,
30
36
"today_page_count" ,
37
+ "current_page_count_table" ,
31
38
]
32
39
assert set (usage_info .keys ()) == set (expected_keys ), f"usage_info { usage_info } does not contain the expected keys"
33
40
@@ -103,12 +110,12 @@ def test_highlight(client_v2: LLMWhispererClientV2, data_dir: str, input_file: s
103
110
104
111
# Assert line 2 data
105
112
line2 = highlight_data ["2" ]
106
- assert line2 ["base_y" ] == 155
107
- assert line2 ["base_y_percent" ] == pytest .approx (4.8927 ) # Using approx for float comparison
108
- assert line2 ["height" ] == 51
109
- assert line2 ["height_percent" ] == pytest .approx (1.6098 ) # Using approx for float comparison
113
+ assert line2 ["base_y" ] == pytest . approx ( 155 , abs = COORDINATE_TOLERANCE )
114
+ assert line2 ["base_y_percent" ] == pytest .approx (4.8927 , abs = PERCENTAGE_TOLERANCE )
115
+ assert line2 ["height" ] == pytest . approx ( 51 , abs = COORDINATE_TOLERANCE )
116
+ assert line2 ["height_percent" ] == pytest .approx (1.6098 , abs = PERCENTAGE_TOLERANCE )
110
117
assert line2 ["page" ] == 0
111
- assert line2 ["page_height" ] == 3168
118
+ assert line2 ["page_height" ] == pytest . approx ( 3168 , abs = PAGE_HEIGHT_TOLERANCE )
112
119
113
120
114
121
@pytest .mark .parametrize (
@@ -170,7 +177,7 @@ def test_whisper_v2_url_in_post(
170
177
"url,token,webhook_name" ,
171
178
[
172
179
(
173
- " https://webhook.site/0990fff9-ce95-4d11-95e1-be9ad38c40d6" , # need to find a clean solution
180
+ os . getenv ( "WEBHOOK_TEST_URL" , " https://httpbin.org/post" ) , # configurable via env var, defaults to httpbin.org
174
181
"" ,
175
182
"client_v2_test" ,
176
183
),
@@ -237,13 +244,13 @@ def assert_extracted_text(file_path: str, whisper_result: dict, mode: str, outpu
237
244
assert whisper_result ["status_code" ] == 200
238
245
239
246
# For OCR-based processing
240
- threshold = 0.94
247
+ threshold = OCR_SIMILARITY_THRESHOLD
241
248
242
249
# For text-based processing
243
250
if mode == "native_text" and output_mode == "text" :
244
251
threshold = 0.99
245
252
elif mode == "low_cost" :
246
- threshold = 0.90
253
+ threshold = OCR_SIMILARITY_THRESHOLD
247
254
extracted_text = whisper_result ["extraction" ]["result_text" ]
248
255
similarity = SequenceMatcher (None , extracted_text , exp ).ratio ()
249
256
0 commit comments