|
12 | 12 | "html_file, expected_tag_to_xpath, expected_page_text, expected_tag_string",
|
13 | 13 | [
|
14 | 14 | (
|
15 |
| - "mock_html/text_only.html", |
| 15 | + "text_only.html", |
16 | 16 | {0: "//html/body/h1"},
|
17 | 17 | ["Hello, World!"],
|
18 | 18 | ["[ 0 ]"],
|
19 | 19 | ),
|
20 | 20 | (
|
21 |
| - "mock_html/hyperlink_only.html", |
| 21 | + "hyperlink_only.html", |
22 | 22 | {0: '//html/body/p/a[@id="link1"]'},
|
23 | 23 | ["Example Link 1"],
|
24 | 24 | ["[ @ 0 ]"],
|
25 | 25 | ),
|
26 | 26 | (
|
27 |
| - "mock_html/interactable_only.html", |
| 27 | + "interactable_only.html", |
28 | 28 | {
|
29 | 29 | 0: '//html/body/button[@id="button"]',
|
30 | 30 | 1: '//html/body/input[@id="checkbox"]',
|
|
33 | 33 | ["[ $ 0 ]", "[ $ 1 ]"],
|
34 | 34 | ),
|
35 | 35 | (
|
36 |
| - "mock_html/combination.html", |
| 36 | + "combination.html", |
37 | 37 | {
|
38 | 38 | 0: '//html/body/input[1][@id="text"]',
|
39 | 39 | 1: '//html/body/input[2][@id="checkbox"]',
|
|
43 | 43 | ["[ # 0 ]", "[ $ 1 ]", "[ 2 ]"],
|
44 | 44 | ),
|
45 | 45 | (
|
46 |
| - "mock_html/insertable_only.html", |
| 46 | + "insertable_only.html", |
47 | 47 | {0: '//html/body/input[@id="text"]'},
|
48 | 48 | ["Enter text here"],
|
49 | 49 | ["[ # 0 ]"],
|
50 | 50 | ),
|
51 | 51 | (
|
52 |
| - "mock_html/br_elem.html", |
| 52 | + "br_elem.html", |
53 | 53 | {
|
54 | 54 | 0: "//html/body/div",
|
55 | 55 | 1: "//html/body/div",
|
|
59 | 59 | ["[ 0 ]", "[ 1 ]", "[ 2 ]"],
|
60 | 60 | ),
|
61 | 61 | (
|
62 |
| - "mock_html/display_contents.html", |
| 62 | + "display_contents.html", |
63 | 63 | {
|
64 | 64 | 0: "//html/body/div",
|
65 | 65 | },
|
66 | 66 | ["Display contents"],
|
67 | 67 | ["[ 0 ]"],
|
68 | 68 | ),
|
69 | 69 | (
|
70 |
| - "mock_html/icon_buttons.html", |
| 70 | + "icon_buttons.html", |
71 | 71 | {
|
72 | 72 | 0: "//html/body/button[1]",
|
73 | 73 | 1: "//html/body/button[2]",
|
|
76 | 76 | ["[ $ 0 ]", "[ $ 1 ]"],
|
77 | 77 | ),
|
78 | 78 | (
|
79 |
| - "mock_html/image.html", |
| 79 | + "image.html", |
80 | 80 | {},
|
81 | 81 | ["Hello World"],
|
82 | 82 | [],
|
83 | 83 | ),
|
84 | 84 | pytest.param(
|
85 |
| - "mock_html/japanese.html", |
| 85 | + "japanese.html", |
86 | 86 | {
|
87 | 87 | 0: '//html/body/p[@id="japanese"]',
|
88 | 88 | },
|
|
93 | 93 | ),
|
94 | 94 | ),
|
95 | 95 | pytest.param(
|
96 |
| - "mock_html/russian.html", |
| 96 | + "russian.html", |
97 | 97 | {
|
98 | 98 | 0: '//html/body/p[@id="russian"]',
|
99 | 99 | },
|
|
104 | 104 | ),
|
105 | 105 | ),
|
106 | 106 | pytest.param(
|
107 |
| - "mock_html/chinese.html", |
| 107 | + "chinese.html", |
108 | 108 | {
|
109 | 109 | 0: '//html/body/p[@id="chinese"]',
|
110 | 110 | },
|
|
115 | 115 | ),
|
116 | 116 | ),
|
117 | 117 | pytest.param(
|
118 |
| - "mock_html/arabic.html", |
| 118 | + "arabic.html", |
119 | 119 | {
|
120 | 120 | 0: '//html/body/p[@id="arabic"]',
|
121 | 121 | },
|
|
126 | 126 | ),
|
127 | 127 | ),
|
128 | 128 | pytest.param(
|
129 |
| - "mock_html/hindi.html", |
| 129 | + "hindi.html", |
130 | 130 | {
|
131 | 131 | 0: '//html/body/p[@id="hindi"]',
|
132 | 132 | },
|
|
137 | 137 | ),
|
138 | 138 | ),
|
139 | 139 | (
|
140 |
| - "mock_html/dropdown.html", |
| 140 | + "dropdown.html", |
141 | 141 | {
|
142 | 142 | 0: "//html/body/label",
|
143 | 143 | },
|
144 | 144 | ["Option 1"],
|
145 | 145 | ["[ $ 0 ]"],
|
146 | 146 | ),
|
147 | 147 | (
|
148 |
| - "mock_html/iframe.html", |
| 148 | + "iframe.html", |
149 | 149 | {
|
150 | 150 | 0: "iframe[0]//html/body/p",
|
151 | 151 | },
|
|
156 | 156 | )
|
157 | 157 | async def test_combined_elements_page(
|
158 | 158 | tarsier,
|
159 |
| - async_page, |
| 159 | + page_context, |
160 | 160 | html_file,
|
161 | 161 | expected_tag_to_xpath,
|
162 | 162 | expected_page_text,
|
163 | 163 | expected_tag_string,
|
164 | 164 | ):
|
165 |
| - html_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), html_file)) |
166 |
| - await async_page.goto(f"file://{html_file_path}") |
167 |
| - |
168 |
| - page_text, tag_to_xpath = await tarsier.page_to_text( |
169 |
| - async_page, tag_text_elements=True |
170 |
| - ) |
| 165 | + async with page_context(html_file) as page: |
| 166 | + page_text, tag_to_xpath = await tarsier.page_to_text( |
| 167 | + page, tag_text_elements=True |
| 168 | + ) |
171 | 169 |
|
172 |
| - assert tag_to_xpath == expected_tag_to_xpath, ( |
173 |
| - f"tag_to_xpath does not match expected output for " |
174 |
| - f"{html_file}. Got: {tag_to_xpath}" |
175 |
| - ) |
| 170 | + assert tag_to_xpath == expected_tag_to_xpath, ( |
| 171 | + f"tag_to_xpath does not match expected output for " |
| 172 | + f"{html_file}. Got: {tag_to_xpath}" |
| 173 | + ) |
176 | 174 |
|
177 |
| - # TODO: revert to testing against entire string when colour tagging is merged |
178 |
| - for expected_text in expected_page_text: |
179 |
| - normalized_expected_text = "".join( |
180 |
| - expected_text.split() |
181 |
| - ) # Remove whitespace from expected text |
182 |
| - page_text_combined = "".join(page_text).replace(" ", "") |
| 175 | + # TODO: revert to testing against entire string when colour tagging is merged |
| 176 | + for expected_text in expected_page_text: |
| 177 | + normalized_expected_text = "".join( |
| 178 | + expected_text.split() |
| 179 | + ) # Remove whitespace from expected text |
| 180 | + page_text_combined = "".join(page_text).replace(" ", "") |
183 | 181 |
|
184 |
| - assert all(char in page_text_combined for char in normalized_expected_text), ( |
185 |
| - f"Expected text '{expected_text}' not found in page text for {html_file}. " |
186 |
| - f"Got: {page_text}" |
187 |
| - ) |
| 182 | + assert all( |
| 183 | + char in page_text_combined for char in normalized_expected_text |
| 184 | + ), ( |
| 185 | + f"Expected text '{expected_text}' not found in page text for {html_file}. " |
| 186 | + f"Got: {page_text}" |
| 187 | + ) |
188 | 188 |
|
189 |
| - for expected_tag in expected_tag_string: |
190 |
| - assert expected_tag in page_text, ( |
191 |
| - f"Expected tag '{expected_tag}' not found in page text for {html_file}. " |
192 |
| - f"Got: {page_text}" |
193 |
| - ) |
| 189 | + for expected_tag in expected_tag_string: |
| 190 | + assert expected_tag in page_text, ( |
| 191 | + f"Expected tag '{expected_tag}' not found in page text for {html_file}. " |
| 192 | + f"Got: {page_text}" |
| 193 | + ) |
194 | 194 |
|
195 | 195 |
|
196 | 196 | @pytest.mark.asyncio
|
197 |
| -async def test_text_nodes_are_query_selectable(async_page): |
198 |
| - text_node_html_path = os.path.abspath( |
199 |
| - os.path.join(os.path.dirname(__file__), "mock_html/text_nodes.html") |
200 |
| - ) |
201 |
| - await async_page.goto(f"file://{text_node_html_path}") |
202 |
| - tarsier = Tarsier(DummyOCRService()) |
203 |
| - _, tag_to_xpath = await tarsier.page_to_text(async_page, tag_text_elements=True) |
| 197 | +async def test_text_nodes_are_query_selectable(page_context): |
| 198 | + async with page_context("text_nodes.html") as page: |
| 199 | + tarsier = Tarsier(DummyOCRService()) |
| 200 | + _, tag_to_xpath = await tarsier.page_to_text(page, tag_text_elements=True) |
204 | 201 |
|
205 |
| - # Query selector will specifically filter out TextNodes within XPath selectors |
206 |
| - # As a result, the tagged xpath for the text node should belong to the parent |
207 |
| - # https://github.com/microsoft/playwright/blob/main/packages/playwright-core/src/server/injected/xpathSelectorEngine.ts#L29-L30) |
208 |
| - assert len(tag_to_xpath) == 2 |
209 |
| - assert await async_page.query_selector(tag_to_xpath[0]) |
210 |
| - assert await async_page.query_selector(tag_to_xpath[1]) |
| 202 | + # Query selector will specifically filter out TextNodes within XPath selectors |
| 203 | + # As a result, the tagged xpath for the text node should belong to the parent |
| 204 | + # https://github.com/microsoft/playwright/blob/main/packages/playwright-core/src/server/injected/xpathSelectorEngine.ts#L29-L30) |
| 205 | + assert len(tag_to_xpath) == 2 |
| 206 | + assert await page.query_selector(tag_to_xpath[0]) |
| 207 | + assert await page.query_selector(tag_to_xpath[1]) |
211 | 208 |
|
212 | 209 |
|
213 | 210 | @pytest.mark.asyncio
|
214 |
| -async def test_dropdown_text_not_shown(tarsier, async_page): |
215 |
| - dropdown_html_path = os.path.abspath( |
216 |
| - os.path.join(os.path.dirname(__file__), "mock_html/dropdown.html") |
217 |
| - ) |
218 |
| - await async_page.goto(f"file://{dropdown_html_path}") |
219 |
| - page_text, tag_to_xpath = await tarsier.page_to_text( |
220 |
| - async_page, tag_text_elements=True |
221 |
| - ) |
| 211 | +async def test_dropdown_text_not_shown(tarsier, page_context): |
| 212 | + async with page_context("dropdown.html") as page: |
| 213 | + page_text, tag_to_xpath = await tarsier.page_to_text( |
| 214 | + page, tag_text_elements=True |
| 215 | + ) |
222 | 216 |
|
223 |
| - assert "[ $ 1 ]" not in page_text |
224 |
| - assert "[ $ 2 ]" not in page_text |
225 |
| - assert "[ $ 3 ]" not in page_text |
226 |
| - assert "[ $ 4 ]" not in page_text |
227 |
| - assert "Option 2" not in page_text |
228 |
| - assert "Option 3" not in page_text |
229 |
| - assert "Option 4" not in page_text |
| 217 | + assert "[ $ 1 ]" not in page_text |
| 218 | + assert "[ $ 2 ]" not in page_text |
| 219 | + assert "[ $ 3 ]" not in page_text |
| 220 | + assert "[ $ 4 ]" not in page_text |
| 221 | + assert "Option 2" not in page_text |
| 222 | + assert "Option 3" not in page_text |
| 223 | + assert "Option 4" not in page_text |
0 commit comments