Skip to content

Commit 8522067

Browse files
Merge pull request #27 from scivisum/task/RD-37208_get_frame_source_by_xpath
Task/rd 37208 get frame source by xpath
2 parents ae55194 + b296cfc commit 8522067

File tree

14 files changed

+526
-49
lines changed

14 files changed

+526
-49
lines changed

browserdebuggertools/chrome/interface.py

Lines changed: 140 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
import logging
33
from base64 import b64decode, b64encode
44

5+
from lxml.etree import XPath, XPathSyntaxError
6+
7+
from browserdebuggertools.exceptions import InvalidXPathError, ResourceNotFoundError
58
from browserdebuggertools.sockethandler import SocketHandler
69

710

@@ -29,6 +32,7 @@ def __init__(self, port, timeout=30, domains=None):
2932
is a dictionary of the arguments passed with the domain upon enabling.
3033
"""
3134
self._socket_handler = SocketHandler(port, timeout, domains=domains) # type: SocketHandler
35+
self._dom_manager = _DOMManager(self._socket_handler)
3236

3337
def quit(self):
3438
self._socket_handler.close()
@@ -37,6 +41,7 @@ def reset(self):
3741
""" Clears all stored messages
3842
"""
3943
self._socket_handler.reset()
44+
self._dom_manager.reset()
4045

4146
def get_events(self, domain, clear=False):
4247
""" Retrieves all events for a given domain
@@ -125,15 +130,6 @@ def get_document_readystate(self):
125130
"""
126131
return self.execute_javascript("document.readyState")
127132

128-
def get_page_source(self):
129-
# type: () -> str
130-
"""
131-
Consider enabling the Page domain to increase performance.
132-
133-
:returns: A string serialization of the active document's DOM.
134-
"""
135-
return self._socket_handler.event_handlers["PageLoad"].get_page_source()
136-
137133
def set_user_agent_override(self, user_agent):
138134
""" Overriding user agent with the given string.
139135
:param user_agent:
@@ -185,9 +181,143 @@ def get_opened_javascript_dialog(self):
185181
"""
186182
Gets the opened javascript dialog.
187183
188-
:raises DomainNotFoundError: If the Page domain isn't enabled
184+
:raises DomainNotEnabledError: If the Page domain isn't enabled
189185
:raises JavascriptDialogNotFoundError: If there is currently no dialog open
190186
"""
191187
return (
192188
self._socket_handler.event_handlers["JavascriptDialog"].get_opened_javascript_dialog()
193189
)
190+
191+
def get_iframe_source_content(self, xpath):
    # type: (str) -> str
    """
    Returns the HTML markup for an iframe document, where the iframe node can be located in the
    DOM with the given xpath.

    :param xpath: XPath expression locating the iframe node. Its syntax is checked with lxml
                  (which implements XPath 1.0) before any DevTools call is made.
    :return: HTML markup
    :raises InvalidXPathError: The given xpath is invalid
    :raises ResourceNotFoundError: No node matches the xpath, or the first matching node is
                                   not an iframe
    :raises UnknownError: The socket handler received a message with an unknown error code
    """
    try:
        XPath(xpath)  # Compiled only to validate the syntax; the compiled object is discarded
    except XPathSyntaxError:
        # Translate the lxml error into the package's own exception type
        raise InvalidXPathError("{0} is not a valid xpath".format(xpath))

    return self._dom_manager.get_iframe_html(xpath)
211+
212+
def get_page_source(self):
    # type: () -> str
    """
    Serialises the current page to HTML markup.

    Iframe tags appear in the markup, but the documents they enclose do not. Consider enabling
    the Page domain to increase performance.

    :return: HTML markup
    """
    page_load_handler = self._socket_handler.event_handlers["PageLoad"]
    document_node_id = page_load_handler.get_root_backend_node_id()
    return self._dom_manager.get_outer_html(document_node_id)
223+
224+
225+
class _DOMManager(object):
226+
227+
def __init__(self, socket_handler):
228+
self._socket_handler = socket_handler
229+
self._node_map = {}
230+
231+
def get_outer_html(self, backend_node_id):
232+
# type: (int) -> str
233+
return self._socket_handler.execute(
234+
"DOM", "getOuterHTML", {"backendNodeId": backend_node_id}
235+
)["outerHTML"]
236+
237+
def get_iframe_html(self, xpath):
238+
# type: (str) -> str
239+
240+
backend_node_id = self._get_iframe_backend_node_id(xpath)
241+
try:
242+
return self.get_outer_html(backend_node_id)
243+
except ResourceNotFoundError:
244+
# The cached node doesn't exist any more, so we need to find a new one that matches
245+
# the xpath. Backend node IDs are unique, so there is not a risk of getting the
246+
# outer html of the wrong node.
247+
if xpath in self._node_map:
248+
del self._node_map[xpath]
249+
backend_node_id = self._get_iframe_backend_node_id(xpath)
250+
return self.get_outer_html(backend_node_id)
251+
252+
def _get_iframe_backend_node_id(self, xpath):
253+
# type: (str) -> int
254+
255+
if xpath in self._node_map:
256+
return self._node_map[xpath]
257+
258+
node_info = self._get_info_for_first_matching_node(xpath)
259+
try:
260+
261+
backend_node_id = node_info["node"]["contentDocument"]["backendNodeId"]
262+
except KeyError:
263+
raise ResourceNotFoundError("The node found by xpath '%s' is not an iframe" % xpath)
264+
265+
self._node_map[xpath] = backend_node_id
266+
return backend_node_id
267+
268+
def _get_info_for_first_matching_node(self, xpath):
269+
# type: (str) -> dict
270+
271+
with self._get_node_ids(xpath) as node_ids:
272+
if node_ids:
273+
return self._describe_node(node_ids[0])
274+
raise ResourceNotFoundError("No matching nodes for xpath: %s" % xpath)
275+
276+
@contextlib.contextmanager
277+
def _get_node_ids(self, xpath, max_matches=1):
278+
# type: (str, int) -> list
279+
280+
assert max_matches > 0
281+
search_info = self._perform_search(xpath)
282+
try:
283+
results = []
284+
if search_info["resultCount"] > 0:
285+
results = self._get_search_results(
286+
search_info["searchId"], 0, min([max_matches, search_info["resultCount"]])
287+
)["nodeIds"]
288+
yield results
289+
290+
finally:
291+
self._discard_search(search_info["searchId"])
292+
293+
def _perform_search(self, xpath):
294+
# type: (str) -> dict
295+
296+
# DOM.getDocument must have been called on the current page first otherwise performSearch
297+
# returns an array of 0s.
298+
self._socket_handler.event_handlers["PageLoad"].check_page_load()
299+
return self._socket_handler.execute("DOM", "performSearch", {"query": xpath})
300+
301+
def _get_search_results(self, search_id, from_index, to_index):
302+
# type: (str, int, int) -> dict
303+
304+
return self._socket_handler.execute("DOM", "getSearchResults", {
305+
"searchId": search_id, "fromIndex": from_index, "toIndex": to_index
306+
})
307+
308+
def _discard_search(self, search_id):
309+
# type: (str) -> None
310+
"""
311+
Discards search results for the session with the given id. get_search_results should no
312+
longer be called for that search.
313+
"""
314+
315+
self._socket_handler.execute("DOM", "discardSearchResults", {"searchId": search_id})
316+
317+
def _describe_node(self, node_id):
318+
# type: (str) -> dict
319+
320+
return self._socket_handler.execute("DOM", "describeNode", {"nodeId": node_id})
321+
322+
def reset(self):
323+
self._node_map = {}

browserdebuggertools/eventhandlers.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,9 @@ def get_current_url(self):
6464
self.check_page_load()
6565
return self._url
6666

67-
def get_page_source(self):
67+
def get_root_backend_node_id(self):
    """
    Calls check_page_load() and then returns the stored root node id.
    """
    self.check_page_load()
    # Read only after the check above has run, matching the original statement order
    root_node_id = self._root_node_id
    return root_node_id
7270

7371

7472
class JavascriptDialogEventHandler(EventHandler):

browserdebuggertools/exceptions.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,29 @@ class DomainNotEnabledError(DevToolsException):
2222
pass
2323

2424

25-
class DomainNotFoundError(ProtocolError, NotFoundError):
25+
class MethodNotFoundError(ProtocolError, NotFoundError):
    """Raised when a DevTools response carries error code -32601."""
2727

2828

2929
class ResultNotFoundError(NotFoundError):
3030
pass
3131

3232

33+
class ResourceNotFoundError(NotFoundError):
    """Raised when a requested resource, such as a DOM node, cannot be found."""
35+
36+
3337
class JavascriptDialogNotFoundError(NotFoundError):
3438
pass
3539

3640

3741
class MaxRetriesException(DevToolsException):
3842
pass
43+
44+
45+
class InvalidXPathError(DevToolsException):
    """Raised when a supplied xpath expression is not syntactically valid."""
47+
48+
49+
class UnknownError(ProtocolError):
    """Raised when a DevTools response carries an error code with no dedicated exception."""

browserdebuggertools/sockethandler.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
)
1414
from browserdebuggertools.exceptions import (
1515
DevToolsException, ResultNotFoundError, TabNotFoundError, MaxRetriesException,
16-
DomainNotEnabledError, DevToolsTimeoutException, DomainNotFoundError,
16+
DevToolsTimeoutException, DomainNotEnabledError,
17+
MethodNotFoundError, UnknownError, ResourceNotFoundError
1718
)
1819

1920

@@ -190,7 +191,16 @@ def _execute(self, domain_name, method_name, params=None):
190191

191192
def execute(self, domain_name, method_name, params=None):
    """
    Sends a command, waits for its result and returns it.

    :raises ResourceNotFoundError: The result carries error code -32000
    :raises MethodNotFoundError: The result carries error code -32601
    :raises UnknownError: The result carries any other error code
    """
    self._execute(domain_name, method_name, params)
    response = self._wait_for_result()

    # Successful responses are handed back untouched
    if "error" not in response:
        return response

    code = response["error"]["code"]
    message = response["error"]["message"]
    if code == -32000:
        raise ResourceNotFoundError(message)
    elif code == -32601:
        raise MethodNotFoundError(message)
    else:
        raise UnknownError("DevTools Protocol error code %s: %s" % (code, message))
194204

195205
def execute_async(self, domain_name, method_name, params=None):
196206
self._execute(domain_name, method_name, params)
@@ -231,6 +241,7 @@ def reset(self):
231241
self._events[domain] = []
232242

233243
self._results = {}
244+
self._next_result_id = 0
234245

235246
def _wait_for_result(self):
236247
""" Waits for a result to complete within the timeout duration then returns it.
@@ -253,11 +264,8 @@ def enable_domain(self, domain_name, parameters=None):
253264
if not parameters:
254265
parameters = {}
255266

267+
self.execute(domain_name, "enable", parameters)
256268
self._add_domain(domain_name, parameters)
257-
result = self.execute(domain_name, "enable", parameters)
258-
if "error" in result:
259-
self._remove_domain(domain_name)
260-
raise DomainNotFoundError("Domain \"{}\" not found.".format(domain_name))
261269

262270
logging.info("\"{}\" domain has been enabled".format(domain_name))
263271

dev_requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
requests
22
mock
3+
typing
4+
jinja2
5+
lxml
36
websocket-client==0.56
47
cherrypy==17.4.2

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"requests",
66
"websocket-client",
77
"typing",
8+
"lxml"
89
]
910

1011

0 commit comments

Comments
 (0)