22import logging
33from base64 import b64decode , b64encode
44
5+ from lxml .etree import XPath , XPathSyntaxError
6+
7+ from browserdebuggertools .exceptions import InvalidXPathError , ResourceNotFoundError
58from browserdebuggertools .sockethandler import SocketHandler
69
710
@@ -29,6 +32,7 @@ def __init__(self, port, timeout=30, domains=None):
2932 is a dictionary of the arguments passed with the domain upon enabling.
3033 """
3134 self ._socket_handler = SocketHandler (port , timeout , domains = domains ) # type: SocketHandler
35+ self ._dom_manager = _DOMManager (self ._socket_handler )
3236
3337 def quit (self ):
3438 self ._socket_handler .close ()
@@ -37,6 +41,7 @@ def reset(self):
3741 """ Clears all stored messages
3842 """
3943 self ._socket_handler .reset ()
44+ self ._dom_manager .reset ()
4045
4146 def get_events (self , domain , clear = False ):
4247 """ Retrieves all events for a given domain
@@ -125,15 +130,6 @@ def get_document_readystate(self):
125130 """
126131 return self .execute_javascript ("document.readyState" )
127132
128- def get_page_source (self ):
129- # type: () -> str
130- """
131- Consider enabling the Page domain to increase performance.
132-
133- :returns: A string serialization of the active document's DOM.
134- """
135- return self ._socket_handler .event_handlers ["PageLoad" ].get_page_source ()
136-
137133 def set_user_agent_override (self , user_agent ):
138134 """ Overriding user agent with the given string.
139135 :param user_agent:
@@ -185,9 +181,143 @@ def get_opened_javascript_dialog(self):
185181 """
186182 Gets the opened javascript dialog.
187183
188- :raises DomainNotFoundError : If the Page domain isn't enabled
184+ :raises DomainNotEnabledError : If the Page domain isn't enabled
189185 :raises JavascriptDialogNotFoundError: If there is currently no dialog open
190186 """
191187 return (
192188 self ._socket_handler .event_handlers ["JavascriptDialog" ].get_opened_javascript_dialog ()
193189 )
190+
def get_iframe_source_content(self, xpath):
    # type: (str) -> str
    """
    Returns the HTML markup for an iframe document, where the iframe node can be located in the
    DOM with the given xpath.

    :param xpath: XPath expression locating the iframe node. It is validated up front by
                  compiling it with lxml.etree.XPath, so it must be an expression lxml accepts
                  (lxml implements XPath 1.0).
    :return: HTML markup
    :raises InvalidXPathError: The given xpath is invalid
    :raises ResourceNotFoundError: No node matches the xpath, or the first matching node is not
                                   an iframe
    :raises UnknownError: The socket handler received a message with an unknown error code
    """
    try:
        # Compiled purely to validate the syntax before talking to the browser; the compiled
        # expression itself is discarded.
        XPath(xpath)
    except XPathSyntaxError:
        raise InvalidXPathError("{0} is not a valid xpath".format(xpath))

    return self._dom_manager.get_iframe_html(xpath)
211+
def get_page_source(self):
    # type: () -> str
    """
    Fetches the HTML markup of the page that is currently loaded.

    The markup contains the iframe tags themselves but not the documents nested inside them.
    Consider enabling the Page domain to increase performance.

    :return: HTML markup
    """
    # The "PageLoad" event handler supplies the backend node id of the document root; the DOM
    # manager then serialises that node.
    page_load_handler = self._socket_handler.event_handlers["PageLoad"]
    document_node_id = page_load_handler.get_root_backend_node_id()
    return self._dom_manager.get_outer_html(document_node_id)
223+
224+
225+ class _DOMManager (object ):
226+
227+ def __init__ (self , socket_handler ):
228+ self ._socket_handler = socket_handler
229+ self ._node_map = {}
230+
231+ def get_outer_html (self , backend_node_id ):
232+ # type: (int) -> str
233+ return self ._socket_handler .execute (
234+ "DOM" , "getOuterHTML" , {"backendNodeId" : backend_node_id }
235+ )["outerHTML" ]
236+
237+ def get_iframe_html (self , xpath ):
238+ # type: (str) -> str
239+
240+ backend_node_id = self ._get_iframe_backend_node_id (xpath )
241+ try :
242+ return self .get_outer_html (backend_node_id )
243+ except ResourceNotFoundError :
244+ # The cached node doesn't exist any more, so we need to find a new one that matches
245+ # the xpath. Backend node IDs are unique, so there is not a risk of getting the
246+ # outer html of the wrong node.
247+ if xpath in self ._node_map :
248+ del self ._node_map [xpath ]
249+ backend_node_id = self ._get_iframe_backend_node_id (xpath )
250+ return self .get_outer_html (backend_node_id )
251+
252+ def _get_iframe_backend_node_id (self , xpath ):
253+ # type: (str) -> int
254+
255+ if xpath in self ._node_map :
256+ return self ._node_map [xpath ]
257+
258+ node_info = self ._get_info_for_first_matching_node (xpath )
259+ try :
260+
261+ backend_node_id = node_info ["node" ]["contentDocument" ]["backendNodeId" ]
262+ except KeyError :
263+ raise ResourceNotFoundError ("The node found by xpath '%s' is not an iframe" % xpath )
264+
265+ self ._node_map [xpath ] = backend_node_id
266+ return backend_node_id
267+
268+ def _get_info_for_first_matching_node (self , xpath ):
269+ # type: (str) -> dict
270+
271+ with self ._get_node_ids (xpath ) as node_ids :
272+ if node_ids :
273+ return self ._describe_node (node_ids [0 ])
274+ raise ResourceNotFoundError ("No matching nodes for xpath: %s" % xpath )
275+
276+ @contextlib .contextmanager
277+ def _get_node_ids (self , xpath , max_matches = 1 ):
278+ # type: (str, int) -> list
279+
280+ assert max_matches > 0
281+ search_info = self ._perform_search (xpath )
282+ try :
283+ results = []
284+ if search_info ["resultCount" ] > 0 :
285+ results = self ._get_search_results (
286+ search_info ["searchId" ], 0 , min ([max_matches , search_info ["resultCount" ]])
287+ )["nodeIds" ]
288+ yield results
289+
290+ finally :
291+ self ._discard_search (search_info ["searchId" ])
292+
293+ def _perform_search (self , xpath ):
294+ # type: (str) -> dict
295+
296+ # DOM.getDocument must have been called on the current page first otherwise performSearch
297+ # returns an array of 0s.
298+ self ._socket_handler .event_handlers ["PageLoad" ].check_page_load ()
299+ return self ._socket_handler .execute ("DOM" , "performSearch" , {"query" : xpath })
300+
301+ def _get_search_results (self , search_id , from_index , to_index ):
302+ # type: (str, int, int) -> dict
303+
304+ return self ._socket_handler .execute ("DOM" , "getSearchResults" , {
305+ "searchId" : search_id , "fromIndex" : from_index , "toIndex" : to_index
306+ })
307+
308+ def _discard_search (self , search_id ):
309+ # type: (str) -> None
310+ """
311+ Discards search results for the session with the given id. get_search_results should no
312+ longer be called for that search.
313+ """
314+
315+ self ._socket_handler .execute ("DOM" , "discardSearchResults" , {"searchId" : search_id })
316+
317+ def _describe_node (self , node_id ):
318+ # type: (str) -> dict
319+
320+ return self ._socket_handler .execute ("DOM" , "describeNode" , {"nodeId" : node_id })
321+
322+ def reset (self ):
323+ self ._node_map = {}
0 commit comments