@@ -342,6 +342,47 @@ async def select_all(
342342 await self .sleep (0.5 )
343343 return items
344344
async def xpath(self, xpath: str, timeout: float = 2.5) -> List[Element]:  # noqa
    """
    Find elements by xpath string.

    The lookup is delegated to :meth:`find_all`. If the first lookup returns
    nothing, it is retried roughly every 0.1 seconds until ``timeout`` seconds
    have elapsed (default 2.5 seconds). This helps when a script has to wait
    for some element to appear before continuing.

    When nothing is found within the timeout, the (empty) result of the last
    lookup is returned; this method does not raise for the not-found case.
    Errors raised by the underlying calls are not caught here and propagate
    to the caller.

    .. code-block:: python

         # find all the inline scripts (script elements without src attribute)
         await tab.xpath('//script[not(@src)]')

         # or here, more complex, but my personal favorite to case-insensitive text search

         await tab.xpath('//text()[ contains( translate(., "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"),"test")]')


    :param xpath: the xpath expression to search for
    :type xpath: str
    :param timeout: maximum number of seconds to keep retrying (default 2.5)
    :type timeout: float
    :return: the matching elements, or an empty list when nothing was found
    :rtype: List[Element]
    """
    items: List[Element] = []
    try:
        # NOTE(review): the second positional argument is passed through to
        # self.send unchanged from the original code; its meaning is defined
        # by send() - confirm there.
        await self.send(cdp.dom.enable(), True)

        # timeout=0 on find_all: the retry/timeout policy is handled here.
        items = await self.find_all(xpath, timeout=0)
        if not items:
            loop = asyncio.get_running_loop()
            deadline = loop.time() + timeout
            # Sleep *before* each retry (the first attempt just failed) and
            # leave the loop as soon as a lookup succeeds, so a hit is never
            # followed by a needless extra 0.1s pause.
            while not items and loop.time() <= deadline:
                await self.sleep(0.1)
                items = await self.find_all(xpath, timeout=0)
    finally:
        # Always runs, including when send() or find_all() raised.
        await self.disable_dom_agent()
    return items
385+
345386 async def get (
346387 self , url = "about:blank" , new_tab : bool = False , new_window : bool = False
347388 ):
@@ -511,6 +552,8 @@ async def find_elements_by_text(
511552
512553 await self .send (cdp .dom .discard_search_results (search_id ))
513554
555+ if not node_ids :
556+ node_ids = []
514557 items = []
515558 for nid in node_ids :
516559 node = util .filter_recurse (doc , lambda n : n .node_id == nid )
@@ -589,67 +632,7 @@ async def find_element_by_text(
589632 :return:
590633 :rtype:
591634 """
592- doc = await self .send (cdp .dom .get_document (- 1 , True ))
593- text = text .strip ()
594- search_id , nresult = await self .send (cdp .dom .perform_search (text , True ))
595-
596- if nresult :
597- node_ids = await self .send (
598- cdp .dom .get_search_results (search_id , 0 , nresult )
599- )
600- else :
601- node_ids = []
602- await self .send (cdp .dom .discard_search_results (search_id ))
603-
604- if not node_ids :
605- node_ids = []
606- items = []
607- for nid in node_ids :
608- node = util .filter_recurse (doc , lambda n : n .node_id == nid )
609- if node is None :
610- continue
611-
612- try :
613- elem = element .create (node , self , doc )
614- except : # noqa
615- continue
616- if elem .node_type == 3 :
617- # if found element is a text node (which is plain text, and useless for our purpose),
618- # we return the parent element of the node (which is often a tag which can have text between their
619- # opening and closing tags (that is most tags, except for example "img" and "video", "br")
620-
621- if not elem .parent :
622- # check if parent actually has a parent and update it to be absolutely sure
623- await elem .update ()
624-
625- items .append (
626- elem .parent or elem
627- ) # when it really has no parent, use the text node itself
628- continue
629- else :
630- # just add the element itself
631- items .append (elem )
632-
633- # since we already fetched the entire doc, including shadow and frames
634- # let's also search through the iframes
635- iframes = util .filter_recurse_all (doc , lambda node : node .node_name == "IFRAME" )
636- if iframes :
637- iframes_elems = [
638- element .create (iframe , self , iframe .content_document )
639- for iframe in iframes
640- ]
641- for iframe_elem in iframes_elems :
642- iframe_text_nodes = util .filter_recurse_all (
643- iframe_elem ,
644- lambda node : node .node_type == 3 # noqa
645- and text .lower () in node .node_value .lower (),
646- )
647- if iframe_text_nodes :
648- iframe_text_elems = [
649- element .create (text_node , self , iframe_elem .tree )
650- for text_node in iframe_text_nodes
651- ]
652- items .extend (text_node .parent for text_node in iframe_text_elems )
635+ items = await self .find_elements_by_text (text )
653636 try :
654637 if not items :
655638 return None
@@ -666,7 +649,7 @@ async def find_element_by_text(
666649 if elem :
667650 return elem
668651 finally :
669- await self . disable_dom_agent ()
652+ pass
670653
671654 return None
672655
0 commit comments