From 1de6a759f3f7ba538ef6b721daf4c542d2245d08 Mon Sep 17 00:00:00 2001
From: EugeneJinXin
Date: Wed, 23 Apr 2025 16:01:01 -0700
Subject: [PATCH] Adopt pagination to list_shared_examples to ensure all data returned

list_shared_examples now iterates over _get_paginated_list instead of
issuing a single GET request, and accepts an optional `limit` that stops
iteration early. Because the method body now uses `yield`, it is a
generator and is annotated as returning an Iterator rather than a list.

---
 python/langsmith/client.py             | 33 +++++++++--------
 python/tests/unit_tests/test_client.py | 51 ++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 5d4662a8c..e73bad13c 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -2851,31 +2851,34 @@ def read_shared_dataset(
         )
 
     def list_shared_examples(
-        self, share_token: str, *, example_ids: Optional[list[ID_TYPE]] = None
-    ) -> list[ls_schemas.Example]:
+        self,
+        share_token: str,
+        *,
+        example_ids: Optional[list[ID_TYPE]] = None,
+        limit: Optional[int] = None,
+    ) -> Iterator[ls_schemas.Example]:
         """Get shared examples.
 
         Args:
             share_token (Union[UUID, str]): The share token or URL of the shared dataset.
             example_ids (Optional[List[UUID, str]], optional): The IDs of the examples to filter by. Defaults to None.
+            limit (Optional[int]): Maximum number of examples to return, by default None.
 
-        Returns:
-            List[ls_schemas.Example]: The list of shared examples.
+        Yields:
+            The shared examples.
         """
         params = {}
         if example_ids is not None:
             params["id"] = [str(id) for id in example_ids]
-        response = self.request_with_retries(
-            "GET",
-            f"/public/{_as_uuid(share_token, 'share_token')}/examples",
-            headers=self._headers,
-            params=params,
-        )
-        ls_utils.raise_for_status_with_text(response)
-        return [
-            ls_schemas.Example(**dataset, _host_url=self._host_url)
-            for dataset in response.json()
-        ]
+        for i, example in enumerate(
+            self._get_paginated_list(
+                f"/public/{_as_uuid(share_token, 'share_token')}/examples",
+                params=params,
+            )
+        ):
+            yield ls_schemas.Example(**example, _host_url=self._host_url)
+            if limit is not None and i + 1 >= limit:
+                break
 
     def list_shared_projects(
         self,
diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index cd4956e8f..0e1e5cad8 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -2551,3 +2551,54 @@ def test__construct_url():
         for prefix in ("", "/", "https://foobar.com/api/"):
             actual = _construct_url(api_url + suffix, prefix + pathname)
             assert actual == expected
+
+
+@mock.patch("langsmith.client.requests.Session")
+def test_list_shared_examples_pagination(mock_session_cls: mock.Mock) -> None:
+    """Test list_shared_examples follows pagination and honors ``limit``."""
+    mock_session = mock.Mock()
+
+    def make_examples(start: int, stop: int) -> list:
+        return [
+            {
+                "id": str(uuid.uuid4()),
+                "created_at": _CREATED_AT.isoformat(),
+                "inputs": {"text": f"input_{i}"},
+                "outputs": {"result": f"output_{i}"},
+                "dataset_id": str(uuid.uuid4()),
+            }
+            for i in range(start, stop)
+        ]
+
+    def mock_request(*args, **kwargs):
+        response = mock.Mock()
+        response.status_code = 200
+
+        if "/info" in args[1]:
+            response.json.return_value = {}
+            return response
+
+        # Page 1 is full (100 examples); page 2 is short (50) and ends paging.
+        if kwargs.get("params", {}).get("offset", 0) == 0:
+            response.json.return_value = make_examples(0, 100)
+        else:
+            response.json.return_value = make_examples(100, 150)
+        return response
+
+    mock_session.request.side_effect = mock_request
+    mock_session_cls.return_value = mock_session
+
+    client = Client(
+        api_url="http://localhost:1984", api_key="fake-key", session=mock_session
+    )
+    share_token = str(uuid.uuid4())
+
+    examples = list(client.list_shared_examples(share_token))
+    assert len(examples) == 150  # Should get all examples
+    assert examples[0].inputs["text"] == "input_0"
+    assert examples[149].inputs["text"] == "input_149"
+
+    # ``limit`` stops iteration early, without draining every page.
+    limited = list(client.list_shared_examples(share_token, limit=10))
+    assert len(limited) == 10
+    assert limited[-1].inputs["text"] == "input_9"