File tree Expand file tree Collapse file tree 2 files changed +36
-2
lines changed
onyx/connectors/sharepoint
tests/daily/connectors/sharepoint Expand file tree Collapse file tree 2 files changed +36
-2
lines changed Original file line number Diff line number Diff line change 1
1
import io
2
2
import os
3
+ from collections .abc import Generator
3
4
from datetime import datetime
4
5
from datetime import timezone
5
6
from typing import Any
8
9
import msal # type: ignore
9
10
from office365 .graph_client import GraphClient # type: ignore
10
11
from office365 .onedrive .driveitems .driveItem import DriveItem # type: ignore
12
+ from office365 .onedrive .sites .site import Site # type: ignore
13
+ from office365 .onedrive .sites .sites_with_root import SitesWithRoot # type: ignore
11
14
from pydantic import BaseModel
12
15
13
16
from onyx .configs .app_configs import INDEX_BATCH_SIZE
@@ -227,14 +230,29 @@ def _fetch_driveitems(
227
230
228
231
return final_driveitems
229
232
233
def _handle_paginated_sites(
    self, sites: SitesWithRoot
) -> Generator[Site, None, None]:
    """Yield every site from a paginated site collection, one page at a time.

    Pages are fetched lazily: the next page is only requested after the
    consumer has drained the current one.

    Args:
        sites: A site collection whose first page has already been loaded.
            It must expose ``current_page``, ``has_next`` and
            ``_get_next()``.

    Yields:
        Each entry of ``sites.current_page``, followed by the entries of
        every subsequent page for as long as ``sites.has_next`` is true.
    """
    # A falsy collection (``None`` or empty) produces nothing at all.
    while sites:
        # An empty page is skipped; it does not end the iteration.
        if sites.current_page:
            yield from sites.current_page
        if not sites.has_next:
            break
        # NOTE(review): `_get_next` is a private method of the collection
        # object, so it may change between library releases -- confirm when
        # upgrading the dependency.
        sites = sites._get_next().execute_query()
242
+
230
243
def _fetch_sites(self) -> list[SiteDescriptor]:
    """Build a ``SiteDescriptor`` for every site returned by the graph client.

    Returns:
        One descriptor per site, with ``url`` set to the site's ``web_url``
        and both ``drive_name`` and ``folder_path`` left as ``None``.

    Raises:
        RuntimeError: If the initial site query returns a falsy result.
    """
    sites = self.graph_client.sites.get_all_sites().execute_query()

    if not sites:
        raise RuntimeError("No sites found in the tenant")

    # Walk every page of the result rather than only the first one, and
    # take the URL from each individual site instead of the collection.
    site_descriptors = [
        SiteDescriptor(
            url=site.web_url,
            drive_name=None,
            folder_path=None,
        )
        for site in self._handle_paginated_sites(sites)
    ]
    return site_descriptors
240
258
Original file line number Diff line number Diff line change @@ -85,6 +85,22 @@ def sharepoint_credentials() -> dict[str, str]:
85
85
}
86
86
87
87
88
def test_sharepoint_connector_all_sites(
    mock_get_unstructured_api_key: MagicMock,
    sharepoint_credentials: dict[str, str],
) -> None:
    """Check that a connector created without explicit sites yields documents."""
    # Initialize connector with no sites
    connector = SharepointConnector()

    # Load credentials
    connector.load_credentials(sharepoint_credentials)

    # Not asserting expected sites because that can change in test tenant at any time
    # Finding any docs is good enough to verify that the connector is working
    document_batches = list(connector.load_from_state())
    assert document_batches, "Should find documents from all sites"
102
+
103
+
88
104
def test_sharepoint_connector_specific_folder (
89
105
mock_get_unstructured_api_key : MagicMock ,
90
106
sharepoint_credentials : dict [str , str ],
You can’t perform that action at this time.
0 commit comments