@@ -506,6 +506,13 @@ def __init__(
506
506
self .include_site_documents = include_site_documents
507
507
self .sp_tenant_domain : str | None = None
508
508
509
+ # Validate that at least one content type is enabled
510
+ if not self .include_site_documents and not self .include_site_pages :
511
+ raise ConnectorValidationError (
512
+ "At least one content type must be enabled. "
513
+ "Please check either 'Include Site Documents' or 'Include Site Pages' (or both)."
514
+ )
515
+
509
516
@property
510
517
def graph_client (self ) -> GraphClient :
511
518
if self ._graph_client is None :
@@ -1089,13 +1096,13 @@ def _load_from_checkpoint(
1089
1096
return checkpoint
1090
1097
1091
1098
# Phase 2: Initialize cached_drive_names for current site if needed
1092
- if ( checkpoint .current_site_descriptor and checkpoint .cached_drive_names is None ) :
1099
+ if checkpoint .current_site_descriptor and checkpoint .cached_drive_names is None :
1093
1100
# If site documents flag is False, set empty drive list to skip document processing
1094
1101
if not self .include_site_documents :
1095
1102
logger .debug ("Documents disabled, skipping drive initialization" )
1096
1103
checkpoint .cached_drive_names = deque ()
1097
1104
return checkpoint
1098
-
1105
+
1099
1106
logger .info (
1100
1107
f"Initializing drives for site: { checkpoint .current_site_descriptor .url } "
1101
1108
)
@@ -1270,45 +1277,35 @@ def _load_from_checkpoint(
1270
1277
and checkpoint .current_site_descriptor is not None
1271
1278
):
1272
1279
# Fetch SharePoint site pages (.aspx files)
1273
- # Only fetch site pages if a folder or drive is not specified since this
1274
- # processing happens at a site-wide level + specifying a folder implies
1275
- # that the user probably isn't looking for site pages
1276
1280
site_descriptor = checkpoint .current_site_descriptor
1277
- specified_path = (
1278
- site_descriptor .folder_path is not None
1279
- or site_descriptor .drive_name is not None
1280
- )
1281
1281
start_dt = datetime .fromtimestamp (start , tz = timezone .utc )
1282
1282
end_dt = datetime .fromtimestamp (end , tz = timezone .utc )
1283
- if not specified_path :
1284
- site_pages = self ._fetch_site_pages (
1285
- site_descriptor , start = start_dt , end = end_dt
1286
- )
1287
- client_ctx : ClientContext | None = None
1288
- if include_permissions :
1289
- if self .msal_app and self .sp_tenant_domain :
1290
- msal_app = self .msal_app
1291
- sp_tenant_domain = self .sp_tenant_domain
1292
- client_ctx = ClientContext (
1293
- site_descriptor .url
1294
- ).with_access_token (
1295
- lambda : acquire_token_for_rest (msal_app , sp_tenant_domain )
1296
- )
1297
- else :
1298
- raise RuntimeError ("MSAL app or tenant domain is not set" )
1299
- for site_page in site_pages :
1300
- logger .debug (
1301
- f"Processing site page: { site_page .get ('webUrl' , site_page .get ('name' , 'Unknown' ))} "
1283
+ site_pages = self ._fetch_site_pages (
1284
+ site_descriptor , start = start_dt , end = end_dt
1285
+ )
1286
+ client_ctx : ClientContext | None = None
1287
+ if include_permissions :
1288
+ if self .msal_app and self .sp_tenant_domain :
1289
+ msal_app = self .msal_app
1290
+ sp_tenant_domain = self .sp_tenant_domain
1291
+ client_ctx = ClientContext (site_descriptor .url ).with_access_token (
1292
+ lambda : acquire_token_for_rest (msal_app , sp_tenant_domain )
1302
1293
)
1303
- yield (
1304
- _convert_sitepage_to_document (
1305
- site_page ,
1306
- site_descriptor .drive_name ,
1307
- client_ctx ,
1308
- self .graph_client ,
1309
- include_permissions = include_permissions ,
1310
- )
1294
+ else :
1295
+ raise RuntimeError ("MSAL app or tenant domain is not set" )
1296
+ for site_page in site_pages :
1297
+ logger .debug (
1298
+ f"Processing site page: { site_page .get ('webUrl' , site_page .get ('name' , 'Unknown' ))} "
1299
+ )
1300
+ yield (
1301
+ _convert_sitepage_to_document (
1302
+ site_page ,
1303
+ site_descriptor .drive_name ,
1304
+ client_ctx ,
1305
+ self .graph_client ,
1306
+ include_permissions = include_permissions ,
1311
1307
)
1308
+ )
1312
1309
logger .info (
1313
1310
f"Finished processing site pages for site: { site_descriptor .url } "
1314
1311
)
0 commit comments