Skip to content

Commit 29f4d89

Browse files
author
Nils Kleinrahm
committed
fix: address review feedback for PR #5183
1 parent 84edbab commit 29f4d89

File tree

3 files changed

+41
-42
lines changed

3 files changed

+41
-42
lines changed

backend/onyx/connectors/sharepoint/connector.py

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,13 @@ def __init__(
506506
self.include_site_documents = include_site_documents
507507
self.sp_tenant_domain: str | None = None
508508

509+
# Validate that at least one content type is enabled
510+
if not self.include_site_documents and not self.include_site_pages:
511+
raise ConnectorValidationError(
512+
"At least one content type must be enabled. "
513+
"Please check either 'Include Site Documents' or 'Include Site Pages' (or both)."
514+
)
515+
509516
@property
510517
def graph_client(self) -> GraphClient:
511518
if self._graph_client is None:
@@ -1089,13 +1096,13 @@ def _load_from_checkpoint(
10891096
return checkpoint
10901097

10911098
# Phase 2: Initialize cached_drive_names for current site if needed
1092-
if (checkpoint.current_site_descriptor and checkpoint.cached_drive_names is None):
1099+
if checkpoint.current_site_descriptor and checkpoint.cached_drive_names is None:
10931100
# If site documents flag is False, set empty drive list to skip document processing
10941101
if not self.include_site_documents:
10951102
logger.debug("Documents disabled, skipping drive initialization")
10961103
checkpoint.cached_drive_names = deque()
10971104
return checkpoint
1098-
1105+
10991106
logger.info(
11001107
f"Initializing drives for site: {checkpoint.current_site_descriptor.url}"
11011108
)
@@ -1270,45 +1277,35 @@ def _load_from_checkpoint(
12701277
and checkpoint.current_site_descriptor is not None
12711278
):
12721279
# Fetch SharePoint site pages (.aspx files)
1273-
# Only fetch site pages if a folder or drive is not specified since this
1274-
# processing happens at a site-wide level + specifying a folder implies
1275-
# that the user probably isn't looking for site pages
12761280
site_descriptor = checkpoint.current_site_descriptor
1277-
specified_path = (
1278-
site_descriptor.folder_path is not None
1279-
or site_descriptor.drive_name is not None
1280-
)
12811281
start_dt = datetime.fromtimestamp(start, tz=timezone.utc)
12821282
end_dt = datetime.fromtimestamp(end, tz=timezone.utc)
1283-
if not specified_path:
1284-
site_pages = self._fetch_site_pages(
1285-
site_descriptor, start=start_dt, end=end_dt
1286-
)
1287-
client_ctx: ClientContext | None = None
1288-
if include_permissions:
1289-
if self.msal_app and self.sp_tenant_domain:
1290-
msal_app = self.msal_app
1291-
sp_tenant_domain = self.sp_tenant_domain
1292-
client_ctx = ClientContext(
1293-
site_descriptor.url
1294-
).with_access_token(
1295-
lambda: acquire_token_for_rest(msal_app, sp_tenant_domain)
1296-
)
1297-
else:
1298-
raise RuntimeError("MSAL app or tenant domain is not set")
1299-
for site_page in site_pages:
1300-
logger.debug(
1301-
f"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}"
1283+
site_pages = self._fetch_site_pages(
1284+
site_descriptor, start=start_dt, end=end_dt
1285+
)
1286+
client_ctx: ClientContext | None = None
1287+
if include_permissions:
1288+
if self.msal_app and self.sp_tenant_domain:
1289+
msal_app = self.msal_app
1290+
sp_tenant_domain = self.sp_tenant_domain
1291+
client_ctx = ClientContext(site_descriptor.url).with_access_token(
1292+
lambda: acquire_token_for_rest(msal_app, sp_tenant_domain)
13021293
)
1303-
yield (
1304-
_convert_sitepage_to_document(
1305-
site_page,
1306-
site_descriptor.drive_name,
1307-
client_ctx,
1308-
self.graph_client,
1309-
include_permissions=include_permissions,
1310-
)
1294+
else:
1295+
raise RuntimeError("MSAL app or tenant domain is not set")
1296+
for site_page in site_pages:
1297+
logger.debug(
1298+
f"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}"
1299+
)
1300+
yield (
1301+
_convert_sitepage_to_document(
1302+
site_page,
1303+
site_descriptor.drive_name,
1304+
client_ctx,
1305+
self.graph_client,
1306+
include_permissions=include_permissions,
13111307
)
1308+
)
13121309
logger.info(
13131310
f"Finished processing site pages for site: {site_descriptor.url}"
13141311
)

backend/tests/daily/connectors/sharepoint/test_sharepoint_connector.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,8 @@ def test_sharepoint_connector_all_sites__docs_only(
111111
):
112112
# Initialize connector with no sites
113113
connector = SharepointConnector(
114-
include_site_pages=False, include_site_documents=True
115-
)
114+
include_site_pages=False, include_site_documents=True
115+
)
116116

117117
# Load credentials
118118
connector.load_credentials(sharepoint_credentials)
@@ -179,8 +179,8 @@ def test_sharepoint_connector_root_folder__docs_only(
179179
# Initialize connector with the base site URL
180180
connector = SharepointConnector(
181181
sites=[os.environ["SHAREPOINT_SITE"]],
182-
include_site_pages=False,
183-
include_site_documents=True,
182+
include_site_pages=False,
183+
include_site_documents=True,
184184
)
185185

186186
# Load credentials

web/src/lib/connectors/connectors.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,8 @@ Example:
676676
name: "include_site_documents",
677677
optional: true,
678678
default: true,
679-
description: "Index documents from SharePoint document libraries",
679+
description:
680+
"Index documents of all SharePoint libraries or folders defined above.",
680681
},
681682
{
682683
type: "checkbox",
@@ -685,7 +686,8 @@ Example:
685686
name: "include_site_pages",
686687
optional: true,
687688
default: true,
688-
description: "Index SharePoint site pages (.aspx files)",
689+
description:
690+
"Index aspx-pages of all SharePoint sites defined above, even if a library or folder is specified.",
689691
},
690692
],
691693
},

0 commit comments

Comments
 (0)