Skip to content

Commit cbff29a

Browse files
author
Nils Kleinrahm
committed
feat: make sharepoint documents and sharepoint pages optional
1 parent df2fef3 commit cbff29a

File tree

3 files changed

+47
-19
lines changed

3 files changed

+47
-19
lines changed

backend/onyx/connectors/sharepoint/connector.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ def __init__(
294294
batch_size: int = INDEX_BATCH_SIZE,
295295
sites: list[str] = [],
296296
include_site_pages: bool = True,
297+
include_site_documents: bool = True,
297298
) -> None:
298299
self.batch_size = batch_size
299300
self._graph_client: GraphClient | None = None
@@ -302,6 +303,7 @@ def __init__(
302303
)
303304
self.msal_app: msal.ConfidentialClientApplication | None = None
304305
self.include_site_pages = include_site_pages
306+
self.include_site_documents = include_site_documents
305307

306308
@property
307309
def graph_client(self) -> GraphClient:
@@ -558,18 +560,21 @@ def _fetch_from_sharepoint(
558560
doc_batch: list[Document] = []
559561
for site_descriptor in site_descriptors:
560562
# Fetch regular documents from document libraries
561-
driveitems = self._fetch_driveitems(site_descriptor, start=start, end=end)
562-
for driveitem, drive_name in driveitems:
563-
logger.debug(f"Processing: {driveitem.web_url}")
564-
565-
# Convert driveitem to document with size checking
566-
doc = _convert_driveitem_to_document(driveitem, drive_name)
567-
if doc is not None:
568-
doc_batch.append(doc)
569-
570-
if len(doc_batch) >= self.batch_size:
571-
yield doc_batch
572-
doc_batch = []
563+
if self.include_site_documents:
564+
driveitems = self._fetch_driveitems(
565+
site_descriptor, start=start, end=end
566+
)
567+
for driveitem, drive_name in driveitems:
568+
logger.debug(f"Processing: {driveitem.web_url}")
569+
570+
# Convert driveitem to document with size checking
571+
doc = _convert_driveitem_to_document(driveitem, drive_name)
572+
if doc is not None:
573+
doc_batch.append(doc)
574+
575+
if len(doc_batch) >= self.batch_size:
576+
yield doc_batch
577+
doc_batch = []
573578

574579
# Fetch SharePoint site pages (.aspx files)
575580
# Only fetch site pages if a folder is not specified since this processing

backend/tests/daily/connectors/sharepoint/test_sharepoint_connector.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,9 @@ def test_sharepoint_connector_all_sites__docs_only(
9090
sharepoint_credentials: dict[str, str],
9191
) -> None:
9292
# Initialize connector with no sites
93-
connector = SharepointConnector(include_site_pages=False)
93+
connector = SharepointConnector(
94+
include_site_pages=False, include_site_documents=True
95+
)
9496

9597
# Load credentials
9698
connector.load_credentials(sharepoint_credentials)
@@ -141,7 +143,9 @@ def test_sharepoint_connector_root_folder__docs_only(
141143
) -> None:
142144
# Initialize connector with the base site URL
143145
connector = SharepointConnector(
144-
sites=[os.environ["SHAREPOINT_SITE"]], include_site_pages=False
146+
sites=[os.environ["SHAREPOINT_SITE"]],
147+
include_site_pages=False,
148+
include_site_documents=True,
145149
)
146150

147151
# Load credentials

web/src/lib/connectors/connectors.tsx

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -666,14 +666,31 @@ See our docs for more details.`,
666666
name: "sites",
667667
optional: true,
668668
description: `• If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required).
669-
670-
• Specifying 'https://onyxai.sharepoint.com/sites/support' for example will only index documents within this site.
671-
672-
• Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder' for example will only index documents within this folder.
669+
• Specifying 'https://onyxai.sharepoint.com/sites/support' for example only indexes this site.
670+
• Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder' for example only indexes this folder.
673671
`,
674672
},
675673
],
676-
advanced_values: [],
674+
advanced_values: [
675+
{
676+
type: "checkbox",
677+
query: "Index Documents:",
678+
label: "Index Documents",
679+
name: "include_site_documents",
680+
optional: true,
681+
default: true,
682+
description: "Index documents from SharePoint document libraries",
683+
},
684+
{
685+
type: "checkbox",
686+
query: "Index ASPX Sites:",
687+
label: "Index ASPX Sites",
688+
name: "include_site_pages",
689+
optional: true,
690+
default: true,
691+
description: "Index SharePoint site pages (.aspx files)",
692+
},
693+
],
677694
},
678695
teams: {
679696
description: "Configure Teams connector",
@@ -1579,6 +1596,8 @@ export interface SalesforceConfig {
15791596

15801597
export interface SharepointConfig {
15811598
sites?: string[];
1599+
include_site_pages?: boolean;
1600+
include_site_documents?: boolean;
15821601
}
15831602

15841603
export interface TeamsConfig {

0 commit comments

Comments
 (0)