Skip to content

Commit 9bc62cc

Browse files
authored
feat: sharepoint perm sync (#5033)
* sharepoint perm sync first draft * feat: Implement SharePoint permission synchronization * mypy fix * remove commented code * bot comments fixes and job failure fixes * introduce generic way to upload certificates in credentials * mypy fix * add checkpointing to sharepoint connector * add sharepoint integration tests * Refactor SharePoint connector to derive tenant domain from verified domains and remove direct tenant domain input from credentials * address review comments * add permission sync to site pages * mypy fix * fix tests error * fix tests and address comments * Update file extraction behavior in SharePoint connector to continue processing on unprocessable files
1 parent bf6705a commit 9bc62cc

File tree

27 files changed

+2927
-263
lines changed

27 files changed

+2927
-263
lines changed

backend/ee/onyx/configs/app_configs.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,19 @@
102102
os.environ.get("TEAMS_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
103103
)
104104

105+
#####
# SharePoint
#####
# Interval in seconds; falls back to 30 minutes when the environment
# variable is unset or empty (an empty string is falsy, so `or` applies).
SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY = int(
    os.getenv("SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
)

# Interval in seconds; falls back to 5 minutes when the environment
# variable is unset or empty.
SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY = int(
    os.getenv("SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
)
117+
105118

106119
####
107120
# Celery Job Frequency
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from collections.abc import Generator
2+
3+
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
4+
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
5+
from ee.onyx.external_permissions.utils import generic_doc_sync
6+
from onyx.access.models import DocExternalAccess
7+
from onyx.configs.constants import DocumentSource
8+
from onyx.connectors.sharepoint.connector import SharepointConnector
9+
from onyx.db.models import ConnectorCredentialPair
10+
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
11+
from onyx.utils.logger import setup_logger
12+
13+
logger = setup_logger()

# Label handed to generic_doc_sync for this source.
SHAREPOINT_DOC_SYNC_TAG = "sharepoint_doc_sync"


def sharepoint_doc_sync(
    cc_pair: ConnectorCredentialPair,
    fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
    fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
    callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
    """Yield document access records for a SharePoint connector/credential pair.

    A SharepointConnector is built from the pair's connector-specific config,
    given the pair's credentials, and passed to ``generic_doc_sync`` as the
    slim connector; everything that helper yields is re-yielded unchanged.

    Args:
        cc_pair: Connector/credential pair supplying both the connector
            config and the credential JSON.
        fetch_all_existing_docs_fn: Accepted as part of the sync-function
            signature; not used by this implementation.
        fetch_all_existing_docs_ids_fn: Forwarded to ``generic_doc_sync``.
        callback: Optional heartbeat interface, forwarded to
            ``generic_doc_sync``.

    Yields:
        The ``DocExternalAccess`` items produced by ``generic_doc_sync``.
    """
    connector_kwargs = cc_pair.connector.connector_specific_config
    slim_connector = SharepointConnector(**connector_kwargs)
    slim_connector.load_credentials(cc_pair.credential.credential_json)

    yield from generic_doc_sync(
        cc_pair=cc_pair,
        fetch_all_existing_docs_ids_fn=fetch_all_existing_docs_ids_fn,
        callback=callback,
        doc_source=DocumentSource.SHAREPOINT,
        slim_connector=slim_connector,
        label=SHAREPOINT_DOC_SYNC_TAG,
    )
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from collections.abc import Generator
2+
3+
from office365.sharepoint.client_context import ClientContext # type: ignore[import-untyped]
4+
5+
from ee.onyx.db.external_perm import ExternalUserGroup
6+
from ee.onyx.external_permissions.sharepoint.permission_utils import (
7+
get_sharepoint_external_groups,
8+
)
9+
from onyx.connectors.sharepoint.connector import acquire_token_for_rest
10+
from onyx.connectors.sharepoint.connector import SharepointConnector
11+
from onyx.db.models import ConnectorCredentialPair
12+
from onyx.utils.logger import setup_logger
13+
14+
logger = setup_logger()


def sharepoint_group_sync(
    tenant_id: str,
    cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
    """Yield the external user groups found on each SharePoint site.

    A SharepointConnector is built from the pair's connector config and
    credentials. The sites to process are the connector's configured site
    descriptors, or the result of ``fetch_sites()`` when none are configured.
    For each site a ``ClientContext`` is created with a REST token callback
    and the groups returned by ``get_sharepoint_external_groups`` are yielded
    one at a time.

    Args:
        tenant_id: Accepted as part of the sync-function signature; not used
            by this implementation.
        cc_pair: Connector/credential pair supplying the connector config and
            the credential JSON.

    Yields:
        Each ``ExternalUserGroup`` found, site by site.

    Raises:
        RuntimeError: If the connector has no MSAL app or tenant domain after
            loading credentials, or if no sites are available.
    """
    # Build the connector straight from the stored config, then authenticate it.
    connector = SharepointConnector(**cc_pair.connector.connector_specific_config)
    connector.load_credentials(cc_pair.credential.credential_json)

    # Both values are needed to acquire tokens below; fail fast without them.
    if not connector.msal_app:
        raise RuntimeError("MSAL app not initialized in connector")
    if not connector.sp_tenant_domain:
        raise RuntimeError("Tenant domain not initialized in connector")

    # Prefer explicitly configured sites; otherwise fetch the site list.
    site_descriptors = connector.site_descriptors
    if not site_descriptors:
        site_descriptors = connector.fetch_sites()
    if not site_descriptors:
        raise RuntimeError("No SharePoint sites found for group sync")

    logger.info(f"Processing {len(site_descriptors)} sites for group sync")

    # Bound to locals so the token callback closes over fixed values.
    token_app = connector.msal_app
    tenant_domain = connector.sp_tenant_domain

    for site_descriptor in site_descriptors:
        logger.debug(f"Processing site: {site_descriptor.url}")

        # The context calls back for a token rather than holding one.
        site_ctx = ClientContext(site_descriptor.url).with_access_token(
            lambda: acquire_token_for_rest(token_app, tenant_domain)
        )

        for group in get_sharepoint_external_groups(site_ctx, connector.graph_client):
            logger.debug(
                f"Found group: {group.id} with {len(group.user_emails)} members"
            )
            yield group

0 commit comments

Comments
 (0)