Skip to content

Commit b3f324a

Browse files
evan-onyxaronszanto
authored andcommitted
specific user emails for drive connector (onyx-dot-app#4608)
* specific user emails for drive connector * fix drive connector tests * fix connector tests
1 parent 7508621 commit b3f324a

File tree

6 files changed

+110
-24
lines changed

6 files changed

+110
-24
lines changed

backend/onyx/connectors/google_drive/connector.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ def __init__(
143143
shared_drive_urls: str | None = None,
144144
my_drive_emails: str | None = None,
145145
shared_folder_urls: str | None = None,
146+
specific_user_emails: str | None = None,
146147
batch_size: int = INDEX_BATCH_SIZE,
147148
# OLD PARAMETERS
148149
folder_paths: list[str] | None = None,
@@ -208,6 +209,9 @@ def __init__(
208209

209210
shared_folder_url_list = _extract_str_list_from_comma_str(shared_folder_urls)
210211
self._requested_folder_ids = set(_extract_ids_from_urls(shared_folder_url_list))
212+
self._specific_user_emails = _extract_str_list_from_comma_str(
213+
specific_user_emails
214+
)
211215

212216
self._primary_admin_email: str | None = None
213217

@@ -269,6 +273,9 @@ def _update_traversed_parent_ids(self, folder_id: str) -> None:
269273
self._retrieved_ids.add(folder_id)
270274

271275
def _get_all_user_emails(self) -> list[str]:
276+
if self._specific_user_emails:
277+
return self._specific_user_emails
278+
272279
# Start with primary admin email
273280
user_emails = [self.primary_admin_email]
274281

backend/tests/daily/connectors/google_drive/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ def _connector_factory(
110110
my_drive_emails: str | None,
111111
shared_folder_urls: str | None,
112112
include_files_shared_with_me: bool,
113+
specific_user_emails: str | None = None,
113114
) -> GoogleDriveConnector:
114115
print("Creating GoogleDriveConnector with service account credentials")
115116
connector = GoogleDriveConnector(
@@ -119,6 +120,7 @@ def _connector_factory(
119120
my_drive_emails=my_drive_emails,
120121
shared_folder_urls=shared_folder_urls,
121122
include_files_shared_with_me=include_files_shared_with_me,
123+
specific_user_emails=specific_user_emails,
122124
)
123125

124126
json_string = os.environ[

backend/tests/daily/connectors/google_drive/consts_and_utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@
7474

7575
SHARED_DRIVE_3_URL = "https://drive.google.com/drive/folders/0AJYm2K_I_vtNUk9PVA"
7676

77+
RESTRICTED_ACCESS_FOLDER_URL = (
78+
"https://drive.google.com/drive/folders/1HK4wZ16ucz8QGywlcS87Y629W7i7KdeN"
79+
)
80+
7781
ADMIN_EMAIL = "admin@onyx-test.com"
7882
TEST_USER_1_EMAIL = "test_user_1@onyx-test.com"
7983
TEST_USER_2_EMAIL = "test_user_2@onyx-test.com"

backend/tests/daily/connectors/google_drive/test_service_acct.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,16 @@
3232
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_URL
3333
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL
3434
from tests.daily.connectors.google_drive.consts_and_utils import load_all_docs
35+
from tests.daily.connectors.google_drive.consts_and_utils import (
36+
RESTRICTED_ACCESS_FOLDER_URL,
37+
)
3538
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS
3639
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
3740
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_URL
3841
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
3942
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
4043
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS
44+
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_EMAIL
4145
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_2_FILE_IDS
4246
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_EMAIL
4347
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_FILE_IDS
@@ -111,7 +115,16 @@ def test_include_shared_drives_only_with_size_threshold(
111115
retrieved_docs = load_all_docs(connector)
112116

113117
# 2 extra files from shared drive owned by non-admin and not shared with admin
114-
assert len(retrieved_docs) == 52
118+
# TODO: added a file in a "restricted" folder, which the connector sometimes succeeds at finding
119+
# and adding. Specifically, our shared drive retrieval logic currently assumes that
120+
# "having access to a shared drive" means that the connector has access to all files in the shared drive.
121+
# therefore when a user successfully retrieves a shared drive, we mark it as "done". If that user's
122+
# access is restricted for a folder in the shared drive, the connector will not retrieve that folder.
123+
# If instead someone with FULL access to the shared drive retrieves it, the connector will retrieve
124+
# the folder and all its files. There is currently no consistency to the order of assignment of users
125+
# to shared drives, so this is a heisenbug. When we guarantee that restricted folders are retrieved,
126+
# we can change this to 53
127+
assert len(retrieved_docs) == 52 or len(retrieved_docs) == 53
115128

116129

117130
@patch(
@@ -149,7 +162,8 @@ def test_include_shared_drives_only(
149162
)
150163

151164
# 2 extra files from shared drive owned by non-admin and not shared with admin
152-
assert len(retrieved_docs) == 53
165+
# TODO: switch to 54 when restricted access issue is resolved
166+
assert len(retrieved_docs) == 53 or len(retrieved_docs) == 54
153167

154168
assert_expected_docs_in_retrieved_docs(
155169
retrieved_docs=retrieved_docs,
@@ -423,3 +437,43 @@ def get_specific_folders_in_my_drive(
423437
retrieved_docs=retrieved_docs,
424438
expected_file_ids=expected_file_ids,
425439
)
440+
441+
442+
@patch(
443+
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
444+
return_value=None,
445+
)
446+
def test_specific_user_emails_restricted_folder(
447+
mock_get_api_key: MagicMock,
448+
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
449+
) -> None:
450+
print("\n\nRunning test_specific_user_emails_restricted_folder")
451+
452+
# Test with admin email - should get 1 doc
453+
admin_connector = google_drive_service_acct_connector_factory(
454+
primary_admin_email=ADMIN_EMAIL,
455+
include_shared_drives=False,
456+
include_my_drives=False,
457+
include_files_shared_with_me=False,
458+
shared_folder_urls=RESTRICTED_ACCESS_FOLDER_URL,
459+
shared_drive_urls=None,
460+
my_drive_emails=None,
461+
specific_user_emails=ADMIN_EMAIL,
462+
)
463+
admin_docs = load_all_docs(admin_connector)
464+
assert len(admin_docs) == 1
465+
466+
# Test with test users - should get 0 docs
467+
test_users = [TEST_USER_1_EMAIL, TEST_USER_2_EMAIL, TEST_USER_3_EMAIL]
468+
test_connector = google_drive_service_acct_connector_factory(
469+
primary_admin_email=ADMIN_EMAIL,
470+
include_shared_drives=False,
471+
include_my_drives=False,
472+
include_files_shared_with_me=False,
473+
shared_folder_urls=RESTRICTED_ACCESS_FOLDER_URL,
474+
shared_drive_urls=None,
475+
my_drive_emails=None,
476+
specific_user_emails=",".join(test_users),
477+
)
478+
test_docs = load_all_docs(test_connector)
479+
assert len(test_docs) == 0

web/src/app/admin/connectors/[connector]/pages/DynamicConnectorCreationForm.tsx

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -72,27 +72,29 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
7272
<AccessTypeForm connector={connector} />
7373
<AccessTypeGroupSelector connector={connector} />
7474

75-
{config.advanced_values.length > 0 && (
76-
<>
77-
<AdvancedOptionsToggle
78-
showAdvancedOptions={showAdvancedOptions}
79-
setShowAdvancedOptions={setShowAdvancedOptions}
80-
/>
81-
{showAdvancedOptions &&
82-
config.advanced_values.map(
83-
(field) =>
84-
!field.hidden && (
85-
<RenderField
86-
key={field.name}
87-
field={field}
88-
values={values}
89-
connector={connector}
90-
currentCredential={currentCredential}
91-
/>
92-
)
93-
)}
94-
</>
95-
)}
75+
{config.advanced_values.length > 0 &&
76+
(!config.advancedValuesVisibleCondition ||
77+
config.advancedValuesVisibleCondition(values, currentCredential)) && (
78+
<>
79+
<AdvancedOptionsToggle
80+
showAdvancedOptions={showAdvancedOptions}
81+
setShowAdvancedOptions={setShowAdvancedOptions}
82+
/>
83+
{showAdvancedOptions &&
84+
config.advanced_values.map(
85+
(field) =>
86+
!field.hidden && (
87+
<RenderField
88+
key={field.name}
89+
field={field}
90+
values={values}
91+
connector={connector}
92+
currentCredential={currentCredential}
93+
/>
94+
)
95+
)}
96+
</>
97+
)}
9698
</>
9799
);
98100
};

web/src/lib/connectors/connectors.tsx

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ export interface ConnectionConfiguration {
126126
| TabOption
127127
)[];
128128
overrideDefaultFreq?: number;
129+
advancedValuesVisibleCondition?: (
130+
values: any,
131+
currentCredential: Credential<any> | null
132+
) => boolean;
129133
}
130134

131135
export const connectorConfigs: Record<
@@ -379,7 +383,20 @@ export const connectorConfigs: Record<
379383
defaultTab: "space",
380384
},
381385
],
382-
advanced_values: [],
386+
advanced_values: [
387+
{
388+
type: "text",
389+
description:
390+
"Enter a comma separated list of specific user emails to index. This will only index files accessible to these users.",
391+
label: "Specific User Emails",
392+
name: "specific_user_emails",
393+
optional: true,
394+
default: "",
395+
isTextArea: true,
396+
},
397+
],
398+
advancedValuesVisibleCondition: (values, currentCredential) =>
399+
!currentCredential?.credential_json?.google_tokens,
383400
},
384401
gmail: {
385402
description: "Configure Gmail connector",

0 commit comments

Comments
 (0)