Skip to content

Commit 8e25c3c

Browse files
author
Richard Kuo (Danswer)
committed
Merge branch 'main' of https://github.com/danswer-ai/danswer into bugfix/light_cpu
2 parents 9622400 + 1470b7e commit 8e25c3c

File tree

13 files changed

+322
-144
lines changed

13 files changed

+322
-144
lines changed

backend/ee/onyx/external_permissions/confluence/doc_sync.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,9 @@
2424
def _get_server_space_permissions(
2525
confluence_client: OnyxConfluence, space_key: str
2626
) -> ExternalAccess:
27-
space_permissions = confluence_client.get_space_permissions(space_key=space_key)
27+
space_permissions = confluence_client.get_all_space_permissions_server(
28+
space_key=space_key
29+
)
2830

2931
viewspace_permissions = []
3032
for permission_category in space_permissions:

backend/onyx/background/celery/celery_utils.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from onyx.connectors.interfaces import SlimConnector
1515
from onyx.connectors.models import Document
1616
from onyx.db.connector_credential_pair import get_connector_credential_pair
17+
from onyx.db.enums import ConnectorCredentialPairStatus
1718
from onyx.db.enums import TaskStatus
1819
from onyx.db.models import TaskQueueState
1920
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
@@ -41,14 +42,21 @@ def _get_deletion_status(
4142
return None
4243

4344
redis_connector = RedisConnector(tenant_id, cc_pair.id)
44-
if not redis_connector.delete.fenced:
45-
return None
46-
47-
return TaskQueueState(
48-
task_id="",
49-
task_name=redis_connector.delete.fence_key,
50-
status=TaskStatus.STARTED,
51-
)
45+
if redis_connector.delete.fenced:
46+
return TaskQueueState(
47+
task_id="",
48+
task_name=redis_connector.delete.fence_key,
49+
status=TaskStatus.STARTED,
50+
)
51+
52+
if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
53+
return TaskQueueState(
54+
task_id="",
55+
task_name=redis_connector.delete.fence_key,
56+
status=TaskStatus.PENDING,
57+
)
58+
59+
return None
5260

5361

5462
def get_deletion_attempt_snapshot(

backend/onyx/connectors/confluence/onyx_confluence.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -354,6 +354,33 @@ def paginated_group_members_retrieval(
354354
group_name = quote(group_name)
355355
yield from self._paginate_url(f"rest/api/group/{group_name}/member", limit)
356356

357+
def get_all_space_permissions_server(
    self,
    space_key: str,
) -> list[dict[str, Any]]:
    """Fetch the permission sets of a space on Confluence Server.

    This is a Confluence Server specific method. It sends a
    ``getSpacePermissionSets`` JSON-RPC 2.0 request to the
    ``rpc/json-rpc/confluenceservice-v2`` endpoint. It is preferred over
    ``get_space_permissions`` because the raw JSON-RPC response is handed
    back to this method and can therefore be logged when no permissions
    are returned.

    Args:
        space_key: Key of the space whose permissions are requested.

    Returns:
        The ``result`` member of the JSON-RPC response. An empty list is
        returned when the response is not a JSON object or carries no
        (or an empty/null) ``result``.
    """
    url = "rpc/json-rpc/confluenceservice-v2"
    data = {
        "jsonrpc": "2.0",
        "method": "getSpacePermissionSets",
        # JSON-RPC request identifier chosen by the client.
        "id": 7,
        "params": [space_key],
    }
    response = self.post(url, data=data)
    logger.debug(f"jsonrpc response: {response}")

    # Guard the lookup: a response that is not a dict (presumably possible
    # for an empty or non-JSON body -- verify against the client's `post`)
    # must not turn into an AttributeError here.
    result = response.get("result") if isinstance(response, dict) else None
    if not result:
        logger.warning(
            f"No jsonrpc response for space permissions for space {space_key}"
            f"\nResponse: {response}"
        )
        # Always honour the declared return type, even for `"result": null`.
        return []

    return result
383+
357384

358385
def _validate_connector_configuration(
359386
credentials: dict[str, Any],

backend/onyx/connectors/google_utils/google_kv.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,9 @@
1717
from onyx.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY
1818
from onyx.connectors.google_utils.resources import get_drive_service
1919
from onyx.connectors.google_utils.resources import get_gmail_service
20+
from onyx.connectors.google_utils.shared_constants import (
21+
DB_CREDENTIALS_AUTHENTICATION_METHOD,
22+
)
2023
from onyx.connectors.google_utils.shared_constants import (
2124
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
2225
)
@@ -29,6 +32,9 @@
2932
from onyx.connectors.google_utils.shared_constants import (
3033
GOOGLE_SCOPES,
3134
)
35+
from onyx.connectors.google_utils.shared_constants import (
36+
GoogleOAuthAuthenticationMethod,
37+
)
3238
from onyx.connectors.google_utils.shared_constants import (
3339
MISSING_SCOPES_ERROR_STR,
3440
)
@@ -96,6 +102,7 @@ def update_credential_access_tokens(
96102
user: User,
97103
db_session: Session,
98104
source: DocumentSource,
105+
auth_method: GoogleOAuthAuthenticationMethod,
99106
) -> OAuthCredentials | None:
100107
app_credentials = get_google_app_cred(source)
101108
flow = InstalledAppFlow.from_client_config(
@@ -119,6 +126,7 @@ def update_credential_access_tokens(
119126
new_creds_dict = {
120127
DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str,
121128
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
129+
DB_CREDENTIALS_AUTHENTICATION_METHOD: auth_method.value,
122130
}
123131

124132
if not update_credential_json(credential_id, new_creds_dict, user, db_session):
@@ -129,6 +137,7 @@ def update_credential_access_tokens(
129137
def build_service_account_creds(
130138
source: DocumentSource,
131139
primary_admin_email: str | None = None,
140+
name: str | None = None,
132141
) -> CredentialBase:
133142
service_account_key = get_service_account_key(source=source)
134143

@@ -138,10 +147,15 @@ def build_service_account_creds(
138147
if primary_admin_email:
139148
credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = primary_admin_email
140149

150+
credential_dict[
151+
DB_CREDENTIALS_AUTHENTICATION_METHOD
152+
] = GoogleOAuthAuthenticationMethod.UPLOADED.value
153+
141154
return CredentialBase(
142155
credential_json=credential_dict,
143156
admin_public=True,
144157
source=source,
158+
name=name,
145159
)
146160

147161

backend/onyx/server/documents/cc_pair.py

Lines changed: 29 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -164,17 +164,12 @@ def update_cc_pair_status(
164164
db_session: Session = Depends(get_session),
165165
tenant_id: str | None = Depends(get_current_tenant_id),
166166
) -> JSONResponse:
167-
"""This method may wait up to 30 seconds if pausing the connector due to the need to
168-
terminate tasks in progress. Tasks are not guaranteed to terminate within the
169-
timeout.
167+
"""This method returns nearly immediately. It simply sets some signals and
168+
optimistically assumes any running background processes will clean themselves up.
169+
This is done to improve the perceived end user experience.
170170
171171
Returns HTTPStatus.OK if everything finished.
172-
Returns HTTPStatus.ACCEPTED if the connector is being paused, but background tasks
173-
did not finish within the timeout.
174172
"""
175-
WAIT_TIMEOUT = 15.0
176-
still_terminating = False
177-
178173
cc_pair = get_connector_credential_pair_from_id(
179174
cc_pair_id=cc_pair_id,
180175
db_session=db_session,
@@ -188,73 +183,37 @@ def update_cc_pair_status(
188183
detail="Connection not found for current user's permissions",
189184
)
190185

186+
redis_connector = RedisConnector(tenant_id, cc_pair_id)
191187
if status_update_request.status == ConnectorCredentialPairStatus.PAUSED:
188+
redis_connector.stop.set_fence(True)
189+
192190
search_settings_list: list[SearchSettings] = get_active_search_settings(
193191
db_session
194192
)
195193

196-
redis_connector = RedisConnector(tenant_id, cc_pair_id)
197-
198-
try:
199-
redis_connector.stop.set_fence(True)
200-
while True:
201-
logger.debug(
202-
f"Wait for indexing soft termination starting: cc_pair={cc_pair_id}"
203-
)
204-
wait_succeeded = redis_connector.wait_for_indexing_termination(
205-
search_settings_list, WAIT_TIMEOUT
206-
)
207-
if wait_succeeded:
208-
logger.debug(
209-
f"Wait for indexing soft termination succeeded: cc_pair={cc_pair_id}"
210-
)
211-
break
212-
213-
logger.debug(
214-
"Wait for indexing soft termination timed out. "
215-
f"Moving to hard termination: cc_pair={cc_pair_id} timeout={WAIT_TIMEOUT:.2f}"
216-
)
217-
218-
for search_settings in search_settings_list:
219-
redis_connector_index = redis_connector.new_index(
220-
search_settings.id
221-
)
222-
if not redis_connector_index.fenced:
223-
continue
224-
225-
index_payload = redis_connector_index.payload
226-
if not index_payload:
227-
continue
228-
229-
if not index_payload.celery_task_id:
230-
continue
231-
232-
# Revoke the task to prevent it from running
233-
primary_app.control.revoke(index_payload.celery_task_id)
234-
235-
# If it is running, then signaling for termination will get the
236-
# watchdog thread to kill the spawned task
237-
redis_connector_index.set_terminate(index_payload.celery_task_id)
238-
239-
logger.debug(
240-
f"Wait for indexing hard termination starting: cc_pair={cc_pair_id}"
241-
)
242-
wait_succeeded = redis_connector.wait_for_indexing_termination(
243-
search_settings_list, WAIT_TIMEOUT
244-
)
245-
if wait_succeeded:
246-
logger.debug(
247-
f"Wait for indexing hard termination succeeded: cc_pair={cc_pair_id}"
248-
)
249-
break
250-
251-
logger.debug(
252-
f"Wait for indexing hard termination timed out: cc_pair={cc_pair_id}"
253-
)
254-
still_terminating = True
255-
break
256-
finally:
257-
redis_connector.stop.set_fence(False)
194+
while True:
195+
for search_settings in search_settings_list:
196+
redis_connector_index = redis_connector.new_index(search_settings.id)
197+
if not redis_connector_index.fenced:
198+
continue
199+
200+
index_payload = redis_connector_index.payload
201+
if not index_payload:
202+
continue
203+
204+
if not index_payload.celery_task_id:
205+
continue
206+
207+
# Revoke the task to prevent it from running
208+
primary_app.control.revoke(index_payload.celery_task_id)
209+
210+
# If it is running, then signaling for termination will get the
211+
# watchdog thread to kill the spawned task
212+
redis_connector_index.set_terminate(index_payload.celery_task_id)
213+
214+
break
215+
else:
216+
redis_connector.stop.set_fence(False)
258217

259218
update_connector_credential_pair_from_id(
260219
db_session=db_session,
@@ -264,14 +223,6 @@ def update_cc_pair_status(
264223

265224
db_session.commit()
266225

267-
if still_terminating:
268-
return JSONResponse(
269-
status_code=HTTPStatus.ACCEPTED,
270-
content={
271-
"message": "Request accepted, background task termination still in progress"
272-
},
273-
)
274-
275226
return JSONResponse(
276227
status_code=HTTPStatus.OK, content={"message": str(HTTPStatus.OK)}
277228
)

backend/onyx/server/documents/connector.py

Lines changed: 47 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,9 @@
5353
upsert_service_account_key,
5454
)
5555
from onyx.connectors.google_utils.google_kv import verify_csrf
56+
from onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TOKEN_KEY
5657
from onyx.connectors.google_utils.shared_constants import (
57-
DB_CREDENTIALS_DICT_TOKEN_KEY,
58+
GoogleOAuthAuthenticationMethod,
5859
)
5960
from onyx.db.connector import create_connector
6061
from onyx.db.connector import delete_connector
@@ -314,6 +315,7 @@ def upsert_service_account_credential(
314315
credential_base = build_service_account_creds(
315316
DocumentSource.GOOGLE_DRIVE,
316317
primary_admin_email=service_account_credential_request.google_primary_admin,
318+
name="Service Account (uploaded)",
317319
)
318320
except KvKeyNotFoundError as e:
319321
raise HTTPException(status_code=400, detail=str(e))
@@ -408,6 +410,38 @@ def upload_files(
408410
return FileUploadResponse(file_paths=deduped_file_paths)
409411

410412

413+
@router.get("/admin/connector")
def get_connectors_by_credential(
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    credential: int | None = None,
) -> list[ConnectorSnapshot]:
    """Get a list of connectors. Allow filtering by a specific credential id.

    Args:
        _: Injected user; only present so the curator-or-admin dependency runs.
        db_session: Database session used to fetch the connectors.
        credential: Optional credential id. When given, only connectors that
            have an associated entry with this ``credential_id`` are returned.

    Returns:
        Snapshots of the matching connectors. Connectors whose source is
        ``DocumentSource.INGESTION_API`` are never included.
    """
    snapshots: list[ConnectorSnapshot] = []
    for connector in fetch_connectors(db_session):
        if connector.source == DocumentSource.INGESTION_API:
            # don't include INGESTION_API, as it's a system level
            # connector not manageable by the user
            continue

        # With a credential filter, keep the connector only if at least one
        # of its credential associations points at that credential.
        if credential is not None and not any(
            credential == cc_pair.credential_id for cc_pair in connector.credentials
        ):
            continue

        snapshots.append(ConnectorSnapshot.from_connector_db_model(connector))

    return snapshots
443+
444+
411445
# Retrieves most recent failure cases for connectors that are currently failing
412446
@router.get("/admin/connector/failed-indexing-status")
413447
def get_currently_failed_indexing_status(
@@ -987,7 +1021,12 @@ def gmail_callback(
9871021
credential_id = int(credential_id_cookie)
9881022
verify_csrf(credential_id, callback.state)
9891023
credentials: Credentials | None = update_credential_access_tokens(
990-
callback.code, credential_id, user, db_session, DocumentSource.GMAIL
1024+
callback.code,
1025+
credential_id,
1026+
user,
1027+
db_session,
1028+
DocumentSource.GMAIL,
1029+
GoogleOAuthAuthenticationMethod.UPLOADED,
9911030
)
9921031
if credentials is None:
9931032
raise HTTPException(
@@ -1013,7 +1052,12 @@ def google_drive_callback(
10131052
verify_csrf(credential_id, callback.state)
10141053

10151054
credentials: Credentials | None = update_credential_access_tokens(
1016-
callback.code, credential_id, user, db_session, DocumentSource.GOOGLE_DRIVE
1055+
callback.code,
1056+
credential_id,
1057+
user,
1058+
db_session,
1059+
DocumentSource.GOOGLE_DRIVE,
1060+
GoogleOAuthAuthenticationMethod.UPLOADED,
10171061
)
10181062
if credentials is None:
10191063
raise HTTPException(

backend/onyx/server/documents/credential.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
from onyx.auth.users import current_user
1010
from onyx.db.credentials import alter_credential
1111
from onyx.db.credentials import cleanup_gmail_credentials
12-
from onyx.db.credentials import cleanup_google_drive_credentials
1312
from onyx.db.credentials import create_credential
1413
from onyx.db.credentials import CREDENTIAL_PERMISSIONS_TO_IGNORE
1514
from onyx.db.credentials import delete_credential
@@ -133,8 +132,6 @@ def create_credential_from_model(
133132
# Temporary fix for empty Google App credentials
134133
if credential_info.source == DocumentSource.GMAIL:
135134
cleanup_gmail_credentials(db_session=db_session)
136-
if credential_info.source == DocumentSource.GOOGLE_DRIVE:
137-
cleanup_google_drive_credentials(db_session=db_session)
138135

139136
credential = create_credential(credential_info, user, db_session)
140137
return ObjectCreationIdResponse(

0 commit comments

Comments
 (0)