Skip to content

Commit 419dc9c

Browse files
committed
tested multitenant
1 parent d525598 commit 419dc9c

File tree

2 files changed

+6
-29
lines changed

2 files changed

+6
-29
lines changed

backend/alembic/versions/12635f6655b7_drive_canonical_ids.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,11 @@ def active_search_settings() -> tuple[SearchSettings, SearchSettings | None]:
3737
)
3838
)
3939
search_settings_fetch = result.fetchall()
40-
print(search_settings_fetch)
4140
search_settings = (
4241
SearchSettings(**search_settings_fetch[0]._asdict())
4342
if search_settings_fetch
4443
else None
4544
)
46-
print(search_settings)
4745

4846
result2 = op.get_bind().execute(
4947
sa.text(
@@ -303,10 +301,6 @@ def delete_document_chunks_from_vespa(index_name: str, doc_id: str) -> None:
303301
if not hits:
304302
break # No more chunks to process
305303

306-
print(
307-
f"Deleting {len(hits)} chunks (offset {offset}) for duplicate document {doc_id}"
308-
)
309-
310304
# Delete each chunk in this batch
311305
for hit in hits:
312306
vespa_doc_id = hit.get("id") # This is the internal Vespa document ID
@@ -334,8 +328,6 @@ def delete_document_chunks_from_vespa(index_name: str, doc_id: str) -> None:
334328
if len(hits) < limit:
335329
break
336330

337-
print(f"Successfully deleted {total_deleted} chunks for document {doc_id}")
338-
339331

340332
def update_document_id_in_vespa(
341333
index_name: str, old_doc_id: str, new_doc_id: str
@@ -485,8 +477,6 @@ def delete_document_from_db(current_doc_id: str, index_name: str) -> None:
485477
# Delete chunks from vespa
486478
delete_document_chunks_from_vespa(index_name, current_doc_id)
487479

488-
print(f"Successfully deleted duplicate document: {current_doc_id}")
489-
490480
except Exception as e:
491481
print(f"Failed to delete duplicate document {current_doc_id}: {e}")
492482
# Continue with other documents instead of failing the entire migration
@@ -522,28 +512,19 @@ def upgrade() -> None:
522512

523513
# Check for duplicates
524514
if normalized_doc_id in all_normalized_doc_ids:
525-
print(f"Found duplicate document with normalized ID: {normalized_doc_id}")
526-
print(f"Deleting duplicate document: {current_doc_id}")
527-
528515
delete_document_from_db(current_doc_id, index_name)
529516
continue
530517

531518
all_normalized_doc_ids.add(normalized_doc_id)
532519

533520
# If the document ID already doesn't have query parameters, skip it
534521
if current_doc_id == normalized_doc_id:
535-
print(
536-
f"Skipping document {current_doc_id} -> {normalized_doc_id} because it already has no query parameters"
537-
)
538522
continue
539523

540-
# print(f"Updating document ID: {current_doc_id} -> {normalized_doc_id}")
541-
542524
try:
543525
# Update both database and Vespa in order
544526
# Database first to ensure consistency
545527
update_document_id_in_database(current_doc_id, normalized_doc_id)
546-
print(f"Updated database for {current_doc_id} -> {normalized_doc_id}")
547528

548529
# For Vespa, we can now use the original document IDs since we're using contains matching
549530
update_document_id_in_vespa(index_name, current_doc_id, normalized_doc_id)

backend/onyx/connectors/google_drive/doc_conversion.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,13 @@ class PermissionSyncContext(BaseModel):
8080

8181
def onyx_document_id_from_drive_file(file: GoogleDriveFileType) -> str:
8282
link = file[WEB_VIEW_LINK_KEY]
83-
# TODO: remove after mig testing
8483
parsed_url = urlparse(link)
85-
import random
86-
87-
num = random.randint(0, 1000000)
88-
parsed_url = parsed_url._replace(query=f"boop={num}")
89-
# spl_path = parsed_url.path.split("/")
90-
# if spl_path and (spl_path[-1] in ["edit", "view", "preview"]):
91-
# spl_path.pop()
92-
# parsed_url = parsed_url._replace(path="/".join(spl_path))
93-
# # Remove query parameters and reconstruct URL
84+
parsed_url = parsed_url._replace(query="") # remove query parameters
85+
spl_path = parsed_url.path.split("/")
86+
if spl_path and (spl_path[-1] in ["edit", "view", "preview"]):
87+
spl_path.pop()
88+
parsed_url = parsed_url._replace(path="/".join(spl_path))
89+
# Remove query parameters and reconstruct URL
9490
return urlunparse(parsed_url)
9591

9692

0 commit comments

Comments
 (0)