Skip to content

Commit 53e9165

Browse files
authored
tenant seeding docs (#2925)
* tenant seeding docs * k
1 parent 179dc41 commit 53e9165

File tree

7 files changed

+11
-8
lines changed

7 files changed

+11
-8
lines changed

backend/danswer/background/celery/apps/beat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def on_setup_logging(
7878
},
7979
]
8080

81+
8182
# Build the celery beat schedule dynamically
8283
beat_schedule = {}
8384

backend/danswer/document_index/vespa/indexing_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ def _does_document_exist(
5757
chunk. This checks whether the chunk already exists in the index"""
5858
doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
5959
doc_fetch_response = http_client.get(doc_url)
60-
6160
if doc_fetch_response.status_code == 404:
6261
return False
6362

backend/danswer/document_index/vespa_constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
# main search application
3030
VESPA_APP_CONTAINER_URL = VESPA_CLOUD_URL or f"http://{VESPA_HOST}:{VESPA_PORT}"
3131

32+
3233
# danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd
3334
DOCUMENT_ID_ENDPOINT = (
3435
f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid"

backend/danswer/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
184184

185185
# If we are multi-tenant, we need to only set up initial public tables
186186
with Session(engine) as db_session:
187-
setup_danswer(db_session)
187+
setup_danswer(db_session, None)
188188
else:
189189
setup_multitenant_danswer()
190190

backend/danswer/seeding/load_docs.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
def _create_indexable_chunks(
4141
preprocessed_docs: list[dict],
42+
tenant_id: str | None,
4243
) -> tuple[list[Document], list[DocMetadataAwareIndexChunk]]:
4344
ids_to_documents = {}
4445
chunks = []
@@ -80,7 +81,7 @@ def _create_indexable_chunks(
8081
mini_chunk_embeddings=[],
8182
),
8283
title_embedding=preprocessed_doc["title_embedding"],
83-
tenant_id=None,
84+
tenant_id=tenant_id,
8485
access=default_public_access,
8586
document_sets=set(),
8687
boost=DEFAULT_BOOST,
@@ -90,7 +91,7 @@ def _create_indexable_chunks(
9091
return list(ids_to_documents.values()), chunks
9192

9293

93-
def seed_initial_documents(db_session: Session) -> None:
94+
def seed_initial_documents(db_session: Session, tenant_id: str | None) -> None:
9495
"""
9596
Seed initial documents so users don't have an empty index to start
9697
@@ -177,7 +178,7 @@ def seed_initial_documents(db_session: Session) -> None:
177178
)
178179
processed_docs = json.load(open(initial_docs_path))
179180

180-
docs, chunks = _create_indexable_chunks(processed_docs)
181+
docs, chunks = _create_indexable_chunks(processed_docs, tenant_id)
181182

182183
index_doc_batch_prepare(
183184
document_batch=docs,
@@ -198,6 +199,7 @@ def seed_initial_documents(db_session: Session) -> None:
198199

199200
# Retries here because the index may take a few seconds to become ready
200201
# as we just sent over the Vespa schema and there is a slight delay
202+
201203
index_with_retries = retry_builder()(document_index.index)
202204
index_with_retries(chunks=chunks)
203205

backend/danswer/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
logger = setup_logger()
6060

6161

62-
def setup_danswer(db_session: Session) -> None:
62+
def setup_danswer(db_session: Session, tenant_id: str | None) -> None:
6363
"""
6464
Set up Danswer for a particular tenant. In the single-tenant case, it will set it up for the default schema
6565
on server startup. In the multi-tenant case, it will be called when the tenant is created.
@@ -148,7 +148,7 @@ def setup_danswer(db_session: Session) -> None:
148148
# update multipass indexing setting based on GPU availability
149149
update_default_multipass_indexing(db_session)
150150

151-
seed_initial_documents(db_session)
151+
seed_initial_documents(db_session, tenant_id)
152152

153153

154154
def translate_saved_search_settings(db_session: Session) -> None:

backend/ee/danswer/server/tenants/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def create_tenant(
5959
run_alembic_migrations(tenant_id)
6060

6161
with get_session_with_tenant(tenant_id) as db_session:
62-
setup_danswer(db_session)
62+
setup_danswer(db_session, tenant_id)
6363

6464
add_users_to_tenant([email], tenant_id)
6565

0 commit comments

Comments
 (0)