
Commit 712a86a

type fixes
1 parent bb34971 commit 712a86a

7 files changed: +14 additions, -136 deletions


backend/onyx/background/celery/tasks/connector_deletion/tasks.py

Lines changed: 0 additions & 9 deletions
@@ -28,9 +28,6 @@
 from onyx.db.connector_credential_pair import (
     delete_connector_credential_pair__no_commit,
 )
-from onyx.db.connector_credential_pair import (
-    delete_userfiles_for_cc_pair__no_commit,
-)
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.document import (
@@ -484,12 +481,6 @@ def monitor_connector_deletion_taskset(
         # related to the deleted DocumentByConnectorCredentialPair during commit
         db_session.expire(cc_pair)

-        # delete all userfiles for the cc_pair
-        delete_userfiles_for_cc_pair__no_commit(
-            db_session=db_session,
-            cc_pair_id=cc_pair_id,
-        )
-
         # finally, delete the cc-pair
         delete_connector_credential_pair__no_commit(
             db_session=db_session,

backend/onyx/db/chat.py

Lines changed: 0 additions & 87 deletions
@@ -33,7 +33,6 @@
 from onyx.auth.schemas import UserRole
 from onyx.chat.models import DocumentRelevance
 from onyx.configs.chat_configs import HARD_DELETE_CHATS
-from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import MessageType
 from onyx.context.search.models import InferenceSection
 from onyx.context.search.models import RetrievalDocs
@@ -53,12 +52,10 @@
 from onyx.db.models import SearchDoc as DBSearchDoc
 from onyx.db.models import ToolCall
 from onyx.db.models import User
-from onyx.db.models import UserFile
 from onyx.db.persona import get_best_persona_id_for_user
 from onyx.db.tools import get_tool_by_id
 from onyx.file_store.file_store import get_default_file_store
 from onyx.file_store.models import FileDescriptor
-from onyx.file_store.models import InMemoryChatFile
 from onyx.llm.override_models import LLMOverride
 from onyx.llm.override_models import PromptOverride
 from onyx.server.query_and_chat.models import ChatMessageDetail
@@ -1193,90 +1190,6 @@ def get_db_search_doc_by_document_id(
     return search_doc


-def create_search_doc_from_user_file(
-    db_user_file: UserFile, associated_chat_file: InMemoryChatFile, db_session: Session
-) -> SearchDoc:
-    """Create a SearchDoc in the database from a UserFile and return it.
-    This ensures proper ID generation by SQLAlchemy and prevents duplicate key errors.
-    """
-    blurb = ""
-    if associated_chat_file and associated_chat_file.content:
-        try:
-            # Try to decode as UTF-8, but handle errors gracefully
-            content_sample = associated_chat_file.content[:100]
-            # Remove null bytes which can cause SQL errors
-            content_sample = content_sample.replace(b"\x00", b"")
-
-            # NOTE(rkuo): this used to be "replace" instead of strict, but
-            # that would bypass the binary handling below
-            blurb = content_sample.decode("utf-8", errors="strict")
-        except Exception:
-            # If decoding fails completely, provide a generic description
-            blurb = f"[Binary file: {db_user_file.name}]"
-
-    db_search_doc = SearchDoc(
-        document_id=db_user_file.document_id,
-        chunk_ind=0,  # Default to 0 for user files
-        semantic_id=db_user_file.name,
-        link=db_user_file.link_url,
-        blurb=blurb,
-        source_type=DocumentSource.FILE,  # Assuming internal source for user files
-        boost=0,  # Default boost
-        hidden=False,  # Default visibility
-        doc_metadata={},  # Empty metadata
-        score=0.0,  # Default score of 0.0 instead of None
-        is_relevant=None,  # No relevance initially
-        relevance_explanation=None,  # No explanation initially
-        match_highlights=[],  # No highlights initially
-        updated_at=db_user_file.created_at,  # Use created_at as updated_at
-        primary_owners=[],  # Empty list instead of None
-        secondary_owners=[],  # Empty list instead of None
-        is_internet=False,  # Not from internet
-    )
-
-    db_session.add(db_search_doc)
-    db_session.flush()  # Get the ID but don't commit yet
-
-    return db_search_doc
-
-
-def translate_db_user_file_to_search_doc(
-    db_user_file: UserFile, associated_chat_file: InMemoryChatFile
-) -> SearchDoc:
-    blurb = ""
-    if associated_chat_file and associated_chat_file.content:
-        try:
-            # Try to decode as UTF-8, but handle errors gracefully
-            content_sample = associated_chat_file.content[:100]
-            # Remove null bytes which can cause SQL errors
-            content_sample = content_sample.replace(b"\x00", b"")
-            blurb = content_sample.decode("utf-8", errors="replace")
-        except Exception:
-            # If decoding fails completely, provide a generic description
-            blurb = f"[Binary file: {db_user_file.name}]"
-
-    return SearchDoc(
-        # Don't set ID - let SQLAlchemy auto-generate it
-        document_id=db_user_file.document_id,
-        chunk_ind=0,  # Default to 0 for user files
-        semantic_id=db_user_file.name,
-        link=db_user_file.link_url,
-        blurb=blurb,
-        source_type=DocumentSource.FILE,  # Assuming internal source for user files
-        boost=0,  # Default boost
-        hidden=False,  # Default visibility
-        doc_metadata={},  # Empty metadata
-        score=0.0,  # Default score of 0.0 instead of None
-        is_relevant=None,  # No relevance initially
-        relevance_explanation=None,  # No explanation initially
-        match_highlights=[],  # No highlights initially
-        updated_at=db_user_file.created_at,  # Use created_at as updated_at
-        primary_owners=[],  # Empty list instead of None
-        secondary_owners=[],  # Empty list instead of None
-        is_internet=False,  # Not from internet
-    )
-
-
 def translate_db_search_doc_to_server_search_doc(
     db_search_doc: SearchDoc,
     remove_doc_content: bool = False,

backend/onyx/db/connector_credential_pair.py

Lines changed: 0 additions & 29 deletions
@@ -34,7 +34,6 @@
 from onyx.db.models import SearchSettings
 from onyx.db.models import User
 from onyx.db.models import User__UserGroup
-from onyx.db.models import UserFile
 from onyx.db.models import UserGroup__ConnectorCredentialPair
 from onyx.db.models import UserRole
 from onyx.server.models import StatusResponse
@@ -805,31 +804,3 @@ def find_latest_index_attempt(
     )

     db_session.commit()
-
-
-def get_connector_credential_pairs_with_user_files(
-    db_session: Session,
-) -> list[ConnectorCredentialPair]:
-    """
-    Get all connector credential pairs that have associated user files.
-
-    Args:
-        db_session: Database session
-
-    Returns:
-        List of ConnectorCredentialPair objects that have user files
-    """
-    return (
-        db_session.query(ConnectorCredentialPair)
-        .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
-        .distinct()
-        .all()
-    )
-
-
-def delete_userfiles_for_cc_pair__no_commit(
-    db_session: Session,
-    cc_pair_id: int,
-) -> None:
-    stmt = delete(UserFile).where(UserFile.cc_pair_id == cc_pair_id)
-    db_session.execute(stmt)

backend/onyx/db/user_file.py

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@


 def fetch_chunk_counts_for_user_files(
-    user_file_ids: list[int],
+    user_file_ids: list[str],
     db_session: Session,
 ) -> list[tuple[str, int]]:
     """
@@ -56,6 +56,6 @@ def fetch_user_project_ids_for_user_files(
     stmt = select(UserFile).where(UserFile.id.in_(user_file_ids))
     results = db_session.execute(stmt).scalars().all()
     return {
-        user_file.id: [project.id for project in user_file.projects]
+        str(user_file.id): [project.id for project in user_file.projects]
         for user_file in results
     }
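
Note on the change above: these helpers now traffic in string IDs, so callers still holding UUID objects (UserFile.id values) need to stringify them first. A minimal sketch of that calling convention, assuming a SQLAlchemy Session is already available; the wrapper name chunk_counts_for_uuids is made up for illustration:

from uuid import UUID

from sqlalchemy.orm import Session

from onyx.db.user_file import fetch_chunk_counts_for_user_files


def chunk_counts_for_uuids(
    user_file_uuids: list[UUID], db_session: Session
) -> dict[str, int]:
    # The helper now expects string IDs, so stringify the UUIDs up front.
    user_file_ids = [str(uid) for uid in user_file_uuids]
    # It returns list[tuple[str, int]], so a dict keyed by the string IDs is natural.
    return dict(fetch_chunk_counts_for_user_files(user_file_ids, db_session))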

backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py

Lines changed: 5 additions & 2 deletions
@@ -170,8 +170,11 @@ def _build_time_filter(
     # Document sets
     filter_str += _build_or_filters(DOCUMENT_SETS, filters.document_set)

-    # New: user_file_ids as integer filters
-    filter_str += _build_or_filters(DOCUMENT_ID, filters.user_file_ids)
+    # Convert UUIDs to strings for user_file_ids
+    user_file_ids_str = (
+        [str(uuid) for uuid in filters.user_file_ids] if filters.user_file_ids else None
+    )
+    filter_str += _build_or_filters(DOCUMENT_ID, user_file_ids_str)

     # Time filter
     filter_str += _build_time_filter(filters.time_cutoff)
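
The conversion above keeps an absent filter absent (None stays None) and otherwise hands plain strings to the OR-filter builder. A standalone sketch of the same pattern, with the real _build_or_filters call left out and made-up UUIDs:

from uuid import UUID, uuid4


def to_filter_values(user_file_ids: list[UUID] | None) -> list[str] | None:
    # None means "no user_file_ids filter"; otherwise stringify each UUID.
    return [str(uid) for uid in user_file_ids] if user_file_ids else None


print(to_filter_values([uuid4(), uuid4()]))  # e.g. ['0b6f...', '9c2e...']
print(to_filter_values(None))                # None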

backend/onyx/file_store/utils.py

Lines changed: 2 additions & 2 deletions
@@ -22,12 +22,12 @@
 logger = setup_logger()


-def user_file_id_to_plaintext_file_name(user_file_id: int) -> str:
+def user_file_id_to_plaintext_file_name(user_file_id: UUID) -> str:
     """Generate a consistent file name for storing plaintext content of a user file."""
     return f"plaintext_{user_file_id}"


-def store_user_file_plaintext(user_file_id: int, plaintext_content: str) -> bool:
+def store_user_file_plaintext(user_file_id: UUID, plaintext_content: str) -> bool:
     """
     Store plaintext content for a user file in the file store.
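
Since user_file_id is now a UUID, the derived plaintext file name simply embeds its string form. A quick, runnable illustration of the new signature's behavior (the UUID literal below is made up):

from uuid import UUID


def user_file_id_to_plaintext_file_name(user_file_id: UUID) -> str:
    """Generate a consistent file name for storing plaintext content of a user file."""
    return f"plaintext_{user_file_id}"


print(user_file_id_to_plaintext_file_name(UUID("12345678-1234-5678-1234-567812345678")))
# plaintext_12345678-1234-5678-1234-567812345678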

backend/onyx/indexing/adapters/user_file_indexing_adapter.py

Lines changed: 5 additions & 5 deletions
@@ -145,7 +145,7 @@ def build_metadata_aware_chunks(
             llm_tokenizer = None

         user_file_id_to_raw_text: dict[str, str] = {}
-        user_file_id_to_token_count: dict[int, int | None] = {}
+        user_file_id_to_token_count: dict[str, int | None] = {}
         for user_file_id in updatable_ids:
             user_file_chunks = [
                 chunk
@@ -156,14 +156,14 @@ def build_metadata_aware_chunks(
                 combined_content = " ".join(
                     [chunk.content for chunk in user_file_chunks]
                 )
-                user_file_id_to_raw_text[user_file_id] = combined_content
+                user_file_id_to_raw_text[str(user_file_id)] = combined_content
                 token_count = (
                     len(llm_tokenizer.encode(combined_content)) if llm_tokenizer else 0
                 )
-                user_file_id_to_token_count[user_file_id] = token_count
+                user_file_id_to_token_count[str(user_file_id)] = token_count
             else:
-                user_file_id_to_raw_text[user_file_id] = None
-                user_file_id_to_token_count[user_file_id] = None
+                user_file_id_to_raw_text[str(user_file_id)] = ""
+                user_file_id_to_token_count[str(user_file_id)] = None

         access_aware_chunks = [
             DocMetadataAwareIndexChunk.from_index_chunk(
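
The adapter now keys both maps by str(user_file_id) and records an empty string (rather than None) when a user file produced no chunks. A toy version of that bookkeeping, with fabricated IDs and chunk text and no tokenizer, so the token count falls back to 0 as in the real code:

from uuid import uuid4

updatable_ids = [uuid4(), uuid4()]
chunks_by_id = {updatable_ids[0]: ["first chunk", "second chunk"]}  # second file has no chunks

user_file_id_to_raw_text: dict[str, str] = {}
user_file_id_to_token_count: dict[str, int | None] = {}

for user_file_id in updatable_ids:
    user_file_chunks = chunks_by_id.get(user_file_id, [])
    if user_file_chunks:
        combined_content = " ".join(user_file_chunks)
        user_file_id_to_raw_text[str(user_file_id)] = combined_content
        user_file_id_to_token_count[str(user_file_id)] = 0  # no tokenizer in this sketch
    else:
        user_file_id_to_raw_text[str(user_file_id)] = ""
        user_file_id_to_token_count[str(user_file_id)] = None

print(user_file_id_to_raw_text)
print(user_file_id_to_token_count)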

0 commit comments
