Skip to content

Commit c5c907f

Browse files
committed
ai comments fixes
1 parent ede673e commit c5c907f

File tree

15 files changed

+106
-85
lines changed

15 files changed

+106
-85
lines changed

backend/onyx/access/access.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from collections.abc import Callable
22
from typing import cast
33

4+
from sqlalchemy.orm import joinedload
45
from sqlalchemy.orm import Session
56

67
from onyx.access.models import DocumentAccess
@@ -131,7 +132,12 @@ def get_access_for_user_files(
131132
user_file_ids: list[str],
132133
db_session: Session,
133134
) -> dict[str, DocumentAccess]:
134-
user_files = db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()
135+
user_files = (
136+
db_session.query(UserFile)
137+
.options(joinedload(UserFile.user)) # Eager load the user relationship
138+
.filter(UserFile.id.in_(user_file_ids))
139+
.all()
140+
)
135141
return {
136142
str(user_file.id): DocumentAccess.build(
137143
user_emails=[user_file.user.email] if user_file.user else [],

backend/onyx/background/celery/tasks/user_file_processing/tasks.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
107107
for user_file_id in user_file_ids:
108108
self.app.send_task(
109109
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
110-
kwargs={"user_file_id": user_file_id, "tenant_id": tenant_id},
110+
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
111111
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
112112
priority=OnyxCeleryPriority.HIGH,
113113
)
@@ -247,7 +247,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
247247

248248
except Exception as e:
249249
task_logger.exception(
250-
f"process_single_user_file - Error id={user_file_id}: {e}"
250+
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
251251
)
252252
uf.status = UserFileStatus.FAILED
253253
db_session.add(uf)
@@ -269,7 +269,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
269269
db_session.commit()
270270

271271
task_logger.exception(
272-
f"process_single_user_file - Error id={user_file_id}: {e}"
272+
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
273273
)
274274
return None
275275
finally:
@@ -313,7 +313,7 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
313313
for user_file_id in user_file_ids:
314314
self.app.send_task(
315315
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
316-
kwargs={"user_file_id": user_file_id, "tenant_id": tenant_id},
316+
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
317317
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
318318
priority=OnyxCeleryPriority.HIGH,
319319
)
@@ -392,7 +392,7 @@ def process_single_user_file_project_sync(
392392

393393
except Exception as e:
394394
task_logger.exception(
395-
f"process_single_user_file_project_sync - Error id={user_file_id}: {e}"
395+
f"process_single_user_file_project_sync - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
396396
)
397397
return None
398398
finally:
@@ -529,7 +529,7 @@ def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
529529
user_project_ids = [project.id for project in uf.projects]
530530
except Exception as e:
531531
task_logger.warning(
532-
f"Tenant={tenant_id} failed fetching projects for doc_id={new_uuid}: {e}"
532+
f"Tenant={tenant_id} failed fetching projects for doc_id={new_uuid} - {e.__class__.__name__}"
533533
)
534534
try:
535535
_update_document_id_in_vespa(
@@ -540,7 +540,7 @@ def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
540540
)
541541
except Exception as e:
542542
task_logger.warning(
543-
f"Tenant={tenant_id} failed Vespa update for {old_doc_id} -> {new_uuid}: {e}"
543+
f"Tenant={tenant_id} failed Vespa update for doc_id={new_uuid} - {e.__class__.__name__}"
544544
)
545545

546546
# Update search_doc records to refer to the UUID string
@@ -627,7 +627,8 @@ def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
627627
normalized += 1
628628
except Exception as e:
629629
task_logger.warning(
630-
f"Tenant={tenant_id} failed plaintext object normalize for id={fr.file_id}: {e}"
630+
f"Tenant={tenant_id} failed plaintext object normalize for "
631+
f"id={fr.file_id} - {e.__class__.__name__}"
631632
)
632633

633634
if normalized:
@@ -641,7 +642,7 @@ def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
641642
)
642643
except Exception:
643644
task_logger.exception(
644-
f"user_file_docid_migration_task encountered an error during plaintext normalization for tenant={tenant_id}"
645+
f"user_file_docid_migration_task - Error during plaintext normalization for tenant={tenant_id}"
645646
)
646647

647648
task_logger.info(
@@ -650,6 +651,6 @@ def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
650651
return True
651652
except Exception:
652653
task_logger.exception(
653-
f"user_file_docid_migration_task encountered an error for tenant={tenant_id}"
654+
f"user_file_docid_migration_task - Error during execution for tenant={tenant_id}"
654655
)
655656
return False

backend/onyx/configs/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ class FileOrigin(str, Enum):
304304
PLAINTEXT_CACHE = "plaintext_cache"
305305
OTHER = "other"
306306
QUERY_HISTORY_CSV = "query_history_csv"
307+
USER_FILE = "user_file"
307308

308309

309310
class FileType(str, Enum):

backend/onyx/db/chat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def get_chat_sessions_by_user(
170170
db_session: Session,
171171
include_onyxbot_flows: bool = False,
172172
limit: int = 50,
173-
project_id: UUID | None = None,
173+
project_id: int | None = None,
174174
only_non_project_chats: bool = False,
175175
) -> list[ChatSession]:
176176
stmt = select(ChatSession).where(ChatSession.user_id == user_id)

backend/onyx/db/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3271,7 +3271,7 @@ class UserDocument(str, Enum):
32713271
class UserFile(Base):
32723272
__tablename__ = "user_file"
32733273

3274-
id: Mapped[UUID] = mapped_column(PGUUID, primary_key=True)
3274+
id: Mapped[UUID] = mapped_column(PGUUID(as_uuid=True), primary_key=True)
32753275
user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=False)
32763276
assistants: Mapped[list["Persona"]] = relationship(
32773277
"Persona",

backend/onyx/db/projects.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
from typing import List
44
from uuid import UUID
55

6+
from fastapi import HTTPException
67
from fastapi import UploadFile
78
from pydantic import BaseModel
89
from pydantic import ConfigDict
910
from sqlalchemy.orm import Session
1011

1112
from onyx.background.celery.versioned_apps.client import app as client_app
13+
from onyx.configs.constants import FileOrigin
1214
from onyx.configs.constants import OnyxCeleryPriority
1315
from onyx.configs.constants import OnyxCeleryQueues
1416
from onyx.configs.constants import OnyxCeleryTask
@@ -44,7 +46,7 @@ def create_user_files(
4446
categorized_files = categorize_uploaded_files(files)
4547
# NOTE: At the moment, zip metadata is not used for user files.
4648
# Should revisit to decide whether this should be a feature.
47-
upload_response = upload_files(categorized_files.acceptable)
49+
upload_response = upload_files(categorized_files.acceptable, FileOrigin.USER_FILE)
4850
user_files = []
4951
non_accepted_files = categorized_files.non_accepted
5052
unsupported_files = categorized_files.unsupported
@@ -92,6 +94,11 @@ def upload_files_to_user_files_with_indexing(
9294
user: User | None,
9395
db_session: Session,
9496
) -> CategorizedFilesResult:
97+
# Validate project ownership if a project_id is provided
98+
if project_id is not None and user is not None:
99+
if not check_project_ownership(project_id, user.id, db_session):
100+
raise HTTPException(status_code=404, detail="Project not found")
101+
95102
categorized_files_result = create_user_files(files, project_id, user, db_session)
96103
user_files = categorized_files_result.user_files
97104
non_accepted_files = categorized_files_result.non_accepted_files

backend/onyx/server/features/projects/api.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,11 @@ def upload_user_files(
8181
return CategorizedFilesSnapshot.from_result(categorized_files_result)
8282

8383
except Exception as e:
84-
logger.error(f"Error uploading files: {str(e)}")
85-
raise HTTPException(status_code=500, detail=f"Failed to upload files: {str(e)}")
84+
logger.error(f"Error uploading files - type: {type(e).__name__}")
85+
raise HTTPException(
86+
status_code=500,
87+
detail="Failed to upload files. Please try again or contact support if the issue persists.",
88+
)
8689

8790

8891
@router.get("/{project_id}")

backend/onyx/server/features/projects/projects_file_utils.py

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,16 @@
1919

2020
logger = setup_logger()
2121
FILE_TOKEN_COUNT_THRESHOLD = 50000
22+
UNKNOWN_FILENAME = "[unknown_file]" # More descriptive than empty string
23+
24+
25+
def get_safe_filename(upload: UploadFile) -> str:
    """Return the upload's filename, or UNKNOWN_FILENAME when it is missing.

    A filename counts as missing when ``upload.filename`` is falsy (``None``
    or an empty string). That case is logged as a warning before the
    placeholder is returned, so callers always receive a non-empty string.
    """
    name = upload.filename
    if name:
        return name

    # No usable name was supplied with the upload; record it and fall back.
    logger.warning("Received upload with no filename")
    return UNKNOWN_FILENAME
31+
2232

2333
# Guard against extremely large images
2434
Image.MAX_IMAGE_PIXELS = 12000 * 12000
@@ -118,23 +128,25 @@ def categorize_uploaded_files(files: list[UploadFile]) -> CategorizedFiles:
118128

119129
for upload in files:
120130
try:
121-
extension = get_file_ext(upload.filename or "")
131+
filename = get_safe_filename(upload)
132+
extension = get_file_ext(filename)
122133

123134
# If image, estimate tokens via dedicated method first
124135
if extension in ACCEPTED_IMAGE_FILE_EXTENSIONS:
125136
try:
126137
token_count = estimate_image_tokens_for_upload(upload)
127-
except (UnidentifiedImageError, OSError):
128-
results.unsupported.append(upload.filename or "")
138+
except (UnidentifiedImageError, OSError) as e:
139+
logger.warning(
140+
f"Failed to process image file '{filename}': {str(e)}"
141+
)
142+
results.unsupported.append(filename)
129143
continue
130144

131145
if token_count > FILE_TOKEN_COUNT_THRESHOLD:
132-
results.non_accepted.append(upload.filename or "")
146+
results.non_accepted.append(filename)
133147
else:
134148
results.acceptable.append(upload)
135-
results.acceptable_file_to_token_count[upload.filename or ""] = (
136-
token_count
137-
)
149+
results.acceptable_file_to_token_count[filename] = token_count
138150
continue
139151

140152
# Otherwise, handle as text/document: extract text and count tokens
@@ -144,35 +156,40 @@ def categorize_uploaded_files(files: list[UploadFile]) -> CategorizedFiles:
144156
):
145157
text_content = extract_file_text(
146158
file=upload.file,
147-
file_name=upload.filename or "",
159+
file_name=filename,
148160
break_on_unprocessable=False,
149161
extension=extension,
150162
)
151163
if not text_content:
152-
results.unsupported.append(upload.filename or "")
164+
logger.warning(f"No text content extracted from '{filename}'")
165+
results.unsupported.append(filename)
153166
continue
154167

155168
token_count = len(tokenizer.encode(text_content))
156169
if token_count > FILE_TOKEN_COUNT_THRESHOLD:
157-
results.non_accepted.append(upload.filename or "")
170+
results.non_accepted.append(filename)
158171
else:
159172
results.acceptable.append(upload)
160-
results.acceptable_file_to_token_count[upload.filename or ""] = (
161-
token_count
162-
)
173+
results.acceptable_file_to_token_count[filename] = token_count
174+
163175
# Reset file pointer for subsequent upload handling
164176
try:
165177
upload.file.seek(0)
166-
except Exception:
167-
pass
178+
except Exception as e:
179+
logger.warning(
180+
f"Failed to reset file pointer for '{filename}': {str(e)}"
181+
)
168182
continue
169183

170184
# If not recognized as supported types above, mark unsupported
171-
results.unsupported.append(upload.filename or "")
185+
logger.warning(
186+
f"Unsupported file extension '{extension}' for file '{filename}'"
187+
)
188+
results.unsupported.append(filename)
172189
except Exception as e:
173190
logger.warning(
174-
f"Failed to process uploaded file '{getattr(upload, 'filename', 'unknown')}': {e}"
191+
f"Failed to process uploaded file '{get_safe_filename(upload)}' (error_type={type(e).__name__}, error={str(e)})"
175192
)
176-
results.unsupported.append(upload.filename or "")
193+
results.unsupported.append(get_safe_filename(upload))
177194

178195
return results

backend/onyx/server/query_and_chat/chat_backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@
9999
def get_user_chat_sessions(
100100
user: User | None = Depends(current_user),
101101
db_session: Session = Depends(get_session),
102-
project_id: UUID | None = None,
102+
project_id: int | None = None,
103103
only_non_project_chats: bool = True,
104104
) -> ChatSessionsResponse:
105105
user_id = user.id if user is not None else None

web/src/app/chat/components/input/ChatInputBar.tsx

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,8 +291,6 @@ export const ChatInputBar = React.memo(function ChatInputBar({
291291
);
292292
const totalTokens =
293293
(currentSessionFileTokenCount || 0) + currentFilesTokenTotal;
294-
console.log("totalTokens", totalTokens);
295-
console.log("availableContextTokens", availableContextTokens);
296294
// Hide processing state when files are within context limits
297295
return totalTokens < availableContextTokens;
298296
}

0 commit comments

Comments
 (0)