|
33 | 33 | from onyx.auth.schemas import UserRole
|
34 | 34 | from onyx.chat.models import DocumentRelevance
|
35 | 35 | from onyx.configs.chat_configs import HARD_DELETE_CHATS
|
36 |
| -from onyx.configs.constants import DocumentSource |
37 | 36 | from onyx.configs.constants import MessageType
|
38 | 37 | from onyx.context.search.models import InferenceSection
|
39 | 38 | from onyx.context.search.models import RetrievalDocs
|
|
53 | 52 | from onyx.db.models import SearchDoc as DBSearchDoc
|
54 | 53 | from onyx.db.models import ToolCall
|
55 | 54 | from onyx.db.models import User
|
56 |
| -from onyx.db.models import UserFile |
57 | 55 | from onyx.db.persona import get_best_persona_id_for_user
|
58 | 56 | from onyx.db.tools import get_tool_by_id
|
59 | 57 | from onyx.file_store.file_store import get_default_file_store
|
60 | 58 | from onyx.file_store.models import FileDescriptor
|
61 |
| -from onyx.file_store.models import InMemoryChatFile |
62 | 59 | from onyx.llm.override_models import LLMOverride
|
63 | 60 | from onyx.llm.override_models import PromptOverride
|
64 | 61 | from onyx.server.query_and_chat.models import ChatMessageDetail
|
@@ -1193,90 +1190,6 @@ def get_db_search_doc_by_document_id(
|
1193 | 1190 | return search_doc
|
1194 | 1191 |
|
1195 | 1192 |
|
1196 |
def create_search_doc_from_user_file(
    db_user_file: UserFile, associated_chat_file: InMemoryChatFile, db_session: Session
) -> SearchDoc:
    """Create a SearchDoc in the database from a UserFile and return it.

    The new row is added to ``db_session`` and flushed (not committed), so
    its ID is populated before the caller continues.

    Args:
        db_user_file: Supplies ``document_id``, ``name``, ``link_url`` and
            ``created_at`` for the new row.
        associated_chat_file: In-memory file whose leading bytes become the
            blurb. May be falsy or have empty content, in which case the
            blurb is the empty string.
        db_session: Session the row is added to and flushed on.

    Returns:
        The flushed SearchDoc instance.
    """
    # Function-scope import so this block does not depend on the file's
    # import section being changed.
    import codecs

    preview_len = 100  # number of leading bytes used for the blurb

    blurb = ""
    if associated_chat_file and associated_chat_file.content:
        content = associated_chat_file.content
        try:
            # Remove null bytes which can cause SQL errors. 0x00 is never
            # part of a multi-byte UTF-8 sequence, so this cannot corrupt
            # otherwise valid text.
            content_sample = content[:preview_len].replace(b"\x00", b"")

            # Decoding must stay strict so that binary content raises and
            # reaches the fallback below (see NOTE(rkuo) in history: using
            # "replace" would bypass the binary handling). However, slicing
            # bytes can cut a multi-byte character in half; an incremental
            # decoder with final=False buffers such an incomplete trailing
            # sequence instead of raising, while still rejecting bytes that
            # are invalid UTF-8. If the sample is the whole content, a
            # dangling sequence is a genuine error, hence final=True.
            decoder = codecs.getincrementaldecoder("utf-8")(errors="strict")
            blurb = decoder.decode(
                content_sample, final=len(content) <= preview_len
            )
        except (UnicodeDecodeError, TypeError, AttributeError):
            # Not decodable as UTF-8 (or content is not bytes-like):
            # provide a generic description instead of failing.
            blurb = f"[Binary file: {db_user_file.name}]"

    db_search_doc = SearchDoc(
        document_id=db_user_file.document_id,
        chunk_ind=0,  # Default to 0 for user files
        semantic_id=db_user_file.name,
        link=db_user_file.link_url,
        blurb=blurb,
        source_type=DocumentSource.FILE,  # Assuming internal source for user files
        boost=0,  # Default boost
        hidden=False,  # Default visibility
        doc_metadata={},  # Empty metadata
        score=0.0,  # Default score of 0.0 instead of None
        is_relevant=None,  # No relevance initially
        relevance_explanation=None,  # No explanation initially
        match_highlights=[],  # No highlights initially
        updated_at=db_user_file.created_at,  # Use created_at as updated_at
        primary_owners=[],  # Empty list instead of None
        secondary_owners=[],  # Empty list instead of None
        is_internet=False,  # Not from internet
    )

    db_session.add(db_search_doc)
    db_session.flush()  # Get the ID but don't commit yet

    return db_search_doc
1241 |
| - |
1242 |
| - |
1243 |
def translate_db_user_file_to_search_doc(
    db_user_file: UserFile, associated_chat_file: InMemoryChatFile
) -> SearchDoc:
    """Build an unsaved SearchDoc describing a user file.

    Nothing is added to a session here; the returned object has no ID set,
    so the caller decides whether and how to persist it.

    Args:
        db_user_file: Supplies ``document_id``, ``name``, ``link_url`` and
            ``created_at`` for the SearchDoc.
        associated_chat_file: In-memory file whose leading bytes become the
            blurb. May be falsy or have empty content, in which case the
            blurb is the empty string.

    Returns:
        A new SearchDoc populated with defaults for a user file.
    """
    # Function-scope import so this block does not depend on the file's
    # import section being changed.
    import codecs

    preview_len = 100  # number of leading bytes used for the blurb

    blurb = ""
    if associated_chat_file and associated_chat_file.content:
        content = associated_chat_file.content
        try:
            # Remove null bytes which can cause SQL errors. 0x00 is never
            # part of a multi-byte UTF-8 sequence, so this cannot corrupt
            # otherwise valid text.
            content_sample = content[:preview_len].replace(b"\x00", b"")

            # Decode strictly: with errors="replace" decoding never raises,
            # which made the binary fallback below unreachable and turned
            # binary content into a blurb of replacement characters.
            # The incremental decoder (final=False) tolerates a multi-byte
            # character cut in half by the slice, but still raises on bytes
            # that are not valid UTF-8. If the sample is the whole content,
            # a dangling sequence is a genuine error, hence final=True.
            decoder = codecs.getincrementaldecoder("utf-8")(errors="strict")
            blurb = decoder.decode(
                content_sample, final=len(content) <= preview_len
            )
        except (UnicodeDecodeError, TypeError, AttributeError):
            # Not decodable as UTF-8 (or content is not bytes-like):
            # provide a generic description instead of failing.
            blurb = f"[Binary file: {db_user_file.name}]"

    return SearchDoc(
        # Don't set ID - let SQLAlchemy auto-generate it
        document_id=db_user_file.document_id,
        chunk_ind=0,  # Default to 0 for user files
        semantic_id=db_user_file.name,
        link=db_user_file.link_url,
        blurb=blurb,
        source_type=DocumentSource.FILE,  # Assuming internal source for user files
        boost=0,  # Default boost
        hidden=False,  # Default visibility
        doc_metadata={},  # Empty metadata
        score=0.0,  # Default score of 0.0 instead of None
        is_relevant=None,  # No relevance initially
        relevance_explanation=None,  # No explanation initially
        match_highlights=[],  # No highlights initially
        updated_at=db_user_file.created_at,  # Use created_at as updated_at
        primary_owners=[],  # Empty list instead of None
        secondary_owners=[],  # Empty list instead of None
        is_internet=False,  # Not from internet
    )
1278 |
| - |
1279 |
| - |
1280 | 1193 | def translate_db_search_doc_to_server_search_doc(
|
1281 | 1194 | db_search_doc: SearchDoc,
|
1282 | 1195 | remove_doc_content: bool = False,
|
|
0 commit comments