+from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from http import HTTPStatus
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session

-from ee.onyx.db.query_history import fetch_chat_sessions_eagerly_by_time
from ee.onyx.db.query_history import get_all_query_history_export_tasks
from ee.onyx.db.query_history import get_page_of_chat_sessions
from ee.onyx.db.query_history import get_total_filtered_chat_sessions_count
from onyx.server.documents.models import PaginatedReturn
from onyx.server.query_and_chat.models import ChatSessionDetails
from onyx.server.query_and_chat.models import ChatSessionsResponse
+from onyx.utils.threadpool_concurrency import parallel_yield


router = APIRouter()

@@ -61,41 +62,55 @@ def ensure_query_history_is_enabled(
        )


+def yield_snapshot_from_chat_session(
+    chat_session: ChatSession,
+    db_session: Session,
+) -> Generator[ChatSessionSnapshot | None]:
+    yield snapshot_from_chat_session(chat_session=chat_session, db_session=db_session)
+
+
def fetch_and_process_chat_session_history(
    db_session: Session,
    start: datetime,
    end: datetime,
-    feedback_type: QAFeedbackType | None,
    limit: int | None = 500,
-) -> list[ChatSessionSnapshot]:
-    # observed to be slow a scale of 8192 sessions and 4 messages per session
+) -> Generator[ChatSessionSnapshot]:
+    PAGE_SIZE = 100
+
+    page = 0
+    while True:
+        paged_chat_sessions = get_page_of_chat_sessions(
+            start_time=start,
+            end_time=end,
+            db_session=db_session,
+            page_num=page,
+            page_size=PAGE_SIZE,
+        )

-    # this is a little slow (5 seconds)
-    chat_sessions = fetch_chat_sessions_eagerly_by_time(
-        start=start, end=end, db_session=db_session, limit=limit
-    )
+        if not paged_chat_sessions:
+            break
+
+        paged_snapshots = parallel_yield(
+            [
+                yield_snapshot_from_chat_session(
+                    db_session=db_session,
+                    chat_session=chat_session,
+                )
+                for chat_session in paged_chat_sessions
+            ]
+        )

-    # this is VERY slow (80 seconds) due to create_chat_chain being called
-    # for each session. Needs optimizing.
-    chat_session_snapshots = [
-        snapshot_from_chat_session(chat_session=chat_session, db_session=db_session)
-        for chat_session in chat_sessions
-    ]
-
-    valid_snapshots = [
-        snapshot for snapshot in chat_session_snapshots if snapshot is not None
-    ]
-
-    if feedback_type:
-        valid_snapshots = [
-            snapshot
-            for snapshot in valid_snapshots
-            if any(
-                message.feedback_type == feedback_type for message in snapshot.messages
-            )
-        ]
+        for snapshot in paged_snapshots:
+            if snapshot:
+                yield snapshot
+
+        # If we've fetched *less* than a `PAGE_SIZE` worth
+        # of data, we have reached the end of the
+        # pagination sequence; break.
+        if len(paged_chat_sessions) < PAGE_SIZE:
+            break

-    return valid_snapshots
+        page += 1


def snapshot_from_chat_session(
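For context on the pattern this diff introduces, here is a self-contained sketch of page-at-a-time fetching that streams results from a generator. The `fake_get_page` and `fake_snapshot` helpers are hypothetical stand-ins for `get_page_of_chat_sessions` and `snapshot_from_chat_session`, and a plain thread pool only approximates `parallel_yield` (whose real implementation lives in `onyx.utils.threadpool_concurrency` and is not shown in this diff).

```python
# Sketch only: mirrors the paginate -> fan out -> stream structure of
# fetch_and_process_chat_session_history, using fake data and a thread pool
# in place of the Onyx database helpers and parallel_yield.
from collections.abc import Generator
from concurrent.futures import ThreadPoolExecutor

PAGE_SIZE = 100
TOTAL_ROWS = 250  # pretend the database holds 250 chat sessions


def fake_get_page(page_num: int, page_size: int) -> list[int]:
    # Stand-in for get_page_of_chat_sessions: return one page of session ids.
    start = page_num * page_size
    return list(range(start, min(start + page_size, TOTAL_ROWS)))


def fake_snapshot(session_id: int) -> str | None:
    # Stand-in for snapshot_from_chat_session: some sessions produce no snapshot.
    return None if session_id % 10 == 0 else f"snapshot-{session_id}"


def stream_snapshots() -> Generator[str, None, None]:
    page = 0
    while True:
        rows = fake_get_page(page_num=page, page_size=PAGE_SIZE)
        if not rows:
            break

        # Fan the per-row work out to worker threads, then yield results
        # as they come back so the caller can consume them as a stream.
        with ThreadPoolExecutor(max_workers=8) as pool:
            for snapshot in pool.map(fake_snapshot, rows):
                if snapshot:
                    yield snapshot

        # A short page means we've reached the end of the data.
        if len(rows) < PAGE_SIZE:
            break
        page += 1


if __name__ == "__main__":
    count = sum(1 for _ in stream_snapshots())
    print(f"streamed {count} snapshots")  # 225 with the fake data above
```

The key design point, as in the diff, is that the function never materializes the full history in memory: each page is fetched, converted concurrently, and yielded before the next page is requested, which suits feeding a StreamingResponse or export task.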