1
1
import argparse
2
2
import json
3
3
import random
4
+ import time
4
5
from collections import defaultdict
5
6
from concurrent .futures import ThreadPoolExecutor , as_completed
6
7
from datetime import datetime , timedelta
11
12
from app .config import (
12
13
LITELLM_API_KEY ,
13
14
LITELLM_ENDPOINT ,
14
- LITELLM_MODEL_DEFAULT ,
15
+ LITELLM_MODEL_GENERATION ,
15
16
)
17
+ from app .contents .models import ContentDB
16
18
from app .database import get_session
17
- from app .question_answer .models import ContentFeedbackDB , QueryDB , ResponseFeedbackDB
19
+ from app .llm_call .utils import remove_json_markdown
20
+ from app .question_answer .models import (
21
+ ContentFeedbackDB ,
22
+ QueryDB ,
23
+ QueryResponseContentDB ,
24
+ ResponseFeedbackDB ,
25
+ )
18
26
from app .urgency_detection .models import UrgencyQueryDB
19
27
from app .users .models import UserDB
20
28
from app .utils import get_key_hash
29
37
30
38
try :
31
39
import requests # type: ignore
40
+
32
41
except ImportError :
33
42
print (
34
43
"Please install requests library using `pip install requests` "
39
48
(QueryDB , "query_datetime_utc" ),
40
49
(ResponseFeedbackDB , "feedback_datetime_utc" ),
41
50
(ContentFeedbackDB , "feedback_datetime_utc" ),
51
+ (QueryResponseContentDB , "created_datetime_utc" ),
42
52
(UrgencyQueryDB , "message_datetime_utc" ),
43
53
]
44
54
@@ -106,47 +116,54 @@ def generate_feedback(question_text: str, faq_text: str, sentiment: str) -> dict
106
116
"""
107
117
108
118
response = completion (
109
- model = LITELLM_MODEL_DEFAULT ,
119
+ model = LITELLM_MODEL_GENERATION ,
110
120
api_base = LITELLM_ENDPOINT ,
111
121
api_key = LITELLM_API_KEY ,
112
122
messages = [{"role" : "user" , "content" : prompt }],
113
123
max_tokens = 100 ,
114
124
temperature = 0.7 ,
115
125
)
116
126
117
- # Extract the output from the response
118
- feedback_output = response ["choices" ][0 ]["message" ]["content" ].strip ()
119
- feedback_output = feedback_output .replace ("json" , "" )
120
- feedback_output = feedback_output .replace ("\n " , "" ).strip ()
121
-
122
127
try :
128
+ # Extract the output from the response
129
+ feedback_output = response ["choices" ][0 ]["message" ]["content" ].strip ()
130
+ feedback_output = remove_json_markdown (feedback_output )
123
131
feedback_dict = json .loads (feedback_output )
124
132
if isinstance (feedback_dict , dict ) and "output" in feedback_dict :
125
-
126
133
return feedback_dict
127
134
else :
128
135
raise ValueError ("Output is not in the correct format." )
129
- except ( SyntaxError , ValueError ) as e :
136
+ except Exception as e :
130
137
print (f"Output is not in the correct format.{ e } " )
131
138
return None
132
139
133
140
134
def save_single_row(
    endpoint: str, data: dict, retries: int = 2, timeout: float = 120.0
) -> dict | None:
    """
    Save a single row in the database by POSTing it to an API endpoint.

    The request is attempted once and then retried up to ``retries`` more
    times, waiting 1s, 2s, 4s, ... between consecutive attempts.

    Parameters
    ----------
    endpoint
        Full URL of the endpoint to POST to.
    data
        JSON-serialisable payload sent as the request body.
    retries
        Number of additional attempts after the first failure. Values below
        zero are treated as zero.
    timeout
        Seconds to wait for the server before treating the attempt as failed.
        Without a timeout a stalled connection would block the calling worker
        thread indefinitely.

    Returns
    -------
    dict | None
        The decoded JSON response on success, or ``None`` once every attempt
        has failed (the last error is printed).
    """
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }
    total_attempts = max(retries, 0) + 1
    last_error: Exception | None = None

    for attempt in range(total_attempts):
        if attempt > 0:
            # Exponential wait derived from the attempt number, so the delays
            # are 1s, 2s, 4s, ... regardless of the `retries` value passed in.
            time.sleep(2 ** (attempt - 1))
        try:
            response = requests.post(
                endpoint,
                headers=headers,
                json=data,
                # NOTE(review): TLS certificate verification is disabled here.
                # Presumably this is for local/self-signed deployments;
                # confirm before pointing the script at a shared environment.
                verify=False,
                timeout=timeout,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Deliberately broad: callers treat `None` as "row not saved" and
            # carry on, so no failure of a single request may propagate.
            last_error = e

    print(f"Request failed after retries: {last_error}")
    return None
150
167
151
168
152
169
def process_search (_id : int , text : str ) -> tuple | None :
@@ -161,7 +178,7 @@ def process_search(_id: int, text: str) -> tuple | None:
161
178
"generate_tts" : False ,
162
179
}
163
180
response = save_single_row (endpoint , data )
164
- if "search_results" in response :
181
+ if response and isinstance ( response , dict ) and "search_results" in response :
165
182
return (
166
183
_id ,
167
184
response ["query_id" ],
@@ -215,7 +232,13 @@ def process_content_feedback(
215
232
if is_off_topic and feedback_sentiment == "positive" :
216
233
return None
217
234
# randomly get a content from the search results to provide feedback on
218
- content = search_results [str (random .randint (0 , 3 ))]
235
+ content_num = str (random .randint (0 , 3 ))
236
+ if not search_results or not isinstance (search_results , dict ):
237
+ return None
238
+ if content_num not in search_results :
239
+ return None
240
+
241
+ content = search_results [content_num ]
219
242
220
243
# Get content text and use to generate feedback text using LLMs
221
244
content_text = content ["title" ] + " " + content ["text" ]
@@ -253,19 +276,16 @@ def process_urgency_detection(_id: int, text: str) -> tuple | None:
253
276
}
254
277
255
278
response = save_single_row (endpoint , data )
256
- if "is_urgent" in response :
279
+ if response and "is_urgent" in response :
257
280
return (response ["is_urgent" ],)
258
281
return None
259
282
260
283
261
- def create_random_datetime_from_string (date_string : str ) -> datetime :
284
+ def create_random_datetime_from_string (start_date : datetime ) -> datetime :
262
285
"""
263
286
Create a random datetime between the given start date
264
287
and today
265
288
"""
266
- date_format = "%d-%m-%y"
267
-
268
- start_date = datetime .strptime (date_string , date_format )
269
289
270
290
time_difference = datetime .now () - start_date
271
291
random_number_of_days = random .randint (0 , time_difference .days )
@@ -296,6 +316,7 @@ def update_date_of_records(models: list, random_dates: list, api_key: str) -> No
296
316
# Create a dictionary to map the query_id to the random date
297
317
date_map_dic = {queries [i ].query_id : random_dates [i ] for i in range (len (queries ))}
298
318
for model in models :
319
+ print (f"Updating the date of the records for { model [0 ].__name__ } ..." )
299
320
session = next (get_session ())
300
321
301
322
rows = [c for c in session .query (model [0 ]).all () if c .user_id == user .user_id ]
@@ -312,12 +333,31 @@ def update_date_of_records(models: list, random_dates: list, api_key: str) -> No
312
333
session .commit ()
313
334
314
335
336
def update_date_of_contents(date: datetime) -> None:
    """
    Update the date of the content records in the database for consistency.

    Every ``ContentDB`` row gets both ``created_datetime_utc`` and
    ``updated_datetime_utc`` set to ``date``, and the change is committed in
    a single transaction.

    Parameters
    ----------
    date
        The datetime written to both timestamp columns of every content row.

    Raises
    ------
    Exception
        Any error raised while querying or committing is re-raised after the
        pending transaction has been rolled back.
    """
    session = next(get_session())
    try:
        for content in session.query(ContentDB).all():
            content.created_datetime_utc = date
            content.updated_datetime_utc = date
            session.merge(content)
        session.commit()
    except Exception:
        # Leave no half-applied transaction behind before surfacing the error.
        session.rollback()
        raise
    finally:
        # The session is pulled out of the generator by hand, so it has to be
        # released explicitly instead of relying on garbage collection.
        session.close()
347
+
348
+
315
349
if __name__ == "__main__" :
316
350
HOST = args .host
317
351
NB_WORKERS = int (args .nb_workers ) if args .nb_workers else 8
318
352
API_KEY = args .api_key if args .api_key else ADMIN_API_KEY
319
353
320
- start_date = args .start_date if args .start_date else "01-08-23"
354
+ date_string = args .start_date if args .start_date else "01-08-23"
355
+ date_format = "%d-%m-%y"
356
+ start_date = datetime .strptime (date_string , date_format )
357
+ assert (
358
+ start_date and start_date < datetime .now ()
359
+ ), "Invalid start date. Please provide a valid start date."
360
+
321
361
path = args .csv
322
362
df = pd .read_csv (path )
323
363
saved_queries = defaultdict (list )
@@ -409,5 +449,8 @@ def update_date_of_records(models: list, random_dates: list, api_key: str) -> No
409
449
]
410
450
print ("Updating the date of the records..." )
411
451
update_date_of_records (MODELS , random_dates , API_KEY )
452
+
453
+ print ("Updating the date of the content records..." )
454
+ update_date_of_contents (start_date )
412
455
print ("All records dates updated successfully." )
413
456
print ("All records added successfully." )
0 commit comments