6
6
@date:2023/10/20 14:01
7
7
@desc:
8
8
"""
9
+ import datetime
9
10
import os
10
11
import threading
11
- import datetime
12
12
import traceback
13
13
from typing import List
14
14
15
15
import django .db .models
16
16
from django .db .models import QuerySet
17
17
from django .db .models .functions import Substr , Reverse
18
+ from django .utils .translation import gettext_lazy as _
18
19
from langchain_core .embeddings import Embeddings
19
20
20
21
from common .config .embedding_config import VectorStore
23
24
from common .utils .lock import RedisLock
24
25
from common .utils .logger import maxkb_logger
25
26
from common .utils .page_utils import page_desc
26
- from knowledge .models import Paragraph , Status , Document , ProblemParagraphMapping , TaskType , State ,SourceType , SearchMode
27
+ from knowledge .models import Paragraph , Status , Document , ProblemParagraphMapping , TaskType , State , SourceType , \
28
+ SearchMode
27
29
from maxkb .conf import (PROJECT_DIR )
28
- from django .utils .translation import gettext_lazy as _
29
-
30
30
31
31
lock = threading .Lock ()
32
32
@@ -91,8 +91,9 @@ def embedding_by_paragraph_list(paragraph_id_list, embedding_model: Embeddings):
91
91
92
92
@staticmethod
93
93
def embedding_by_paragraph_data_list (data_list , paragraph_id_list , embedding_model : Embeddings ):
94
- maxkb_logger .info (_ ('Start--->Embedding paragraph: {paragraph_id_list}' ).format (paragraph_id_list = paragraph_id_list ))
95
- status = State .SUCCESS
94
+ maxkb_logger .info (_ ('Start--->Embedding paragraph: {paragraph_id_list}' ).format (
95
+ paragraph_id_list = paragraph_id_list )
96
+ )
96
97
try :
97
98
# Delete the existing vector entries for these paragraphs
98
99
VectorStore .get_embedding_vector ().delete_by_paragraph_ids (paragraph_id_list )
@@ -102,14 +103,20 @@ def is_save_function():
102
103
103
104
# Vectorize in batch
104
105
VectorStore .get_embedding_vector ().batch_save (data_list , embedding_model , is_save_function )
106
+ ListenerManagement .update_status (
107
+ QuerySet (Paragraph ).filter (id__in = paragraph_id_list ), TaskType .EMBEDDING , State .SUCCESS
108
+ )
105
109
except Exception as e :
106
110
maxkb_logger .error (_ ('Vectorized paragraph: {paragraph_id_list} error {error} {traceback}' ).format (
107
- paragraph_id_list = paragraph_id_list , error = str (e ), traceback = traceback .format_exc ()))
108
- status = State .FAILURE
111
+ paragraph_id_list = paragraph_id_list , error = str (e ), traceback = traceback .format_exc ())
112
+ )
113
+ ListenerManagement .update_status (
114
+ QuerySet (Paragraph ).filter (id__in = paragraph_id_list ), TaskType .EMBEDDING , State .FAILURE
115
+ )
109
116
finally :
110
- QuerySet ( Paragraph ). filter ( id__in = paragraph_id_list ). update ( ** { 'status' : status })
111
- maxkb_logger . info (
112
- _ ( 'End--->Embedding paragraph: {paragraph_id_list}' ). format ( paragraph_id_list = paragraph_id_list ) )
117
+ maxkb_logger . info ( _ ( 'End--->Embedding paragraph: {paragraph_id_list}' ). format (
118
+ paragraph_id_list = paragraph_id_list )
119
+ )
113
120
114
121
@staticmethod
115
122
def embedding_by_paragraph (paragraph_id , embedding_model : Embeddings ):
@@ -266,12 +273,11 @@ def is_the_task_interrupted():
266
273
if is_the_task_interrupted ():
267
274
return
268
275
maxkb_logger .info (_ ('Start--->Embedding document: {document_id}' ).format (document_id = document_id )
269
- )
276
+ )
270
277
# Mark the document's embedding task as STARTED
271
278
ListenerManagement .update_status (QuerySet (Document ).filter (id = document_id ), TaskType .EMBEDDING ,
272
279
State .STARTED )
273
280
274
-
275
281
# Run vectorization over the document's paragraphs
276
282
page_desc (QuerySet (Paragraph )
277
283
.annotate (
@@ -381,5 +387,6 @@ def hit_test(query_text, knowledge_id: list[str], exclude_document_id_list: list
381
387
similarity : float ,
382
388
search_mode : SearchMode ,
383
389
embedding : Embeddings ):
384
- return VectorStore .get_embedding_vector ().hit_test (query_text , knowledge_id , exclude_document_id_list , top_number ,
390
+ return VectorStore .get_embedding_vector ().hit_test (query_text , knowledge_id , exclude_document_id_list ,
391
+ top_number ,
385
392
similarity , search_mode , embedding )
0 commit comments