Skip to content

Commit 6e16c74

Browse files
committed
refactor: improve logging and status updates in embedding methods
1 parent c363003 commit 6e16c74

File tree

1 file changed

+21 -14 lines changed

apps/common/event/listener_manage.py

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,16 @@
66
@date:2023/10/20 14:01
77
@desc:
88
"""
9+
import datetime
910
import os
1011
import threading
11-
import datetime
1212
import traceback
1313
from typing import List
1414

1515
import django.db.models
1616
from django.db.models import QuerySet
1717
from django.db.models.functions import Substr, Reverse
18+
from django.utils.translation import gettext_lazy as _
1819
from langchain_core.embeddings import Embeddings
1920

2021
from common.config.embedding_config import VectorStore
@@ -23,10 +24,9 @@
2324
from common.utils.lock import RedisLock
2425
from common.utils.logger import maxkb_logger
2526
from common.utils.page_utils import page_desc
26-
from knowledge.models import Paragraph, Status, Document, ProblemParagraphMapping, TaskType, State,SourceType, SearchMode
27+
from knowledge.models import Paragraph, Status, Document, ProblemParagraphMapping, TaskType, State, SourceType, \
28+
SearchMode
2729
from maxkb.conf import (PROJECT_DIR)
28-
from django.utils.translation import gettext_lazy as _
29-
3030

3131
lock = threading.Lock()
3232

@@ -91,8 +91,9 @@ def embedding_by_paragraph_list(paragraph_id_list, embedding_model: Embeddings):
9191

9292
@staticmethod
9393
def embedding_by_paragraph_data_list(data_list, paragraph_id_list, embedding_model: Embeddings):
94-
maxkb_logger.info(_('Start--->Embedding paragraph: {paragraph_id_list}').format(paragraph_id_list=paragraph_id_list))
95-
status = State.SUCCESS
94+
maxkb_logger.info(_('Start--->Embedding paragraph: {paragraph_id_list}').format(
95+
paragraph_id_list=paragraph_id_list)
96+
)
9697
try:
9798
# 删除段落
9899
VectorStore.get_embedding_vector().delete_by_paragraph_ids(paragraph_id_list)
@@ -102,14 +103,20 @@ def is_save_function():
102103

103104
# 批量向量化
104105
VectorStore.get_embedding_vector().batch_save(data_list, embedding_model, is_save_function)
106+
ListenerManagement.update_status(
107+
QuerySet(Paragraph).filter(id__in=paragraph_id_list), TaskType.EMBEDDING, State.SUCCESS
108+
)
105109
except Exception as e:
106110
maxkb_logger.error(_('Vectorized paragraph: {paragraph_id_list} error {error} {traceback}').format(
107-
paragraph_id_list=paragraph_id_list, error=str(e), traceback=traceback.format_exc()))
108-
status = State.FAILURE
111+
paragraph_id_list=paragraph_id_list, error=str(e), traceback=traceback.format_exc())
112+
)
113+
ListenerManagement.update_status(
114+
QuerySet(Paragraph).filter(id__in=paragraph_id_list), TaskType.EMBEDDING, State.FAILURE
115+
)
109116
finally:
110-
QuerySet(Paragraph).filter(id__in=paragraph_id_list).update(**{'status': status})
111-
maxkb_logger.info(
112-
_('End--->Embedding paragraph: {paragraph_id_list}').format(paragraph_id_list=paragraph_id_list))
117+
maxkb_logger.info(_('End--->Embedding paragraph: {paragraph_id_list}').format(
118+
paragraph_id_list=paragraph_id_list)
119+
)
113120

114121
@staticmethod
115122
def embedding_by_paragraph(paragraph_id, embedding_model: Embeddings):
@@ -266,12 +273,11 @@ def is_the_task_interrupted():
266273
if is_the_task_interrupted():
267274
return
268275
maxkb_logger.info(_('Start--->Embedding document: {document_id}').format(document_id=document_id)
269-
)
276+
)
270277
# 批量修改状态为PADDING
271278
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING,
272279
State.STARTED)
273280

274-
275281
# 根据段落进行向量化处理
276282
page_desc(QuerySet(Paragraph)
277283
.annotate(
@@ -381,5 +387,6 @@ def hit_test(query_text, knowledge_id: list[str], exclude_document_id_list: list
381387
similarity: float,
382388
search_mode: SearchMode,
383389
embedding: Embeddings):
384-
return VectorStore.get_embedding_vector().hit_test(query_text, knowledge_id, exclude_document_id_list, top_number,
390+
return VectorStore.get_embedding_vector().hit_test(query_text, knowledge_id, exclude_document_id_list,
391+
top_number,
385392
similarity, search_mode, embedding)

0 commit comments

Comments (0)