From ee98cb571eed3d407016677860b7898a7931ed68 Mon Sep 17 00:00:00 2001 From: hanlintang Date: Fri, 11 Apr 2025 13:32:23 +0000 Subject: [PATCH] [PIR] update milvus pir --- slm/applications/neural_search/recall/milvus/README.md | 10 ++++++++++ .../neural_search/recall/milvus/feature_extract.py | 8 ++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/slm/applications/neural_search/recall/milvus/README.md b/slm/applications/neural_search/recall/milvus/README.md index d29fc91982b2..3c6b6e37b023 100644 --- a/slm/applications/neural_search/recall/milvus/README.md +++ b/slm/applications/neural_search/recall/milvus/README.md @@ -91,6 +91,12 @@ ``` +下载数据集并解压到当前目录: +```shell +wget https://bj.bcebos.com/v1/paddlenlp/data/literature_search_data.zip +unzip literature_search_data.zip +``` + ## 5. 向量检索 @@ -141,6 +147,10 @@ python milvus_ann_search.py --data_path milvus/milvus_data.csv \ * `search`: 是否检索向量 * `batch_size`: 表示的是一次性插入的向量的数量 +也可以运行脚本: +``` +sh scripts/feature_extract.sh +``` | 数据量 | 时间 | | ------------ | ------------ | diff --git a/slm/applications/neural_search/recall/milvus/feature_extract.py b/slm/applications/neural_search/recall/milvus/feature_extract.py index a2a850449ca2..e9ebb4468991 100644 --- a/slm/applications/neural_search/recall/milvus/feature_extract.py +++ b/slm/applications/neural_search/recall/milvus/feature_extract.py @@ -23,6 +23,10 @@ from paddlenlp.data import Pad, Tuple from paddlenlp.transformers import AutoTokenizer +from paddlenlp.utils.env import ( + PADDLE_INFERENCE_MODEL_SUFFIX, + PADDLE_INFERENCE_WEIGHTS_SUFFIX, +) sys.path.append(".") @@ -59,8 +63,8 @@ def __init__( self.max_seq_length = max_seq_length self.batch_size = batch_size - model_file = model_dir + "/inference.get_pooled_embedding.pdmodel" - params_file = model_dir + "/inference.get_pooled_embedding.pdiparams" + model_file = model_dir + f"/inference.get_pooled_embedding{PADDLE_INFERENCE_MODEL_SUFFIX}" + params_file = model_dir + f"/inference.get_pooled_embedding{PADDLE_INFERENCE_WEIGHTS_SUFFIX}" if not os.path.exists(model_file): raise ValueError("not find model file path {}".format(model_file)) if not os.path.exists(params_file):