1
1
import json
2
+ import os
2
3
from typing import Any , Literal , Optional
3
4
from langevals_core .base_evaluator import (
4
5
BaseEvaluator ,
@@ -100,11 +101,14 @@ class PresidioPIIDetectionEvaluator(
100
101
101
102
@classmethod
102
103
def preload (cls ):
104
+ cache_dir = os .path .join (os .path .dirname (__file__ ), ".cache" )
105
+ os .makedirs (cache_dir , exist_ok = True )
103
106
try :
104
- spacy .load ("en_core_web_lg" )
107
+ spacy .load (os . path . join ( cache_dir , "en_core_web_lg" ) )
105
108
except Exception :
106
- spacy .cli .download ("en_core_web_lg" ) # type: ignore
107
- spacy .load ("en_core_web_lg" )
109
+ spacy .cli .download ("en_core_web_lg" , False , False , "--no-cache-dir" ) # type: ignore
110
+ nlp = spacy .load ("en_core_web_lg" )
111
+ nlp .to_disk (os .path .join (cache_dir , "en_core_web_lg" ))
108
112
cls .analyzer = AnalyzerEngine (
109
113
nlp_engine = SpacyNlpEngine (
110
114
models = [{"lang_code" : "en" , "model_name" : "en_core_web_lg" }]
@@ -130,9 +134,7 @@ def evaluate(self, entry: PresidioPIIDetectionEntry) -> SingleEvaluationResult:
130
134
"Content exceeds the maximum length of 524288 bytes allowed by PII Detection"
131
135
)
132
136
133
- results = self .analyzer .analyze (
134
- text = content , entities = entities , language = "en"
135
- )
137
+ results = self .analyzer .analyze (text = content , entities = entities , language = "en" )
136
138
results = [
137
139
result for result in results if result .score >= self .settings .min_threshold
138
140
]
0 commit comments