Skip to content

Privatemode AI #161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
import gc
import os
import pandas as pd
import shutil
from openai import APIConnectionError

from src.embedders import Transformer, util

# Embedder imports are used by eval(Embedder) in __setup_tmp_embedder
from src.embedders.classification.contextual import (
from src.embedders.classification.contextual import ( # noqa: F401
OpenAISentenceEmbedder,
HuggingFaceSentenceEmbedder,
PrivatemodeAISentenceEmbedder,
)
from src.embedders.classification.reduce import PCASentenceReducer
from src.embedders.classification.reduce import PCASentenceReducer # noqa: F401
from src.util import daemon, request_util
from src.util.decorator import param_throttle
from src.util.embedders import get_embedder
Expand Down
79 changes: 79 additions & 0 deletions src/embedders/classification/contextual.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
from openai import OpenAI, AzureOpenAI
from openai import AuthenticationError, RateLimitError
import time
import os
from transformers import AutoTokenizer


PRIVATEMODE_AI_URL = os.getenv("PRIVATEMODE_AI_URL", "http://privatemode-proxy:8080/v1")


class TransformerSentenceEmbedder(SentenceEmbedder):
Expand Down Expand Up @@ -199,3 +204,77 @@ def dump(self, project_id: str, embedding_id: str) -> None:
export_file = util.INFERENCE_DIR / project_id / f"embedder-{embedding_id}.json"
export_file.parent.mkdir(parents=True, exist_ok=True)
util.write_json(self.to_json(), export_file, indent=2)


class PrivatemodeAISentenceEmbedder(SentenceEmbedder):
    """Embeds documents through the Privatemode AI proxy using the OpenAI client.

    The API key sent to the client is a placeholder ("dummy") because real
    authentication is performed inside the proxy; the endpoint comes from the
    module-level ``PRIVATEMODE_AI_URL`` constant (env var with a default).
    """

    def __init__(
        self,
        batch_size: int = 128,
        model_name: str = "intfloat/multilingual-e5-large-instruct",
    ):
        """
        Embeds documents using the Privatemode AI proxy via OpenAI classes.
        Note that the model and api key are currently hardcoded since they
        aren't configurable.

        Args:
            batch_size (int, optional): Defines the number of conversions after which the embedder yields. Defaults to 128.
            model_name (str, optional): Name of the embedding model from Privatemode AI (e.g. intfloat/multilingual-e5-large-instruct). Defaults to "intfloat/multilingual-e5-large-instruct".
        """
        super().__init__(batch_size)
        self.model_name = model_name
        self.openai_client = OpenAI(
            api_key="dummy",  # placeholder — authentication happens in the proxy
            base_url=PRIVATEMODE_AI_URL,
        )
        # Tokenizer is only used to trim inputs exceeding the model's
        # context window (see _trim_length); no tensors are produced.
        self._auto_tokenizer = AutoTokenizer.from_pretrained(self.model_name)

    def _encode(
        self, documents: List[Union[str, Doc]], fit_model: bool
    ) -> Generator[List[List[float]], None, None]:
        """Yield one list of embedding vectors per batch of documents.

        Newlines are flattened to spaces and each document is truncated to the
        model's maximum token length before being sent to the proxy.
        NOTE(review): assumes each document supports str.replace, i.e. is a
        plain string at this point — confirm how Doc instances reach this path.

        Raises:
            Exception: If the Privatemode AI proxy rejects the request as
                unauthenticated.
        """
        for documents_batch in util.batch(documents, self.batch_size):
            documents_batch = [
                self._trim_length(doc.replace("\n", " ")) for doc in documents_batch
            ]
            try:
                response = self.openai_client.embeddings.create(
                    input=documents_batch, model=self.model_name
                )
                embeddings = [entry.embedding for entry in response.data]
                yield embeddings
            except AuthenticationError as err:
                # The key is hardcoded here, so a failure points at the proxy
                # setup — not at anything the caller can pass in.
                raise Exception(
                    "Authentication against the Privatemode AI proxy failed. "
                    "Check the proxy configuration (PRIVATEMODE_AI_URL)."
                ) from err

    @staticmethod
    def load(embedder: dict) -> "PrivatemodeAISentenceEmbedder":
        """Reconstruct an embedder from the dict produced by to_json()."""
        return PrivatemodeAISentenceEmbedder(
            model_name=embedder["model_name"],
            batch_size=embedder["batch_size"],
        )

    def to_json(self) -> dict:
        """Serialize the embedder configuration for later reconstruction via load()."""
        return {
            "cls": "PrivatemodeAISentenceEmbedder",
            "model_name": self.model_name,
            "batch_size": self.batch_size,
        }

    def dump(self, project_id: str, embedding_id: str) -> None:
        """Write the JSON config to the project's inference directory."""
        export_file = util.INFERENCE_DIR / project_id / f"embedder-{embedding_id}.json"
        export_file.parent.mkdir(parents=True, exist_ok=True)
        util.write_json(self.to_json(), export_file, indent=2)

    def _trim_length(self, text: str, max_length: int = 512) -> str:
        """Truncate *text* to at most *max_length* tokens of the model's tokenizer.

        Tokenizes with truncation, then decodes back to a string. NOTE: the
        round-trip may normalize whitespace/characters slightly even for short
        inputs — inherent to tokenize-then-decode trimming.
        """
        tokens = self._auto_tokenizer(
            text,
            truncation=True,
            max_length=max_length,
            return_tensors=None,  # no tensors needed for just truncating
        )
        return self._auto_tokenizer.decode(tokens["input_ids"], skip_special_tokens=True)
3 changes: 2 additions & 1 deletion src/embedders/classification/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from src.embedders import PCAReducer, util

# Embedder imports are used by eval(Embedder) in load methods
from src.embedders.classification.contextual import (
from src.embedders.classification.contextual import ( # noqa: F401
OpenAISentenceEmbedder,
HuggingFaceSentenceEmbedder,
PrivatemodeAISentenceEmbedder,
)


Expand Down
1 change: 1 addition & 0 deletions src/embedders/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ def read_json(file_path: str) -> dict[str, Any]:
def write_json(obj: Any, file_path: str, **kwargs) -> None:
    """Serialize *obj* as JSON to *file_path*.

    Extra keyword arguments (e.g. ``indent``) are forwarded to ``json.dump``.
    """
    with open(file_path, "w") as out_file:
        json.dump(obj, out_file, **kwargs)

3 changes: 3 additions & 0 deletions src/util/embedders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from src.embedders.classification.contextual import (
OpenAISentenceEmbedder,
HuggingFaceSentenceEmbedder,
PrivatemodeAISentenceEmbedder,
)
from src.embedders.extraction.contextual import TransformerTokenEmbedder
from src.embedders.classification.reduce import PCASentenceReducer
Expand Down Expand Up @@ -42,6 +43,8 @@ def get_embedder(
embedder = HuggingFaceSentenceEmbedder(
config_string=model, batch_size=batch_size
)
elif platform == enums.EmbeddingPlatform.PRIVATEMODE_AI.value:
embedder = PrivatemodeAISentenceEmbedder(batch_size=batch_size)
else:
raise Exception(f"Unknown platform {platform}")

Expand Down
2 changes: 1 addition & 1 deletion submodules/model
Submodule model updated 1 files
+4 −0 enums.py