Add support for new Google embedding models and configuration options

Your Name · Your Name · commit d66aa8e40daa · 2025-05-22T13:29:12.000Z
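- Introduced the environment variables `VERTEXAI_EMBEDDING_MODEL_LOCATION` (default `europe-west1`) and `VERTEXAI_EMBEDDING_MODEL_DIMENSION` (default `768`) for specifying the VertexAI embedding model location and dimension.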
- Registered Google's `gemini-embedding-001` and `text-multilingual-embedding-002` as supported embedding models in the backend.
- Updated frontend interface to include the new multilingual embedding model from Google.
diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py
@@ -77,6 +77,13 @@
     os.environ.get("VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE", "25")
 )
 
+VERTEXAI_EMBEDDING_MODEL_LOCATION = os.environ.get("VERTEXAI_EMBEDDING_MODEL_LOCATION", "europe-west1")
+
+# Output dimension of the VertexAI embedding model. Defaults to 768; Gemini embedding models can go up to 3072.
+VERTEXAI_EMBEDDING_MODEL_DIMENSION = int(
+    os.environ.get("VERTEXAI_EMBEDDING_MODEL_DIMENSION", "768")
+)
+
 # Only used for OpenAI
 OPENAI_EMBEDDING_TIMEOUT = int(
     os.environ.get("OPENAI_EMBEDDING_TIMEOUT", API_BASED_EMBEDDING_TIMEOUT)
@@ -222,6 +229,26 @@ async def async_return_default_schema(*args: Any, **kwargs: Any) -> str:
         dim=768,
         index_name="danswer_chunk_text_embedding_004",
     ),
+    SupportedEmbeddingModel(
+        name="google/gemini-embedding-001",
+        dim=768,
+        index_name="danswer_chunk_google_gemini_embedding_001",
+    ),
+    SupportedEmbeddingModel(
+        name="google/gemini-embedding-001",
+        dim=768,
+        index_name="danswer_chunk_gemini_embedding_001",
+    ),
+    SupportedEmbeddingModel(
+        name="google/text-multilingual-embedding-002",
+        dim=768,
+        index_name="danswer_chunk_google_multilingual_embedding_002",
+    ),
+    SupportedEmbeddingModel(
+        name="google/text-multilingual-embedding-002",
+        dim=768,
+        index_name="danswer_chunk_multilingual_embedding_002",
+    ),
     SupportedEmbeddingModel(
         name="google/textembedding-gecko@003",
         dim=768,
diff --git a/web/src/components/embedding/interfaces.tsx b/web/src/components/embedding/interfaces.tsx
@@ -291,6 +291,19 @@ export const AVAILABLE_CLOUD_PROVIDERS: CloudEmbeddingProvider[] = [
         api_key: null,
         api_url: null,
       },
+      {
+        provider_type: EmbeddingProvider.GOOGLE,
+        model_name: "text-multilingual-embedding-002",
+        description: "Google's most recent multilingual text embedding model.",
+        pricePerMillion: 0.015,
+        model_dim: 768,
+        normalize: false,
+        query_prefix: "",
+        passage_prefix: "",
+        index_name: "",
+        api_key: null,
+        api_url: null,
+      },
       {
         provider_type: EmbeddingProvider.GOOGLE,
         model_name: "textembedding-gecko@003",