Skip to content

Commit 49a1910

Browse files
[Feat] Integrate Bedrock & Vertex AI Embedding Adapters (#156)
* Initial commit * Added bedrock embedding support * Fix: Inherit EmbeddingAdapter in Bedrock * added method to instantiate Bedrock class * Added vertexAI embedding * Added schema for vertex ai embeddings * updated json schema for bedrock embeddings * updated sdk's version * Updated SDK's version * Update src/unstract/sdk/adapters/exceptions.py Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com> Signed-off-by: Praveen Kumar <praveen@zipstack.com> * Update __init__.py Signed-off-by: Praveen Kumar <praveen@zipstack.com> * Imported EmbeddingError from parent exception class * moved test_embedding function to parent class * Moved get_json_schema to base Adapter class * added pdm.lock --------- Signed-off-by: Praveen Kumar <praveen@zipstack.com> Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com>
1 parent 1d354ae commit 49a1910

File tree

51 files changed

+788
-606
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+788
-606
lines changed

pdm.lock

Lines changed: 340 additions & 348 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ dependencies = [
2424
# "llama-index-embeddings-fastembed==0.1.4",
2525
"llama-index-embeddings-openai==0.3.1",
2626
"llama-index-embeddings-ollama==0.5.0",
27+
"llama-index-embeddings-bedrock==0.5.0",
28+
"llama-index-embeddings-vertex==0.3.1",
2729
"llama-index-vector-stores-postgres==0.4.1",
2830
# Including Supabase conflicts with postgres on pg-vector.
2931
# Hence, commenting it out at the moment

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.56.0rc4"
1+
__version__ = "0.57.0rc1"
22

33

44
def get_sdk_version():

src/unstract/sdk/adapters/base.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,13 @@ def get_description() -> str:
3030
def get_icon() -> str:
3131
return ""
3232

33-
@staticmethod
34-
@abstractmethod
35-
def get_json_schema() -> str:
36-
return ""
33+
@classmethod
def get_json_schema(cls) -> str:
    """Return the adapter's JSON schema as raw text.

    The schema is read from the file named by the class attribute
    ``SCHEMA_PATH``.

    Returns:
        The unparsed contents of the schema file.

    Raises:
        ValueError: If the class has no ``SCHEMA_PATH`` attribute, or the
            attribute is ``None``.
        OSError: If the schema file cannot be opened or read.
    """
    schema_path = getattr(cls, "SCHEMA_PATH", None)
    if schema_path is None:
        raise ValueError(f"SCHEMA_PATH not defined for {cls.__name__}")
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which differs between hosts.
    with open(schema_path, encoding="utf-8") as f:
        return f.read()
3740

3841
@staticmethod
3942
@abstractmethod

src/unstract/sdk/adapters/embedding/azure_open_ai/src/azure_open_ai.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def __init__(self, settings: dict[str, Any]):
2727
super().__init__("AzureOpenAIEmbedding")
2828
self.config = settings
2929

30+
SCHEMA_PATH = f"{os.path.dirname(__file__)}/static/json_schema.json"
31+
3032
@staticmethod
3133
def get_id() -> str:
3234
return "azureopenai|9770f3f6-f8ba-4fa0-bb3a-bef48a00e66f"
@@ -47,13 +49,6 @@ def get_provider() -> str:
4749
def get_icon() -> str:
4850
return "/icons/adapter-icons/AzureopenAI.png"
4951

50-
@staticmethod
51-
def get_json_schema() -> str:
52-
f = open(f"{os.path.dirname(__file__)}/static/json_schema.json")
53-
schema = f.read()
54-
f.close()
55-
return schema
56-
5752
def get_embedding_instance(self) -> BaseEmbedding:
5853
try:
5954
embedding_batch_size = EmbeddingHelper.get_embedding_batch_size(
@@ -77,7 +72,4 @@ def get_embedding_instance(self) -> BaseEmbedding:
7772
except Exception as e:
7873
raise AdapterError(str(e))
7974

80-
def test_connection(self) -> bool:
81-
embedding = self.get_embedding_instance()
82-
test_result: bool = EmbeddingHelper.test_embedding_instance(embedding)
83-
return test_result
75+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Unstract Bedrock Embeddings
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[build-system]
2+
requires = ["pdm-backend"]
3+
build-backend = "pdm.backend"
4+
5+
6+
[project]
7+
name = "unstract-bedrock-embedding"
8+
version = "0.0.1"
9+
description = "Bedrock Embedding"
10+
authors = [
11+
{name = "Zipstack Inc.", email = "devsupport@zipstack.com"},
12+
]
13+
dependencies = [
14+
]
15+
requires-python = ">=3.9"
16+
readme = "README.md"
17+
classifiers = [
18+
"Programming Language :: Python"
19+
]
20+
license = {text = "MIT"}
21+
22+
[tool.pdm.build]
23+
includes = ["src"]
24+
package-dir = "src"
25+
# source-includes = ["tests"]
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from .bedrock import Bedrock
2+
3+
metadata = {
4+
"name": Bedrock.__name__,
5+
"version": "1.0.0",
6+
"adapter": Bedrock,
7+
"description": "Bedrock embedding adapter",
8+
"is_active": True,
9+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import os
2+
from typing import Any
3+
4+
from llama_index.core.embeddings import BaseEmbedding
5+
from llama_index.embeddings.bedrock import BedrockEmbedding
6+
7+
from unstract.sdk.adapters.embedding.embedding_adapter import EmbeddingAdapter
8+
from unstract.sdk.adapters.embedding.helper import EmbeddingHelper
9+
from unstract.sdk.adapters.exceptions import AdapterError
10+
11+
class Constants:
    """Setting keys read by the Bedrock adapter, plus fallback values."""

    MODEL = "model"
    TIMEOUT = "timeout"
    MAX_RETRIES = "max_retries"
    SECRET_ACCESS_KEY = "aws_secret_access_key"
    ACCESS_KEY_ID = "aws_access_key_id"
    REGION_NAME = "region_name"
    # Fallbacks applied only when the key is absent from the settings.
    # NOTE(review): the "Bedrock Embeddings" JSON schema advertises defaults
    # of 900 (timeout) and 5 (max_retries) — confirm which pair is intended.
    DEFAULT_TIMEOUT = 240
    DEFAULT_MAX_RETRIES = 3


class Bedrock(EmbeddingAdapter):
    """Embedding adapter that builds a llama-index ``BedrockEmbedding``."""

    # Location of this adapter's JSON schema, next to this module.
    SCHEMA_PATH = f"{os.path.dirname(__file__)}/static/json_schema.json"

    def __init__(self, settings: dict[str, Any]):
        """Store the adapter settings.

        Args:
            settings: Adapter configuration; the keys read from it are
                listed in ``Constants``.
        """
        super().__init__("Bedrock")
        self.config = settings

    @staticmethod
    def get_id() -> str:
        """Return the fixed identifier of this adapter."""
        return "bedrock|88199741-8d7e-4e8c-9d92-d76b0dc20c91"

    @staticmethod
    def get_name() -> str:
        """Return the display name of this adapter."""
        return "Bedrock"

    @staticmethod
    def get_description() -> str:
        """Return a short description of this adapter."""
        return "Bedrock Embedding"

    @staticmethod
    def get_provider() -> str:
        """Return the provider key of this adapter."""
        return "bedrock"

    @staticmethod
    def get_icon() -> str:
        """Return the path of this adapter's icon."""
        return "/icons/adapter-icons/Bedrock.png"

    def get_embedding_instance(self) -> BaseEmbedding:
        """Build a ``BedrockEmbedding`` from the stored settings.

        Timeout and retry count fall back to ``Constants.DEFAULT_TIMEOUT``
        and ``Constants.DEFAULT_MAX_RETRIES`` when absent from the settings.

        Returns:
            The configured embedding instance.

        Raises:
            AdapterError: If reading the settings or constructing the
                embedding fails for any reason; the original exception is
                attached as ``__cause__``.
        """
        try:
            embedding_batch_size = EmbeddingHelper.get_embedding_batch_size(
                config=self.config
            )
            embedding: BaseEmbedding = BedrockEmbedding(
                model_name=self.config.get(Constants.MODEL),
                aws_access_key_id=self.config.get(Constants.ACCESS_KEY_ID),
                aws_secret_access_key=self.config.get(Constants.SECRET_ACCESS_KEY),
                region_name=self.config.get(Constants.REGION_NAME),
                timeout=float(
                    self.config.get(Constants.TIMEOUT, Constants.DEFAULT_TIMEOUT)
                ),
                max_retries=int(
                    self.config.get(Constants.MAX_RETRIES, Constants.DEFAULT_MAX_RETRIES)
                ),
                embed_batch_size=embedding_batch_size,
            )
            return embedding
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise AdapterError(str(e)) from e
69+
70+
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
{
2+
"title": "Bedrock Embeddings",
3+
"type": "object",
4+
"required": [
5+
"aws_secret_access_key",
6+
"region_name",
7+
"aws_access_key_id",
8+
"model",
9+
"adapter_name"
10+
],
11+
"properties": {
12+
"adapter_name": {
13+
"type": "string",
14+
"title": "Name",
15+
"default": "",
16+
"description": "Provide a unique name for this adapter instance. Example: Bedrock-Embedding-1"
17+
},
18+
"model": {
19+
"type": "string",
20+
"title": "Model",
21+
"default": "amazon.titan-embed-text-v2:0",
22+
"description": "Model name. Refer to [Bedrock's documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for the list of available models."
23+
},
24+
"aws_access_key_id": {
25+
"type": "string",
26+
"title": "AWS Access Key ID",
27+
"description": "Provide your AWS Access Key ID",
28+
"format": "password"
29+
},
30+
"aws_secret_access_key": {
31+
"type": "string",
32+
"title": "AWS Secret Access Key",
33+
"description": "Provide your AWS Secret Access Key",
34+
"format": "password"
35+
},
36+
"region_name": {
37+
"type": "string",
38+
"title": "AWS Region name",
39+
"description": "Provide the AWS Region name where the service is running. Eg. us-east-1"
40+
},
41+
"embed_batch_size": {
42+
"type": "number",
43+
"minimum": 0,
44+
"multipleOf": 1,
45+
"title": "Embedding Batch Size",
46+
"default": 10
47+
},
48+
"max_retries": {
49+
"type": "number",
50+
"minimum": 0,
51+
"multipleOf": 1,
52+
"title": "Max Retries",
53+
"default": 5,
54+
"description": "Maximum number of retries to attempt when a request fails."
55+
},
56+
"timeout": {
57+
"type": "number",
58+
"minimum": 0,
59+
"multipleOf": 1,
60+
"title": "Timeout",
61+
"default": 900,
62+
"description": "Timeout in seconds"
63+
}
64+
}
65+
}
66+

src/unstract/sdk/adapters/embedding/embedding_adapter.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from unstract.sdk.adapters.base import Adapter
88
from unstract.sdk.adapters.enums import AdapterTypes
99

10+
from unstract.sdk.adapters.embedding.helper import EmbeddingHelper
1011

1112
class EmbeddingAdapter(Adapter, ABC):
1213
def __init__(self, name: str):
@@ -34,10 +35,6 @@ def get_provider() -> str:
3435
def get_icon() -> str:
3536
return ""
3637

37-
@staticmethod
38-
def get_json_schema() -> str:
39-
return ""
40-
4138
@staticmethod
4239
def get_adapter_type() -> AdapterTypes:
4340
return AdapterTypes.EMBEDDING
@@ -50,3 +47,8 @@ def get_embedding_instance(self, embed_config: dict[str, Any]) -> BaseEmbedding:
5047
Raises exceptions for any error
5148
"""
5249
return MockEmbedding(embed_dim=1)
50+
51+
def test_connection(self) -> bool:
    """Check that this adapter's embedding can be used.

    Builds the embedding with ``get_embedding_instance()`` and hands it to
    ``EmbeddingHelper.test_embedding_instance``.

    Returns:
        The result reported by ``EmbeddingHelper.test_embedding_instance``.
    """
    # NOTE(review): the base-class ``get_embedding_instance`` signature shown
    # in this diff takes ``embed_config``, but no argument is passed here —
    # presumably subclasses override it without parameters; confirm.
    return EmbeddingHelper.test_embedding_instance(
        self.get_embedding_instance()
    )

src/unstract/sdk/adapters/embedding/hugging_face/src/hugging_face.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def __init__(self, settings: dict[str, Any]):
2222
super().__init__("HuggingFace")
2323
self.config = settings
2424

25+
SCHEMA_PATH = f"{os.path.dirname(__file__)}/static/json_schema.json"
26+
2527
@staticmethod
2628
def get_id() -> str:
2729
return "huggingface|90ec9ec2-1768-4d69-8fb1-c88b95de5e5a"
@@ -38,13 +40,6 @@ def get_description() -> str:
3840
def get_icon() -> str:
3941
return "/icons/adapter-icons/huggingface.png"
4042

41-
@staticmethod
42-
def get_json_schema() -> str:
43-
f = open(f"{os.path.dirname(__file__)}/static/json_schema.json")
44-
schema = f.read()
45-
f.close()
46-
return schema
47-
4843
def get_embedding_instance(self) -> BaseEmbedding:
4944
try:
5045
embedding_batch_size = EmbeddingHelper.get_embedding_batch_size(
@@ -67,7 +62,4 @@ def get_embedding_instance(self) -> BaseEmbedding:
6762
except Exception as e:
6863
raise AdapterError(str(e))
6964

70-
def test_connection(self) -> bool:
71-
embedding = self.get_embedding_instance()
72-
test_result: bool = EmbeddingHelper.test_embedding_instance(embedding)
73-
return test_result
65+

src/unstract/sdk/adapters/embedding/no_op/src/no_op_embedding.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ def __init__(self, settings: dict[str, Any]):
1515
super().__init__("NoOpCustomEmbedding")
1616
self.config = settings
1717

18+
SCHEMA_PATH = f"{os.path.dirname(__file__)}/static/json_schema.json"
19+
1820
@staticmethod
1921
def get_id() -> str:
2022
return "noOpEmbedding|ff223003-fee8-4079-b288-e86215e6b39a"
@@ -35,13 +37,6 @@ def get_icon() -> str:
3537
def get_provider() -> str:
3638
return "NoOp"
3739

38-
@staticmethod
39-
def get_json_schema() -> str:
40-
f = open(f"{os.path.dirname(__file__)}/static/json_schema.json")
41-
schema = f.read()
42-
f.close()
43-
return schema
44-
4540
def get_embedding_instance(self) -> BaseEmbedding:
4641
embedding: BaseEmbedding = NoOpCustomEmbedding(
4742
embed_dim=1, wait_time=self.config.get("wait_time")

src/unstract/sdk/adapters/embedding/ollama/src/ollama.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def __init__(self, settings: dict[str, Any]):
2020
super().__init__("Ollama")
2121
self.config = settings
2222

23+
SCHEMA_PATH = f"{os.path.dirname(__file__)}/static/json_schema.json"
24+
2325
@staticmethod
2426
def get_id() -> str:
2527
return "ollama|d58d7080-55a9-4542-becd-8433528e127b"
@@ -40,13 +42,6 @@ def get_provider() -> str:
4042
def get_icon() -> str:
4143
return "/icons/adapter-icons/ollama.png"
4244

43-
@staticmethod
44-
def get_json_schema() -> str:
45-
f = open(f"{os.path.dirname(__file__)}/static/json_schema.json")
46-
schema = f.read()
47-
f.close()
48-
return schema
49-
5045
def get_embedding_instance(self) -> BaseEmbedding:
5146
try:
5247
embedding_batch_size = EmbeddingHelper.get_embedding_batch_size(
@@ -61,7 +56,4 @@ def get_embedding_instance(self) -> BaseEmbedding:
6156
except Exception as e:
6257
raise AdapterError(str(e))
6358

64-
def test_connection(self) -> bool:
65-
embedding = self.get_embedding_instance()
66-
test_result: bool = EmbeddingHelper.test_embedding_instance(embedding)
67-
return test_result
59+

src/unstract/sdk/adapters/embedding/open_ai/src/open_ai.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def __init__(self, settings: dict[str, Any]):
2727
super().__init__("OpenAI")
2828
self.config = settings
2929

30+
SCHEMA_PATH = f"{os.path.dirname(__file__)}/static/json_schema.json"
31+
3032
@staticmethod
3133
def get_id() -> str:
3234
return "openai|717a0b0e-3bbc-41dc-9f0c-5689437a1151"
@@ -47,13 +49,6 @@ def get_provider() -> str:
4749
def get_icon() -> str:
4850
return "/icons/adapter-icons/OpenAI.png"
4951

50-
@staticmethod
51-
def get_json_schema() -> str:
52-
f = open(f"{os.path.dirname(__file__)}/static/json_schema.json")
53-
schema = f.read()
54-
f.close()
55-
return schema
56-
5752
def get_embedding_instance(self) -> BaseEmbedding:
5853
try:
5954
timeout = int(self.config.get(Constants.TIMEOUT, Constants.DEFAULT_TIMEOUT))
@@ -73,7 +68,4 @@ def get_embedding_instance(self) -> BaseEmbedding:
7368
except Exception as e:
7469
raise AdapterError(str(e))
7570

76-
def test_connection(self) -> bool:
77-
embedding = self.get_embedding_instance()
78-
test_result: bool = EmbeddingHelper.test_embedding_instance(embedding)
79-
return test_result
71+

0 commit comments

Comments
 (0)