Skip to content

Commit 27c218a

Browse files
committed
ML/LlamaIndex: Adjustments to make it work with non-Azure OpenAI
1 parent ca8a44f commit 27c218a

File tree

5 files changed

+83
-130
lines changed

5 files changed

+83
-130
lines changed

topic/machine-learning/llama-index/README.md

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,25 @@ This folder contains the codebase for [this tutorial](https://community.cratedb.
44

55
This has been tested using:
66

7-
* Python 3.12.2
8-
* macOS Sequoia 15.0.1
9-
* CrateDB 5.8.3 running in CrateDB Cloud on AWS Europe (Ireland)
7+
* Python 3.12
8+
* macOS
9+
* CrateDB 5.8 and higher
1010

1111
## Database Setup
1212

1313
You will need a CrateDB Cloud database: sign up [here](https://console.cratedb.cloud/) and use the free "CRFREE" tier.
1414

1515
Make a note of the hostname, username and password for your database. You'll need those when configuring the environment file later.
1616

17+
If you don't use CrateDB Cloud, you can also start a local instance for testing
18+
purposes like this:
19+
20+
```shell
21+
docker run --rm -it --name=cratedb \
22+
--publish=4200:4200 --publish=5432:5432 \
23+
--env=CRATE_HEAP_SIZE=2g crate:latest -Cdiscovery.type=single-node
24+
```
25+
1726
Create a table in CrateDB:
1827

1928
```sql
@@ -61,7 +70,7 @@ pip install -r requirements.txt
6170

6271
## Configure your Environment
6372

64-
To configure your environment, copy the provided [`env.example`](./env.example) file to a new file named `.env`, then open it with a text editor.
73+
To configure your environment, copy the provided [`env.azure`](./env.azure) or [`env.standalone`](./env.standalone) file to a new file named `.env`, then open it with a text editor.
6574

6675
Set the values in the file as follows:
6776

@@ -72,7 +81,7 @@ OPENAI_AZURE_ENDPOINT=https://<Your endpoint from Azure e.g. myendpoint.openai.a
7281
OPENAI_AZURE_API_VERSION=2024-08-01-preview
7382
LLM_INSTANCE=<The name of your Chat GPT 3.5 turbo instance from Azure>
7483
EMBEDDING_MODEL_INSTANCE=<The name of your Text Embedding Ada 2.0 instance from Azure>
75-
CRATEDB_URL="crate://<Database user name>:<Database password>@<Database host>:4200/?ssl=true"
84+
CRATEDB_SQLALCHEMY_URL="crate://<Database user name>:<Database password>@<Database host>:4200/?ssl=true"
7685
CRATEDB_TABLE_NAME=time_series_data
7786
```
7887

topic/machine-learning/llama-index/env.example renamed to topic/machine-learning/llama-index/env.azure

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@ OPENAI_AZURE_ENDPOINT=https://TODO.openai.azure.com
44
OPENAI_AZURE_API_VERSION=2024-08-01-preview
55
LLM_INSTANCE=TODO
66
EMBEDDING_MODEL_INSTANCE=TODO
7-
CRATEDB_URL="crate://USER:PASSWORD@HOST:4200/?ssl=true"
8-
CRATEDB_TABLE_NAME=time_series_data
7+
CRATEDB_SQLALCHEMY_URL="crate://USER:PASSWORD@HOST:4200/?ssl=true"
8+
CRATEDB_TABLE_NAME=time_series_data
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# OPENAI_API_KEY=sk-XJZ7pfog5Gp8Kus8D--invalid--0CJ5lyAKSefZLaV1Y9S1
2+
OPENAI_API_TYPE=openai
3+
CRATEDB_SQLALCHEMY_URL="crate://crate@localhost:4200/"
4+
CRATEDB_TABLE_NAME=time_series_data
Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,92 @@
1-
""" Example code using Azure Open AI and llama-index. """
1+
"""
2+
Use an LLM to query a database in human language.
3+
Example code using LlamaIndex with vanilla Open AI and Azure Open AI.
4+
"""
25

36
import os
47
import openai
58
import sqlalchemy as sa
69

710
from dotenv import load_dotenv
811
from langchain_openai import AzureOpenAIEmbeddings
12+
from langchain_openai import OpenAIEmbeddings
913
from llama_index.llms.azure_openai import AzureOpenAI
14+
from llama_index.llms.openai import OpenAI
1015
from llama_index.embeddings.langchain import LangchainEmbedding
1116
from llama_index.core.utilities.sql_wrapper import SQLDatabase
1217
from llama_index.core.query_engine import NLSQLTableQueryEngine
1318
from llama_index.core import Settings
1419

15-
if __name__ == "__main__":
16-
load_dotenv()
20+
21+
def configure_llm():
22+
"""
23+
Configure LLM. Use either vanilla Open AI, or Azure Open AI.
24+
"""
1725

1826
openai.api_type = os.getenv("OPENAI_API_TYPE")
1927
openai.azure_endpoint = os.getenv("OPENAI_AZURE_ENDPOINT")
2028
openai.api_version = os.getenv("OPENAI_AZURE_API_VERSION")
2129
openai.api_key = os.getenv("OPENAI_API_KEY")
2230

23-
llm = AzureOpenAI(
24-
engine=os.getenv("LLM_INSTANCE"),
25-
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
26-
api_key = os.getenv("OPENAI_API_KEY"),
27-
api_version = os.getenv("OPENAI_AZURE_API_VERSION"),
28-
temperature=0.0
29-
)
31+
if openai.api_type == "openai":
32+
llm = OpenAI(
33+
api_key=os.getenv("OPENAI_API_KEY"),
34+
temperature=0.0
35+
)
36+
elif openai.api_type == "azure":
37+
llm = AzureOpenAI(
38+
engine=os.getenv("LLM_INSTANCE"),
39+
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
40+
api_key = os.getenv("OPENAI_API_KEY"),
41+
api_version = os.getenv("OPENAI_AZURE_API_VERSION"),
42+
temperature=0.0
43+
)
44+
else:
45+
raise ValueError(f"Open AI API type not defined or invalid: {openai.api_type}")
3046

3147
Settings.llm = llm
32-
Settings.embed_model = LangchainEmbedding(
33-
AzureOpenAIEmbeddings(
34-
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
35-
model=os.getenv("EMBEDDING_MODEL_INSTANCE")
48+
if openai.api_type == "openai":
49+
Settings.embed_model = LangchainEmbedding(OpenAIEmbeddings())
50+
elif openai.api_type == "azure":
51+
Settings.embed_model = LangchainEmbedding(
52+
AzureOpenAIEmbeddings(
53+
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"),
54+
model=os.getenv("EMBEDDING_MODEL_INSTANCE")
55+
)
3656
)
37-
)
3857

39-
print("Creating SQLAlchemy engine...")
40-
engine_crate = sa.create_engine(os.getenv("CRATEDB_URL"))
41-
print("Connecting to CrateDB...")
58+
59+
def main():
60+
"""
61+
Use an LLM to query a database in human language.
62+
"""
63+
64+
# Configure application.
65+
load_dotenv()
66+
configure_llm()
67+
68+
# Configure database connection and query engine.
69+
print("Connecting to CrateDB")
70+
engine_crate = sa.create_engine(os.getenv("CRATEDB_SQLALCHEMY_URL"))
4271
engine_crate.connect()
43-
print("Creating SQLDatabase instance...")
72+
73+
print("Creating LlamaIndex QueryEngine")
4474
sql_database = SQLDatabase(engine_crate, include_tables=[os.getenv("CRATEDB_TABLE_NAME")])
45-
print("Creating QueryEngine...")
4675
query_engine = NLSQLTableQueryEngine(
4776
sql_database=sql_database,
4877
tables=[os.getenv("CRATEDB_TABLE_NAME")],
49-
llm = llm
78+
llm=Settings.llm
5079
)
5180

52-
print("Running query...")
53-
81+
# Invoke an inquiry.
82+
print("Running query")
5483
QUERY_STR = "What is the average value for sensor 1?"
5584
answer = query_engine.query(QUERY_STR)
5685
print(answer.get_formatted_sources())
5786
print("Query was:", QUERY_STR)
5887
print("Answer was:", answer)
5988
print(answer.metadata)
89+
90+
91+
if __name__ == "__main__":
92+
main()
Lines changed: 7 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,7 @@
1-
aiohappyeyeballs==2.4.3
2-
aiohttp==3.10.10
3-
aiosignal==1.3.1
4-
annotated-types==0.7.0
5-
anyio==4.6.2.post1
6-
attrs==24.2.0
7-
azure-core==1.31.0
8-
azure-identity==1.19.0
9-
beautifulsoup4==4.12.3
10-
certifi==2024.8.30
11-
cffi==1.17.1
12-
charset-normalizer==3.4.0
13-
click==8.1.7
14-
crate>=1.0.0.dev2
15-
cryptography==43.0.3
16-
dataclasses-json==0.6.7
17-
Deprecated==1.2.14
18-
dirtyjson==1.0.8
19-
distro==1.9.0
20-
frozenlist==1.4.1
21-
fsspec==2024.10.0
22-
geojson==3.1.0
23-
greenlet==3.1.1
24-
h11==0.14.0
25-
httpcore==1.0.6
26-
httpx==0.27.2
27-
idna==3.10
28-
jiter==0.6.1
29-
joblib==1.4.2
30-
jsonpatch==1.33
31-
jsonpointer==3.0.0
32-
langchain==0.3.4
33-
langchain-community==0.3.3
34-
langchain-core==0.3.12
35-
langchain-openai==0.2.3
36-
langchain-text-splitters==0.3.0
37-
langsmith==0.1.136
38-
llama-cloud==0.1.4
39-
llama-index==0.11.19
40-
llama-index-agent-openai==0.3.4
41-
llama-index-cli==0.3.1
42-
llama-index-core==0.11.19
43-
llama-index-embeddings-langchain==0.2.1
44-
llama-index-embeddings-openai==0.2.5
45-
llama-index-indices-managed-llama-cloud==0.4.0
46-
llama-index-legacy==0.9.48.post3
47-
llama-index-llms-azure-openai==0.2.2
48-
llama-index-llms-langchain==0.4.2
49-
llama-index-llms-openai==0.2.15
50-
llama-index-multi-modal-llms-openai==0.2.2
51-
llama-index-program-openai==0.2.0
52-
llama-index-question-gen-openai==0.2.0
53-
llama-index-readers-file==0.2.2
54-
llama-index-readers-llama-parse==0.3.0
55-
llama-parse==0.5.10
56-
marshmallow==3.23.0
57-
msal==1.31.0
58-
msal-extensions==1.2.0
59-
multidict==6.1.0
60-
mypy-extensions==1.0.0
61-
nest-asyncio==1.6.0
62-
networkx==3.4.2
63-
nltk==3.9.1
64-
numpy==1.26.4
65-
openai==1.52.0
66-
orjson==3.10.9
67-
packaging==24.1
68-
pandas==2.2.3
69-
pillow==11.0.0
70-
portalocker==2.10.1
71-
propcache==0.2.0
72-
pycparser==2.22
73-
pydantic==2.9.2
74-
pydantic-settings==2.6.0
75-
pydantic_core==2.23.4
76-
PyJWT==2.9.0
77-
pypdf==4.3.1
78-
python-dateutil==2.9.0.post0
79-
python-dotenv==1.0.1
80-
pytz==2024.2
81-
PyYAML==6.0.2
82-
regex==2024.9.11
83-
requests==2.32.3
84-
requests-toolbelt==1.0.0
85-
six==1.16.0
86-
sniffio==1.3.1
87-
soupsieve==2.6
88-
SQLAlchemy==2.0.36
89-
sqlalchemy-cratedb>=0.40.0
90-
striprtf==0.0.26
91-
tenacity==8.5.0
92-
tiktoken==0.8.0
93-
tqdm==4.66.5
94-
typing-inspect==0.9.0
95-
typing_extensions==4.12.2
96-
tzdata==2024.2
97-
urllib3==2.2.3
98-
verlib2==0.2.0
99-
wrapt==1.16.0
100-
yarl==1.16.0
1+
langchain-openai<0.3
2+
llama-index-embeddings-langchain<0.3
3+
llama-index-embeddings-openai<0.3
4+
llama-index-llms-azure-openai<0.3
5+
llama-index-llms-openai<0.3
6+
python-dotenv
7+
sqlalchemy-cratedb

0 commit comments

Comments
 (0)