
Commit cb99f35

LLM: Update to langchain-cratedb 0.0.0
After quite a bit of back and forth, and a slow genesis in general, this subsystem is finally ready for takeoff. On the surface of the interface, this update doesn't change much.
1 parent 706a09d commit cb99f35

12 files changed (+42, -67 lines)
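In practice, the migration boils down to switching the package and a couple of renames; here is a compact before/after sketch drawn from the diffs below (illustrative only, not part of the commit):

# Before: classes imported from langchain-community.
# from langchain_community.vectorstores import CrateDBVectorSearch
# from langchain_community.chat_message_histories import CrateDBChatMessageHistory
# from langchain_community.document_loaders import CrateDBLoader

# After: the dedicated langchain-cratedb package.
from langchain_cratedb.vectorstores import CrateDBVectorStore  # renamed from CrateDBVectorSearch
from langchain_cratedb.chat_message_histories import CrateDBChatMessageHistory
from langchain_cratedb.document_loaders import CrateDBLoader

# In the constructors and `from_documents()` factories, the `connection_string=`
# keyword argument is now called `connection=`.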

topic/machine-learning/llm-langchain/conversational_memory.ipynb

Lines changed: 5 additions & 5 deletions
@@ -59,14 +59,14 @@
 "execution_count": 2,
 "outputs": [],
 "source": [
-"from langchain_community.chat_message_histories import CrateDBChatMessageHistory\n",
+"from langchain_cratedb.chat_message_histories import CrateDBChatMessageHistory\n",
 "\n",
 "# Connect to a self-managed CrateDB instance.\n",
 "CONNECTION_STRING = \"crate://crate@localhost/?schema=notebook\"\n",
 "\n",
 "chat_message_history = CrateDBChatMessageHistory(\n",
 "\tsession_id=\"test_session\",\n",
-"\tconnection_string=CONNECTION_STRING\n",
+"\tconnection=CONNECTION_STRING\n",
 ")\n",
 "\n",
 "# Make sure to start with a blank canvas.\n",
@@ -216,7 +216,7 @@
 "\n",
 "\tchat_message_history = CrateDBChatMessageHistory(\n",
 "\t\tsession_id=\"test_session\",\n",
-"\t\tconnection_string=CONNECTION_STRING,\n",
+"\t\tconnection=CONNECTION_STRING,\n",
 "\t\tcustom_message_converter=CustomMessageConverter(\n",
 "\t\t\tauthor_email=\"test@example.com\"\n",
 "\t\t)\n",
@@ -286,7 +286,7 @@
 "import json\n",
 "import typing as t\n",
 "\n",
-"from langchain_community.chat_message_histories.cratedb import CrateDBMessageConverter\n",
+"from langchain_cratedb.chat_message_histories import CrateDBMessageConverter\n",
 "from langchain.schema import _message_to_dict\n",
 "\n",
 "\n",
@@ -314,7 +314,7 @@
 "\n",
 "\tchat_message_history = CrateDBChatMessageHistory(\n",
 "\t\tsession_id=\"test_session\",\n",
-"\t\tconnection_string=CONNECTION_STRING,\n",
+"\t\tconnection=CONNECTION_STRING,\n",
 "\t\tcustom_message_converter=CustomMessageConverterWithDifferentSessionIdColumn(),\n",
 "\t\tsession_id_field_name=\"custom_session_id\",\n",
 "\t)\n",

topic/machine-learning/llm-langchain/conversational_memory.py

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@
 import os
 from pprint import pprint
 
-from langchain_community.chat_message_histories import CrateDBChatMessageHistory
+from langchain_cratedb.chat_message_histories import CrateDBChatMessageHistory
 
 
 CONNECTION_STRING = os.environ.get(
@@ -29,7 +29,7 @@ def main():
 
     chat_message_history = CrateDBChatMessageHistory(
         session_id="test_session",
-        connection_string=CONNECTION_STRING,
+        connection=CONNECTION_STRING,
     )
     chat_message_history.add_user_message("Hello")
     chat_message_history.add_ai_message("Hi")
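Putting the conversational-memory change together, a minimal sketch of the updated usage, with the connection string and messages taken from the example code above:

from langchain_cratedb.chat_message_histories import CrateDBChatMessageHistory

# Self-managed CrateDB instance, as in the notebook above.
CONNECTION_STRING = "crate://crate@localhost/?schema=notebook"

chat_message_history = CrateDBChatMessageHistory(
    session_id="test_session",
    connection=CONNECTION_STRING,  # formerly `connection_string=`
)
chat_message_history.add_user_message("Hello")
chat_message_history.add_ai_message("Hi")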

topic/machine-learning/llm-langchain/cratedb-vectorstore-rag-openai-sql.ipynb

Lines changed: 2 additions & 4 deletions
@@ -75,13 +75,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import openai\n",
 "import pandas as pd\n",
 "import sqlalchemy as sa\n",
 "\n",
 "from langchain_community.document_loaders import PyPDFLoader\n",
-"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
@@ -162,7 +160,7 @@
 "# environment variables.\n",
 "import os\n",
 "\n",
-"CONNECTION_STRING = CrateDBVectorSearch.connection_string_from_db_params(\n",
+"CONNECTION_STRING = CrateDBVectorStore.connection_string_from_db_params(\n",
 " driver=os.environ.get(\"CRATEDB_DRIVER\", \"crate\"),\n",
 " host=os.environ.get(\"CRATEDB_HOST\", \"localhost\"),\n",
 " port=int(os.environ.get(\"CRATEDB_PORT\", \"4200\")),\n",

topic/machine-learning/llm-langchain/cratedb_rag_customer_support.ipynb

Lines changed: 6 additions & 11 deletions
@@ -65,20 +65,15 @@
 },
 "outputs": [],
 "source": [
-"from langchain.chains import RetrievalQA, ConversationalRetrievalChain\n",
-"from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
-"import pandas as pd\n",
-"import sqlalchemy as sa\n",
-"from sqlalchemy import create_engine\n",
-"from sqlalchemy import text\n",
-"import crate\n",
 "import openai\n",
-"import os\n",
 "import requests\n",
-"from pueblo.util.environ import getenvpass\n",
+"import pandas as pd\n",
+"from langchain.chains import RetrievalQA, ConversationalRetrievalChain\n",
 "from langchain_community.document_loaders import CSVLoader\n",
-"from langchain_community.vectorstores import Chroma\n",
-"from langchain_text_splitters import RecursiveCharacterTextSplitter"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from pueblo.util.environ import getenvpass\n",
+"from sqlalchemy import create_engine\n",
+"from sqlalchemy import text"
 ]
 },
 {

topic/machine-learning/llm-langchain/cratedb_rag_customer_support_langchain.ipynb

Lines changed: 5 additions & 5 deletions
@@ -107,7 +107,7 @@
 "from pueblo.util.environ import getenvpass\n",
 "from langchain_openai import OpenAIEmbeddings\n",
 "from langchain_community.document_loaders import CSVLoader\n",
-"from langchain_community.vectorstores import CrateDBVectorSearch\n",
+"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
 "\n",
 "warnings.filterwarnings('ignore')"
 ]
@@ -301,11 +301,11 @@
 "source": [
 "embeddings = OpenAIEmbeddings()\n",
 "\n",
-"store = CrateDBVectorSearch.from_documents(\n",
+"store = CrateDBVectorStore.from_documents(\n",
 " embedding=embeddings,\n",
 " documents=data,\n",
 " collection_name=COLLECTION_NAME,\n",
-" connection_string=CONNECTION_STRING,\n",
+" connection=CONNECTION_STRING,\n",
 ")"
 ]
 },
@@ -519,11 +519,11 @@
 "\n",
 "COLLECTION_NAME = \"customer_data_jina\"\n",
 "\n",
-"store = CrateDBVectorSearch.from_documents(\n",
+"store = CrateDBVectorStore.from_documents(\n",
 " embedding=embeddings,\n",
 " documents=data,\n",
 " collection_name=COLLECTION_NAME,\n",
-" connection_string=CONNECTION_STRING,\n",
+" connection=CONNECTION_STRING,\n",
 ")\n",
 "documents = return_documents(store, my_question)"
 ]

topic/machine-learning/llm-langchain/cratedb_rag_customer_support_vertexai.ipynb

Lines changed: 5 additions & 13 deletions
@@ -97,28 +97,20 @@
 "outputs": [],
 "source": [
 "import os\n",
+"import re\n",
 "\n",
-"import openai\n",
 "import pandas as pd\n",
-"import warnings\n",
 "import requests\n",
-"import re\n",
-"from typing import Dict, List, Optional, Tuple, Union\n",
-"\n",
+"import warnings\n",
 "\n",
-"from pueblo.util.environ import getenvpass\n",
 "from google.cloud import aiplatform\n",
 "from vertexai.generative_models import (\n",
 " GenerationConfig,\n",
-" GenerationResponse,\n",
 " GenerativeModel,\n",
-" HarmBlockThreshold,\n",
-" HarmCategory,\n",
 ")\n",
 "from langchain_community.document_loaders import CSVLoader\n",
 "from langchain_community.embeddings import VertexAIEmbeddings\n",
-"from langchain_community.llms import VertexAI\n",
-"from langchain_community.vectorstores import CrateDBVectorSearch\n",
+"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
 "\n",
 "warnings.filterwarnings('ignore')"
 ]
@@ -347,11 +339,11 @@
 "source": [
 "embeddings = VertexAIEmbeddings(model_name=\"textembedding-gecko@001\")\n",
 "\n",
-"store = CrateDBVectorSearch.from_documents(\n",
+"store = CrateDBVectorStore.from_documents(\n",
 " embedding=embeddings,\n",
 " documents=data,\n",
 " collection_name=COLLECTION_NAME,\n",
-" connection_string=CONNECTION_STRING,\n",
+" connection=CONNECTION_STRING,\n",
 ")"
 ]
 },

topic/machine-learning/llm-langchain/document_loader.ipynb

Lines changed: 1 addition & 1 deletion
@@ -107,8 +107,8 @@
 "outputs": [],
 "source": [
 "import sqlalchemy as sa\n",
-"from langchain_community.document_loaders import CrateDBLoader\n",
 "from langchain_community.utilities.sql_database import SQLDatabase\n",
+"from langchain_cratedb.document_loaders import CrateDBLoader\n",
 "from pprint import pprint\n",
 "\n",
 "db = SQLDatabase(engine=sa.create_engine(CONNECTION_STRING))\n",

topic/machine-learning/llm-langchain/document_loader.py

Lines changed: 1 addition & 1 deletion
@@ -29,8 +29,8 @@
 import requests
 import sqlalchemy as sa
 from cratedb_toolkit.util import DatabaseAdapter
-from langchain_community.document_loaders import CrateDBLoader
 from langchain_community.utilities.sql_database import SQLDatabase
+from langchain_cratedb.document_loaders import CrateDBLoader
 from pprint import pprint
 
 
Lines changed: 3 additions & 3 deletions
@@ -1,11 +1,11 @@
-# Real.
+# Production.
 cratedb-toolkit[io]
 pueblo[notebook,testing]
 
-# Development.
+# Staging.
 # cratedb-toolkit[io] @ git+https://github.com/crate-workbench/cratedb-toolkit.git@main
 # pueblo[notebook,testing] @ git+https://github.com/pyveci/pueblo.git@main
 
-# Workstation.
+# Development.
 #--editable=/Users/amo/dev/crate/ecosystem/cratedb-retentions[io]
 #--editable=/Users/amo/dev/pyveci/sources/pueblo[testing]
Lines changed: 1 addition & 11 deletions
@@ -1,22 +1,12 @@
-# Real.
 crash
-crate>=1.0.0.dev2
 google-cloud-aiplatform<2
+langchain-cratedb>=0.0.0.dev0
 langchain-google-vertexai<3
 langchain-openai<0.3
 langchain-text-splitters<0.4
 pueblo[cli,nlp]>=0.0.10
-pydantic>=2,<3
 pypdf<6
 python-dotenv<2
 requests<3
 requests-cache<2
-sqlalchemy==2.*
-sqlalchemy-cratedb>=0.40.0
 unstructured<0.17
-
-# Development.
-# cratedb-toolkit @ git+https://github.com/crate-workbench/cratedb-toolkit.git@main
-langchain @ git+https://github.com/crate-workbench/langchain.git@cratedb#subdirectory=libs/langchain
-langchain-community @ git+https://github.com/crate-workbench/langchain.git@cratedb#subdirectory=libs/community
-# pueblo[cli,fileio,nlp] @ git+https://github.com/pyveci/pueblo.git@main

topic/machine-learning/llm-langchain/vector_search.ipynb

Lines changed: 9 additions & 9 deletions
@@ -142,7 +142,7 @@
 "# environment variables.\n",
 "import os\n",
 "\n",
-"CONNECTION_STRING = CrateDBVectorSearch.connection_string_from_db_params(\n",
+"CONNECTION_STRING = CrateDBVectorStore.connection_string_from_db_params(\n",
 " driver=os.environ.get(\"CRATEDB_DRIVER\", \"crate\"),\n",
 " host=os.environ.get(\"CRATEDB_HOST\", \"localhost\"),\n",
 " port=int(os.environ.get(\"CRATEDB_PORT\", \"4200\")),\n",
@@ -166,8 +166,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain_community.vectorstores import CrateDBVectorSearch\n",
 "from langchain_core.documents import Document\n",
+"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
 "from langchain_openai import OpenAIEmbeddings"
 ]
 },
@@ -223,11 +223,11 @@
 "source": [
 "embeddings = OpenAIEmbeddings()\n",
 "\n",
-"store = CrateDBVectorSearch.from_documents(\n",
+"store = CrateDBVectorStore.from_documents(\n",
 " embedding=embeddings,\n",
 " documents=docs,\n",
 " collection_name=COLLECTION_NAME,\n",
-" connection_string=CONNECTION_STRING,\n",
+" connection=CONNECTION_STRING,\n",
 ")"
 ]
 },
@@ -334,10 +334,10 @@
 },
 "outputs": [],
 "source": [
-"store = CrateDBVectorSearch(\n",
+"store = CrateDBVectorStore(\n",
 " collection_name=COLLECTION_NAME,\n",
-" connection_string=CONNECTION_STRING,\n",
-" embedding_function=embeddings,\n",
+" connection=CONNECTION_STRING,\n",
+" embeddings=embeddings,\n",
 ")"
 ]
 },
@@ -426,11 +426,11 @@
 },
 "outputs": [],
 "source": [
-"store = CrateDBVectorSearch.from_documents(\n",
+"store = CrateDBVectorStore.from_documents(\n",
 " documents=docs,\n",
 " embedding=embeddings,\n",
 " collection_name=COLLECTION_NAME,\n",
-" connection_string=CONNECTION_STRING,\n",
+" connection=CONNECTION_STRING,\n",
 " pre_delete_collection=True,\n",
 ")"
 ]

topic/machine-learning/llm-langchain/vector_search.py

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@
 python vector_search.py
 """ # noqa: E501
 
-from langchain_community.vectorstores import CrateDBVectorSearch
+from langchain_cratedb.vectorstores import CrateDBVectorStore
 from langchain_openai import OpenAIEmbeddings
 
 import nltk
@@ -37,7 +37,7 @@ def main():
     documents = CachedWebResource(url).langchain_documents(chunk_size=1000, chunk_overlap=0)
 
     # Embed each chunk, and load them into the vector store.
-    db = CrateDBVectorSearch.from_documents(documents, OpenAIEmbeddings())
+    db = CrateDBVectorStore.from_documents(documents, OpenAIEmbeddings(), connection="crate://")
 
     # Invoke a query, and display the first result.
     query = "What did the president say about Ketanji Brown Jackson"
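And a self-contained sketch of the updated vector store call, using an inline document in place of the cached web resource from the script above; the `similarity_search()` call assumes the standard LangChain vector store interface:

from langchain_core.documents import Document
from langchain_cratedb.vectorstores import CrateDBVectorStore
from langchain_openai import OpenAIEmbeddings

# Inline documents stand in for the chunked web resource used by the script.
docs = [Document(page_content="CrateDB is a distributed SQL database for analytics and search.")]

# Embed the documents and load them into CrateDB; note `connection=` replacing
# the former `connection_string=` parameter.
store = CrateDBVectorStore.from_documents(docs, OpenAIEmbeddings(), connection="crate://")

# Query via the standard LangChain vector store interface (assumed here).
results = store.similarity_search("What is CrateDB?", k=1)
print(results[0].page_content)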
