Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
03aa9d6
fix ValidationError when ChromaDB returns documents with None page_co…
Abhinavexists May 27, 2025
8e77a05
Merge branch 'master' into feature/chromadb-validationerr
Abhinavexists May 27, 2025
c4283b8
fix linting error
Abhinavexists May 27, 2025
8045c1b
ruff linting fix
Abhinavexists May 27, 2025
6027882
type annotation fix
Abhinavexists May 27, 2025
3add620
removed unused import
Abhinavexists May 27, 2025
4998a5d
remove empty file
Abhinavexists Jun 2, 2025
3a58900
Discard changes to pyproject.toml
Abhinavexists Jun 2, 2025
73170bd
Discard changes to libs/partners/chroma/uv.lock
Abhinavexists Jun 2, 2025
b7e4310
Merge branch 'master' into feature/chromadb-validationerr
Abhinavexists Jun 2, 2025
f382970
Discard changes to uv.lock
Abhinavexists Jun 2, 2025
47ce04d
add missing test dependencies to fix CI failures
Abhinavexists Jun 2, 2025
72df9b7
Merge branch 'master' into feature/chromadb-validationerr
Abhinavexists Jun 2, 2025
97360f3
Discard changes to libs/partners/chroma/pyproject.toml
Abhinavexists Jun 2, 2025
74e969b
Delete chroma/chroma.sqlite3
Abhinavexists Jun 2, 2025
e7bf0ab
Merge branch 'master' into feature/chromadb-validationerr
eyurtsev Jun 5, 2025
c27a91d
Merge branch 'master' into feature/chromadb-validationerr
ccurme Jun 5, 2025
fcec695
lock
ccurme Jun 5, 2025
514f56b
Merge branch 'master' into feature/chromadb-validationerr
Abhinavexists Jun 15, 2025
74353ac
Merge branch 'master' into feature/chromadb-validationerr
Abhinavexists Jul 7, 2025
8b8c5be
Merge branch 'master' into feature/chromadb-validationerr
mdrxy Jul 16, 2025
b68adba
Merge branch 'master' into feature/chromadb-validationerr
mdrxy Jul 16, 2025
0bb71e8
Merge branch 'master' into feature/chromadb-validationerr
eyurtsev Sep 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added chroma/chroma.sqlite3
Binary file not shown.
19 changes: 18 additions & 1 deletion libs/partners/chroma/langchain_chroma/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,33 @@ def _results_to_docs_and_scores(results: Any) -> list[tuple[Document, float]]:
results["ids"][0],
results["distances"][0],
)
if result[0] is not None
]


def _results_to_docs_and_vectors(results: Any) -> list[tuple[Document, np.ndarray]]:
    """Convert ChromaDB results to documents and vectors, filtering out None content.

    Only the first result set (index ``0`` of each field) is read.

    Args:
        results: Mapping with ``"documents"``, ``"metadatas"``, ``"embeddings"``
            and ``"ids"`` entries, each indexable with ``[0]`` to obtain the
            parallel per-result sequences.

    Returns:
        One ``(Document, embedding)`` tuple for every entry whose document text
        is not ``None``, in the order the entries appear in ``results``.
    """
    return [
        (
            # A missing/falsy metadata value is normalised to an empty dict.
            Document(page_content=text, metadata=metadata or {}, id=doc_id),
            embedding,
        )
        for text, metadata, embedding, doc_id in zip(
            results["documents"][0],
            results["metadatas"][0],
            results["embeddings"][0],
            results["ids"][0],
        )
        # Entries without text cannot become a Document; skip them.
        if text is not None
    ]


def safe_results_to_docs_and_scores(results: Any) -> list[Document]:
    """Build documents from result rows, skipping rows whose content is None.

    Each row is indexed positionally as ``(page_content, metadata, id)``; a
    falsy metadata value is replaced with an empty dict. Despite the function
    name, the returned list contains only documents, not scores.
    """
    documents: list[Document] = []
    for row in results:
        if row[0] is None:
            # No text to wrap in a Document.
            continue
        documents.append(
            Document(page_content=row[0], metadata=row[1] or {}, id=row[2])
        )
    return documents


Expand Down Expand Up @@ -1064,6 +1080,7 @@ def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
for doc, meta, doc_id in zip(
results["documents"], results["metadatas"], results["ids"]
)
if doc is not None # Filter out documents with None page_content
]

def update_document(self, document_id: str, document: Document) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ def test_chroma_with_metadatas_with_vectors() -> None:
vec_1 = embeddings.embed_query(texts[0])
output = docsearch.similarity_search_with_vectors("foo", k=1)
docsearch.delete_collection()
assert output[0][0] == Document(page_content="foo", metadata={"page": "0"})
doc = output[0][0]
assert doc.page_content == "foo"
assert doc.metadata == {"page": "0"}
assert doc.id is not None
assert (output[0][1] == vec_1).all()


Expand Down Expand Up @@ -805,3 +808,44 @@ def test_delete_where_clause(client: chromadb.ClientAPI) -> None:
assert vectorstore._collection.count() == 1
# Clean up
vectorstore.delete_collection()


def test_chroma_handles_none_page_content() -> None:
    """Entries with ``None`` text are dropped when building scored results."""
    from langchain_chroma.vectorstores import _results_to_docs_and_scores

    # The middle entry has no text and is expected to be filtered out.
    texts = ["valid content", None, "another valid content"]
    metadatas = [{"key": "value1"}, {"key": "value2"}, {"key": "value3"}]
    mock_results = {
        "documents": [texts],
        "metadatas": [metadatas],
        "ids": [["id1", "id2", "id3"]],
        "distances": [[0.1, 0.2, 0.3]],
    }

    docs_and_scores = _results_to_docs_and_scores(mock_results)

    assert len(docs_and_scores) == 2
    kept_docs = [pair[0] for pair in docs_and_scores]
    assert [doc.page_content for doc in kept_docs] == [
        "valid content",
        "another valid content",
    ]
    assert [doc.id for doc in kept_docs] == ["id1", "id3"]


def test_chroma_handles_none_page_content_with_vectors() -> None:
    """Entries with ``None`` text are dropped when building vector results."""
    from langchain_chroma.vectorstores import _results_to_docs_and_vectors

    # The middle entry has no text and is expected to be filtered out,
    # together with its id and embedding.
    texts = ["valid content", None, "another valid content"]
    metadatas = [{"key": "value1"}, {"key": "value2"}, {"key": "value3"}]
    vectors = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
    mock_results = {
        "documents": [texts],
        "metadatas": [metadatas],
        "ids": [["id1", "id2", "id3"]],
        "embeddings": [vectors],
    }
    docs_and_vectors = _results_to_docs_and_vectors(mock_results)

    assert len(docs_and_vectors) == 2
    first_doc, first_vector = docs_and_vectors[0][0], docs_and_vectors[0][1]
    second_doc, second_vector = docs_and_vectors[1][0], docs_and_vectors[1][1]
    assert first_doc.page_content == "valid content"
    assert second_doc.page_content == "another valid content"
    assert first_doc.id == "id1"
    assert second_doc.id == "id3"
    assert first_vector == [0.1, 0.2]
    assert second_vector == [0.5, 0.6]
3 changes: 2 additions & 1 deletion libs/partners/chroma/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
authors = []
license = { text = "MIT" }
requires-python = ">=3.9"
dependencies = []
dependencies = [
"pyproject-toml>=0.1.0",
]
name = "langchain-monorepo"
version = "0.0.1"
description = "LangChain mono-repo"
Expand Down Expand Up @@ -93,4 +95,4 @@ pydocstyle = { convention = "google" }
"F841", # allow assignments to variables that are never read -- it's example code

]
"!libs/langchain/langchain/model_laboratory.py" = ["D"]
"!libs/langchain/langchain/model_laboratory.py" = ["D"]
Loading
Loading