Skip to content
This repository was archived by the owner on Dec 11, 2024. It is now read-only.

Commit 9262c45

Browse files
committed
Convert json to text before sending to index
Signed-off-by: Alex Co <alex.tuan@mindvalley.com>
1 parent 9837bc7 commit 9262c45

File tree

3 files changed

+33
-11
lines changed

3 files changed

+33
-11
lines changed

backend/danswer/connectors/airtable/connector.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33

44
from danswer.configs.app_configs import INDEX_BATCH_SIZE
55
from danswer.configs.constants import DocumentSource
6-
from danswer.connectors.interfaces import GenerateDocumentsOutput
7-
from danswer.connectors.interfaces import LoadConnector
8-
from danswer.connectors.interfaces import PollConnector
9-
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
10-
from danswer.connectors.models import Document
11-
from danswer.connectors.models import Section
6+
from danswer.connectors.interfaces import (
7+
GenerateDocumentsOutput,
8+
LoadConnector,
9+
PollConnector,
10+
SecondsSinceUnixEpoch,
11+
)
12+
from danswer.connectors.models import Document, Section
1213
from pyairtable import Api as AirtableApi
1314

1415

@@ -34,14 +35,34 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None
3435

3536
return None
3637

38+
def json_to_text(self, obj: Any, indent: int = 0) -> str:
    """Render a JSON-like value as indented plain text.

    Dict keys are written as ``key:`` on their own line, with the value
    rendered one level deeper. List items are rendered one after another
    at the current level. Any other value is written with ``str()`` on its
    own line. Each nesting level adds one space of indentation.

    Args:
        obj: Dict, list, or scalar to render. ``None`` produces no output,
            so a missing value never puts the literal word "None" into
            the resulting text.
        indent: Number of leading spaces for the current nesting level.

    Returns:
        The rendered text, with every emitted line terminated by a
        newline; an empty string when there is nothing to render.
    """
    # Collect pieces once and join at the end instead of growing a string
    # with += through every level of recursion.
    pieces: list[str] = []

    def _walk(node: Any, depth: int) -> None:
        if node is None:
            # Nothing to say about an absent value.
            return
        pad = " " * depth
        if isinstance(node, dict):
            for key, value in node.items():
                pieces.append(pad + str(key) + ":\n")
                _walk(value, depth + 1)
        elif isinstance(node, list):
            # List items stay at the same depth as their container.
            for item in node:
                _walk(item, depth)
        else:
            pieces.append(pad + str(node) + "\n")

    _walk(obj, indent)
    return "".join(pieces)
54+
3755
def poll_source(
3856
self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
3957
) -> GenerateDocumentsOutput:
4058
if not self.airtable_client:
4159
raise AirtableClientNotSetUpError()
4260

4361
table = self.airtable_client.table(self.base_id, self.table_name_or_id)
44-
all_records = table.all()
62+
63+
table_name = table.schema().name
64+
base_name = self.airtable_client.base(self.base_id, validate=True).name
65+
all_records = table.all(cell_format="string", time_zone="UTC", user_locale="en")
4566

4667
record_documents = []
4768
for record in all_records:
@@ -50,14 +71,16 @@ def poll_source(
5071
sections=[
5172
Section(
5273
link=f"https://airtable.com/{self.base_id}/{self.table_name_or_id}/",
53-
text=json.dumps(record.get("fields")),
74+
text=self.json_to_text(record.get("fields")),
5475
)
5576
],
5677
source=DocumentSource.AIRTABLE,
5778
semantic_identifier=f"Airtable Base ID: {self.base_id}. Table Name or ID: {self.table_name_or_id}",
5879
metadata={
5980
"type": "airtable",
6081
"created_time": record.get("createdTime"),
82+
"table_name": table_name,
83+
"base_name": base_name,
6184
},
6285
)
6386
record_documents.append(record_document)

backend/requirements/default.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ msal==1.28.0
3535
nltk==3.8.1
3636
Office365-REST-Python-Client==2.5.9
3737
oauthlib==3.2.2
38-
openai==1.41.1
3938
openpyxl==3.1.2
4039
playwright==1.41.2
4140
psutil==5.9.5

backend/requirements/model_server.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ einops==0.8.0
33
fastapi==0.109.2
44
google-cloud-aiplatform==1.58.0
55
numpy==1.26.4
6-
openai==1.41.1
6+
openai==1.53.0
77
pydantic==2.8.2
88
retry==0.9.2
99
safetensors==0.4.2
1010
sentence-transformers==2.6.1
11-
torch==2.2.0
1211
transformers==4.39.2
1312
uvicorn==0.21.1
1413
voyageai==0.2.3
14+
torch==2.5.1

0 commit comments

Comments
 (0)