3
3
4
4
from danswer .configs .app_configs import INDEX_BATCH_SIZE
5
5
from danswer .configs .constants import DocumentSource
6
- from danswer .connectors .interfaces import GenerateDocumentsOutput
7
- from danswer .connectors .interfaces import LoadConnector
8
- from danswer .connectors .interfaces import PollConnector
9
- from danswer .connectors .interfaces import SecondsSinceUnixEpoch
10
- from danswer .connectors .models import Document
11
- from danswer .connectors .models import Section
6
+ from danswer .connectors .interfaces import (
7
+ GenerateDocumentsOutput ,
8
+ LoadConnector ,
9
+ PollConnector ,
10
+ SecondsSinceUnixEpoch ,
11
+ )
12
+ from danswer .connectors .models import Document , Section
12
13
from pyairtable import Api as AirtableApi
13
14
14
15
@@ -34,14 +35,34 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None
34
35
35
36
return None
36
37
38
def json_to_text(self, obj: Any, indent: int = 0) -> str:
    """
    Recursively convert a JSON-like object to indented plain text.

    Dict keys are emitted on their own line as ``key:`` and their values are
    rendered one level deeper. List items are rendered at the same level as
    the list itself. Any other value is rendered with ``str()``.

    Args:
        obj: A dict, list, or scalar value to render.
        indent: Nesting depth of ``obj``; each level adds one leading space.

    Returns:
        The rendered text, one newline-terminated line per key and per
        scalar value. Empty dicts and lists contribute no lines.
    """
    lines: list[str] = []

    def _collect(node: Any, depth: int) -> None:
        # Gather lines into one shared list so the text is assembled once,
        # instead of being re-copied at every level of the recursion.
        pad = " " * depth
        if isinstance(node, dict):
            for key, value in node.items():
                lines.append(pad + str(key) + ":")
                _collect(value, depth + 1)
        elif isinstance(node, list):
            for item in node:
                _collect(item, depth)
        else:
            lines.append(pad + str(node))

    _collect(obj, indent)
    # Terminate every line with a bare newline: no character may follow it,
    # otherwise the next line's indentation is shifted.
    return "".join(line + "\n" for line in lines)
54
+
37
55
def poll_source (
38
56
self , start : SecondsSinceUnixEpoch | None , end : SecondsSinceUnixEpoch | None
39
57
) -> GenerateDocumentsOutput :
40
58
if not self .airtable_client :
41
59
raise AirtableClientNotSetUpError ()
42
60
43
61
table = self .airtable_client .table (self .base_id , self .table_name_or_id )
44
- all_records = table .all ()
62
+
63
+ table_name = table .schema ().name
64
+ base_name = self .airtable_client .base (self .base_id , validate = True ).name
65
+ all_records = table .all (cell_format = "string" , time_zone = "UTC" , user_locale = "en" )
45
66
46
67
record_documents = []
47
68
for record in all_records :
@@ -50,14 +71,16 @@ def poll_source(
50
71
sections = [
51
72
Section (
52
73
link = f"https://airtable.com/{ self .base_id } /{ self .table_name_or_id } /" ,
53
- text = json . dumps (record .get ("fields" )),
74
+ text = self . json_to_text (record .get ("fields" )),
54
75
)
55
76
],
56
77
source = DocumentSource .AIRTABLE ,
57
78
semantic_identifier = f"Airtable Base ID: { self .base_id } . Table Name or ID: { self .table_name_or_id } " ,
58
79
metadata = {
59
80
"type" : "airtable" ,
60
81
"created_time" : record .get ("createdTime" ),
82
+ "table_name" : table_name ,
83
+ "base_name" : base_name ,
61
84
},
62
85
)
63
86
record_documents .append (record_document )
0 commit comments