Skip to content

Commit 879892b

Browse files
committed
Comment the code
1 parent 292b3e6 commit 879892b

File tree

1 file changed

+63
-13
lines changed

1 file changed

+63
-13
lines changed

text_2_sql/data_dictionary/data_dictionary_creator.py

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -120,13 +120,29 @@ def extract_columns_sql_query(self, entity: EntityItem) -> str:
120120
def extract_distinct_values_sql_query(
121121
self, entity: EntityItem, column: ColumnItem
122122
) -> str:
123-
"""A method to extract distinct values from a column in a database."""
123+
"""A method to extract distinct values from a column in a database. Can be sub-classed if needed.
124+
125+
Args:
126+
entity (EntityItem): The entity to extract distinct values from.
127+
column (ColumnItem): The column to extract distinct values from.
128+
129+
Returns:
130+
str: The SQL query to extract distinct values from a column.
131+
"""
124132
return f"""SELECT DISTINCT {column.name} FROM {entity.entity} ORDER BY {column.name} DESC;"""
125133

126134
async def query_entities(
127135
self, sql_query: str, cast_to: any = None
128136
) -> list[EntityItem]:
129-
"""A method to query a database for entities. Can be sub-classed if needed."""
137+
"""A method to query a database for entities. Can be sub-classed if needed.
138+
139+
Args:
140+
sql_query (str): The SQL query to run.
141+
cast_to (any, optional): The class to cast the results to. Defaults to None.
142+
143+
Returns:
144+
list[EntityItem]: The list of entities.
145+
"""
130146
connection_string = os.environ["Text2Sql__DatabaseConnectionString"]
131147

132148
logging.info(f"Running query: {sql_query}")
@@ -147,8 +163,11 @@ async def query_entities(
147163

148164
return results
149165

150-
async def extract_entities_with_descriptions(self):
151-
"""A method to extract entities with descriptions from a database."""
166+
async def extract_entities_with_descriptions(self) -> list[EntityItem]:
167+
"""A method to extract entities with descriptions from a database.
168+
169+
Returns:
170+
list[EntityItem]: The list of entities."""
152171
table_entities = await self.query_entities(
153172
self.extract_table_entities_sql_query, cast_to=EntityItem
154173
)
@@ -177,7 +196,12 @@ async def extract_entities_with_descriptions(self):
177196
async def extract_column_distinct_values(
178197
self, entity: EntityItem, column: ColumnItem
179198
):
180-
"""A method to extract distinct values from a column in a database."""
199+
"""A method to extract distinct values from a column in a database.
200+
201+
Args:
202+
entity (EntityItem): The entity to extract distinct values from.
203+
column (ColumnItem): The column to extract distinct values from.
204+
"""
181205

182206
try:
183207
distinct_values = await self.query_entities(
@@ -187,6 +211,7 @@ async def extract_column_distinct_values(
187211
column.distinct_values = []
188212
for value in distinct_values:
189213
if value[column.name] is not None:
214+
# Strip tabs, newlines and other control whitespace (regular spaces are kept)
190215
if isinstance(value[column.name], str):
191216
column.distinct_values.append(
192217
re.sub(r"[\t\n\r\f\v]+", "", value[column.name])
@@ -197,15 +222,18 @@ async def extract_column_distinct_values(
197222
logging.error(f"Error extracting values for {column.name}")
198223
logging.error(e)
199224

225+
# Handle large set of distinct values
200226
if column.distinct_values is not None and len(column.distinct_values) > 5:
201227
column.sample_values = random.sample(column.distinct_values, 5)
202228
elif column.distinct_values is not None:
203229
column.sample_values = column.distinct_values
204230

205231
async def generate_column_description(self, entity: EntityItem, column: ColumnItem):
206-
"""A method to generate a description for a column in a database."""
232+
"""A method to generate a description for a column in a database.
207233
208-
# TODO: Avoid sending all values if cardinality is too high
234+
Args:
235+
entity (EntityItem): The entity the column belongs to.
236+
column (ColumnItem): The column to generate a description for."""
209237

210238
column_description_system_prompt = """You are an expert in SQL Entity analysis. You must generate a brief description for this SQL Column. This description will be used to generate a SQL query with the correct values. Make sure to include a description of the data contained in this column.
211239
@@ -245,7 +273,13 @@ async def generate_column_description(self, entity: EntityItem, column: ColumnIt
245273
async def extract_columns_with_definitions(
246274
self, entity: EntityItem
247275
) -> list[ColumnItem]:
248-
"""A method to extract column information from a database."""
276+
"""A method to extract column information from a database.
277+
278+
Args:
279+
entity (EntityItem): The entity to extract columns from.
280+
281+
Returns:
282+
list[ColumnItem]: The list of columns."""
249283

250284
columns = await self.query_entities(
251285
self.extract_columns_sql_query(entity), cast_to=ColumnItem
@@ -270,8 +304,15 @@ async def extract_columns_with_definitions(
270304

271305
return columns
272306

273-
async def send_request_to_llm(self, system_prompt, input):
274-
"""A method to use GPT to generate a description for an entity."""
307+
async def send_request_to_llm(self, system_prompt: str, input: str):
308+
"""A method to use GPT to generate a description for an entity.
309+
310+
Args:
311+
system_prompt (str): The system prompt to use.
312+
input (str): The input to use.
313+
314+
Returns:
315+
str: The generated description."""
275316

276317
MAX_TOKENS = 2000
277318

@@ -324,7 +365,10 @@ async def send_request_to_llm(self, system_prompt, input):
324365
return response.choices[0].message.content
325366

326367
async def generate_entity_description(self, entity: EntityItem):
327-
"""A method to generate a description for an entity."""
368+
"""A method to generate a description for an entity.
369+
370+
Args:
371+
entity (EntityItem): The entity to generate a description for."""
328372
name_system_prompt = """You are an expert in SQL Entity analysis. You must generate a human readable name for this SQL Entity. This name will be used to select the most appropriate SQL entity to answer a given question. E.g. 'Sales Data', 'Customer Information', 'Product Catalog'."""
329373

330374
name_input = f"""Provide a human readable name for the {
@@ -358,8 +402,14 @@ async def generate_entity_description(self, entity: EntityItem):
358402
logging.info(f"Description for {entity.entity}: {description}")
359403
entity.description = description
360404

361-
async def build_entity_entry(self, entity: EntityItem):
362-
"""A method to build an entity entry."""
405+
async def build_entity_entry(self, entity: EntityItem) -> EntityItem:
406+
"""A method to build an entity entry.
407+
408+
Args:
409+
entity (EntityItem): The entity to build an entry for.
410+
411+
Returns:
412+
EntityItem: The entity entry."""
363413

364414
logging.info(f"Building entity entry for {entity.entity}")
365415

0 commit comments

Comments
 (0)