From 02815003f3ed6fc8511c6f5a765dee454627feb5 Mon Sep 17 00:00:00 2001
From: Ben Constable
Date: Fri, 15 Nov 2024 16:07:34 +0000
Subject: [PATCH 1/5] Start adding Databricks support

---
 deploy_ai_search/.env                        |  2 +-
 deploy_ai_search/text_2_sql_schema_store.py  | 50 +++++++++++++++++++
 text_2_sql/data_dictionary/.env              |  4 +-
 .../data_dictionary_creator.py               | 47 ++++++++++++++++-
 .../snowflake_data_dictionary_creator.py     | 10 ++--
 .../sql_sever_data_dictionary_creator.py     |  4 +-
 6 files changed, 107 insertions(+), 10 deletions(-)

diff --git a/deploy_ai_search/.env b/deploy_ai_search/.env
index 2b858b2..e738621 100644
--- a/deploy_ai_search/.env
+++ b/deploy_ai_search/.env
@@ -19,4 +19,4 @@ OpenAI__Endpoint=
 OpenAI__EmbeddingModel=
 OpenAI__EmbeddingDeployment=
 OpenAI__EmbeddingDimensions=1536
-Text2Sql__DatabaseName=
+Text2Sql__DatabaseEngine=
diff --git a/deploy_ai_search/text_2_sql_schema_store.py b/deploy_ai_search/text_2_sql_schema_store.py
index 59d981e..5f8c24d 100644
--- a/deploy_ai_search/text_2_sql_schema_store.py
+++ b/deploy_ai_search/text_2_sql_schema_store.py
@@ -24,6 +24,16 @@ from environment import (
     IndexerType,
 )
+import os
+from enum import StrEnum
+
+
+class DatabaseEngine(StrEnum):
+    """An enumeration to represent a database engine."""
+
+    SNOWFLAKE = "SNOWFLAKE"
+    SQL_SERVER = "SQL_SERVER"
+    DATABRICKS = "DATABRICKS"
 
 
 class Text2SqlSchemaStoreAISearch(AISearch):
@@ -42,6 +52,9 @@ def __init__(
             rebuild (bool, optional): Whether to rebuild the index. Defaults to False.
         """
         self.indexer_type = IndexerType.TEXT_2_SQL_SCHEMA_STORE
+        self.database_engine = DatabaseEngine[
+            os.environ["Text2Sql__DatabaseEngine"].upper()
+        ]
         super().__init__(suffix, rebuild)
 
         if single_data_dictionary:
@@ -49,6 +62,24 @@ def __init__(
         else:
             self.parsing_mode = BlobIndexerParsingMode.JSON
 
+    @property
+    def excluded_fields_for_database_engine(self):
+        """A method to get the excluded fields for the database engine."""
+
+        all_engine_specific_fields = ["Warehouse", "Database", "Catalog"]
+        if self.database_engine == DatabaseEngine.SNOWFLAKE:
+            engine_specific_fields = ["Warehouse", "Database"]
+        elif self.database_engine == DatabaseEngine.SQL_SERVER:
+            engine_specific_fields = ["Database"]
+        elif self.database_engine == DatabaseEngine.DATABRICKS:
+            engine_specific_fields = ["Catalog"]
+
+        return [
+            field
+            for field in all_engine_specific_fields
+            if field not in engine_specific_fields
+        ]
+
     def get_index_fields(self) -> list[SearchableField]:
         """This function returns the index fields for sql index.
@@ -78,6 +109,10 @@ def get_index_fields(self) -> list[SearchableField]:
                 name="Warehouse",
                 type=SearchFieldDataType.String,
             ),
+            SearchableField(
+                name="Catalog",
+                type=SearchFieldDataType.String,
+            ),
             SearchableField(
                 name="Definition",
                 type=SearchFieldDataType.String,
@@ -161,6 +196,13 @@ def get_index_fields(self) -> list[SearchableField]:
             ),
         ]
 
+        # Remove fields that are not supported by the database engine
+        fields = [
+            field
+            for field in fields
+            if field.name not in self.excluded_fields_for_database_engine
+        ]
+
         return fields
 
     def get_semantic_search(self) -> SemanticSearch:
@@ -309,4 +351,12 @@ def get_indexer(self) -> SearchIndexer:
             parameters=indexer_parameters,
         )
 
+        # Remove fields that are not supported by the database engine
+        indexer.output_field_mappings = [
+            field_mapping
+            for field_mapping in indexer.output_field_mappings
+            if field_mapping.target_field_name
+            not in self.excluded_fields_for_database_engine
+        ]
+
         return indexer
diff --git a/text_2_sql/data_dictionary/.env b/text_2_sql/data_dictionary/.env
index e5cca6f..f710186 100644
--- a/text_2_sql/data_dictionary/.env
+++ b/text_2_sql/data_dictionary/.env
@@ -3,12 +3,12 @@ OpenAI__EmbeddingModel=
 OpenAI__Endpoint=
 OpenAI__ApiKey=
 OpenAI__ApiVersion=
-Text2Sql__DatabaseEngine=
 Text2Sql__DatabaseName=
 Text2Sql__DatabaseConnectionString=
 Text2Sql__Snowflake__User=
 Text2Sql__Snowflake__Password=
 Text2Sql__Snowflake__Account=
 Text2Sql__Snowflake__Warehouse=
+Text2Sql__Databricks__Catalog=
 IdentityType= # system_assigned or user_assigned or key
-ClientId=
+ClientId=
diff --git a/text_2_sql/data_dictionary/data_dictionary_creator.py b/text_2_sql/data_dictionary/data_dictionary_creator.py
index 24212a7..1ad11f8 100644
--- a/text_2_sql/data_dictionary/data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/data_dictionary_creator.py
@@ -15,10 +15,19 @@ import random
 import re
 import networkx as nx
+from enum import StrEnum
 
 logging.basicConfig(level=logging.INFO)
 
 
+class DatabaseEngine(StrEnum):
+    """An enumeration to represent a database engine."""
+
+    SNOWFLAKE = "SNOWFLAKE"
+    SQL_SERVER = "SQL_SERVER"
+    DATABRICKS = "DATABRICKS"
+
+
 class ForeignKeyRelationship(BaseModel):
     column: str = Field(..., alias="Column")
     foreign_column: str = Field(..., alias="ForeignColumn")
@@ -124,6 +133,7 @@ class EntityItem(BaseModel):
     entity_name: Optional[str] = Field(default=None, alias="EntityName")
     database: Optional[str] = Field(default=None, alias="Database")
     warehouse: Optional[str] = Field(default=None, alias="Warehouse")
+    catalog: Optional[str] = Field(default=None, alias="Catalog")
 
     entity_relationships: Optional[list[EntityRelationship]] = Field(
         alias="EntityRelationships", default_factory=list
@@ -186,6 +196,9 @@ def __init__(
         self.warehouse = None
         self.database = None
+        self.catalog = None
+
+        self.database_engine = None
 
         load_dotenv(find_dotenv())
@@ -391,6 +404,7 @@ async def extract_entities_with_definitions(self) -> list[EntityItem]:
         for entity in all_entities:
             entity.warehouse = self.warehouse
             entity.database = self.database
+            entity.catalog = self.catalog
 
         return all_entities
@@ -636,6 +650,24 @@ async def build_entity_entry(self, entity: EntityItem) -> EntityItem:
 
         return entity
 
+    @property
+    def excluded_fields_for_database_engine(self):
+        """A method to get the excluded fields for the database engine."""
+
+        all_engine_specific_fields = ["Warehouse", "Database", "Catalog"]
+        if self.database_engine == DatabaseEngine.SNOWFLAKE:
+            engine_specific_fields = ["Warehouse", "Database"]
+        elif self.database_engine == DatabaseEngine.SQL_SERVER:
+            engine_specific_fields = ["Database"]
+        elif self.database_engine == DatabaseEngine.DATABRICKS:
+            engine_specific_fields = ["Catalog"]
+
+        return [
+            field
+            for field in all_engine_specific_fields
+            if field not in engine_specific_fields
+        ]
+
     async def create_data_dictionary(self):
         """A method to build a data dictionary from a database. Writes to file."""
         entities = await self.extract_entities_with_definitions()
@@ -655,12 +687,23 @@ async def create_data_dictionary(self):
             logging.info("Saving data dictionary to entities.json")
             with open("entities.json", "w", encoding="utf-8") as f:
                 json.dump(
-                    data_dictionary.model_dump(by_alias=True), f, indent=4, default=str
+                    data_dictionary.model_dump(
+                        by_alias=True, exclude=self.excluded_fields_for_database_engine
+                    ),
+                    f,
+                    indent=4,
+                    default=str,
                 )
         else:
             for entity in data_dictionary:
                 logging.info(f"Saving data dictionary for {entity.entity}")
                 with open(f"{entity.entity}.json", "w", encoding="utf-8") as f:
                     json.dump(
-                        entity.model_dump(by_alias=True), f, indent=4, default=str
+                        entity.model_dump(
+                            by_alias=True,
+                            exclude=self.excluded_fields_for_database_engine,
+                        ),
+                        f,
+                        indent=4,
+                        default=str,
                     )
diff --git a/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py b/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py
index e01c0c2..18ff545 100644
--- a/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py
@@ -1,6 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-from data_dictionary_creator import DataDictionaryCreator, EntityItem
+from data_dictionary_creator import DataDictionaryCreator, EntityItem, DatabaseEngine
 import asyncio
 import snowflake.connector
 import logging
@@ -25,9 +25,11 @@ def __init__(
             excluded_entities = []
 
         excluded_schemas = ["INFORMATION_SCHEMA"]
-        return super().__init__(
-            entities, excluded_entities, excluded_schemas, single_file
-        )
+        super().__init__(entities, excluded_entities, excluded_schemas, single_file)
+
+        self.database = os.environ["Text2Sql__DatabaseName"]
+        self.warehouse = os.environ["Text2Sql__Snowflake__Warehouse"]
+        self.database_engine = DatabaseEngine.SNOWFLAKE
 
     """A class to extract data dictionary information from a Snowflake database."""
 
diff --git a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py b/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
index 235ff00..2b421d2 100644
--- a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
@@ -1,6 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-from data_dictionary_creator import DataDictionaryCreator, EntityItem
+from data_dictionary_creator import DataDictionaryCreator, EntityItem, DatabaseEngine
 import asyncio
 import os
@@ -26,6 +26,8 @@ def __init__(
         super().__init__(entities, excluded_entities, excluded_schemas, single_file)
 
         self.database = os.environ["Text2Sql__DatabaseName"]
+        self.database_engine = DatabaseEngine.SQL_SERVER
+
 
     """A class to extract data dictionary information from a SQL Server database."""
 
     @property

From 75d962b7aebf9a602748dce85b9c6408ee50618f Mon Sep 17 00:00:00 2001
From: Ben Constable
Date: Fri, 15 Nov 2024 16:24:37 +0000
Subject: [PATCH 2/5] Updating databricks connector

---
 text_2_sql/data_dictionary/.env             |   3 +
 text_2_sql/data_dictionary/README.md        |   3 +-
 .../databricks_data_dictionary_creator.py   | 147 ++++++++++++++++++
 text_2_sql/data_dictionary/requirements.txt |   1 +
 4 files changed, 153 insertions(+), 1 deletion(-)
 create mode 100644 text_2_sql/data_dictionary/databricks_data_dictionary_creator.py

diff --git a/text_2_sql/data_dictionary/.env b/text_2_sql/data_dictionary/.env
index f710186..ad420ec 100644
--- a/text_2_sql/data_dictionary/.env
+++ b/text_2_sql/data_dictionary/.env
@@ -10,5 +10,8 @@ Text2Sql__Snowflake__Password=
 Text2Sql__Snowflake__Account=
 Text2Sql__Snowflake__Warehouse=
 Text2Sql__Databricks__Catalog=
+Text2Sql__Databricks__ServerHostname=
+Text2Sql__Databricks__HttpPath=
+Text2Sql__Databricks__AccessToken=
 IdentityType= # system_assigned or user_assigned or key
 ClientId=
diff --git a/text_2_sql/data_dictionary/README.md b/text_2_sql/data_dictionary/README.md
index 9ebba79..c8492e9 100644
--- a/text_2_sql/data_dictionary/README.md
+++ b/text_2_sql/data_dictionary/README.md
@@ -99,7 +99,8 @@ See `./generated_samples/` for an example output of the script. This can then be
 
 The following Databases have pre-built scripts for them:
 
-- **Microsoft SQL Server:** `sql_server_data_dictionary_creator.py`
+- **Databricks:** `databricks_data_dictionary_creator.py`
 - **Snowflake:** `snowflake_data_dictionary_creator.py`
+- **SQL Server:** `sql_server_data_dictionary_creator.py`
 
 If there is no pre-built script for your database engine, take one of the above as a starting point and adjust it.
diff --git a/text_2_sql/data_dictionary/databricks_data_dictionary_creator.py b/text_2_sql/data_dictionary/databricks_data_dictionary_creator.py
new file mode 100644
index 0000000..5a5ab4e
--- /dev/null
+++ b/text_2_sql/data_dictionary/databricks_data_dictionary_creator.py
@@ -0,0 +1,147 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from data_dictionary_creator import DataDictionaryCreator, EntityItem, DatabaseEngine
+import asyncio
+from databricks import sql
+import logging
+import os
+
+
+class SnowflakeDataDictionaryCreator(DataDictionaryCreator):
+    def __init__(
+        self,
+        entities: list[str] = None,
+        excluded_entities: list[str] = None,
+        single_file: bool = False,
+    ):
+        """A method to initialize the DataDictionaryCreator class.
+
+        Args:
+            entities (list[str], optional): A list of entities to extract. Defaults to None. If None, all entities are extracted.
+            excluded_entities (list[str], optional): A list of entities to exclude. Defaults to None.
+            single_file (bool, optional): A flag to indicate if the data dictionary should be saved to a single file. Defaults to False.
+ """ + if excluded_entities is None: + excluded_entities = [] + + excluded_schemas = [] + super().__init__(entities, excluded_entities, excluded_schemas, single_file) + + self.catalog = os.environ["Text2Sql__Databricks__Catalog"] + self.database_engine = DatabaseEngine.DATABRICKS + + """A class to extract data dictionary information from a Snowflake database.""" + + @property + def extract_table_entities_sql_query(self) -> str: + """A property to extract table entities from a Snowflake database.""" + return f"""SELECT + t.TABLE_NAME AS Entity, + t.TABLE_SCHEMA AS EntitySchema, + t.COMMENT AS Definition + FROM + INFORMATION_SCHEMA.TABLES t + WHERE + t.TABLE_CATALOG = '{self.catalog}' + """ + + @property + def extract_view_entities_sql_query(self) -> str: + """A property to extract view entities from a Snowflake database.""" + return """SELECT + v.TABLE_NAME AS Entity, + v.TABLE_SCHEMA AS EntitySchema + NULL AS Definition + FROM + INFORMATION_SCHEMA.VIEWS v + WHERE + v.TABLE_CATALOG = '{self.catalog}'""" + + def extract_columns_sql_query(self, entity: EntityItem) -> str: + """A property to extract column information from a Snowflake database.""" + return f"""SELECT + COLUMN_NAME AS Name, + DATA_TYPE AS Type, + COMMENT AS Definition + FROM + INFORMATION_SCHEMA.COLUMNS + WHERE + TABLE_CATALOG = '{self.catalog}' + AND TABLE_SCHEMA = '{entity.entity_schema}' + AND TABLE_NAME = '{entity.name}';""" + + @property + def extract_entity_relationships_sql_query(self) -> str: + """A property to extract entity relationships from a SQL Server database.""" + return """SELECT + tc.table_schema AS EntitySchema, + tc.table_name AS Entity, + rc.unique_constraint_schema AS ForeignEntitySchema, + rc.unique_constraint_name AS ForeignEntityConstraint, + rc.constraint_name AS ForeignKeyConstraint + FROM + information_schema.referential_constraints rc + JOIN + information_schema.table_constraints tc + ON rc.constraint_schema = tc.constraint_schema + AND rc.constraint_name = tc.constraint_name + WHERE + tc.constraint_type = 'FOREIGN KEY' + ORDER BY + EntitySchema, Entity, ForeignEntitySchema, ForeignEntityConstraint; + """ + + async def query_entities(self, sql_query: str, cast_to: any = None) -> list[dict]: + """ + A method to query a Databricks SQL endpoint for entities. + + Args: + sql_query (str): The SQL query to run. + cast_to (any, optional): The class to cast the results to. Defaults to None. + + Returns: + list[dict]: The list of entities or processed rows. 
+ """ + logging.info(f"Running query: {sql_query}") + results = [] + + # Set up connection parameters for Databricks SQL endpoint + connection = sql.connect( + server_hostname=os.environ["Text2Sql__Databricks__ServerHostname"], + http_path=os.environ["Text2Sql__Databricks__HttpPath"], + access_token=os.environ["Text2Sql__Databricks__AccessToken"], + ) + + try: + # Create a cursor + cursor = connection.cursor() + + # Execute the query in a thread-safe manner + await asyncio.to_thread(cursor.execute, sql_query) + + # Fetch column names + columns = [col[0] for col in cursor.description] + + # Fetch rows + rows = await asyncio.to_thread(cursor.fetchall) + + # Process rows + for row in rows: + if cast_to: + results.append(cast_to.from_sql_row(row, columns)) + else: + results.append(dict(zip(columns, row))) + + except Exception as e: + logging.error(f"Error while executing query: {e}") + raise + finally: + cursor.close() + connection.close() + + return results + + +if __name__ == "__main__": + data_dictionary_creator = SnowflakeDataDictionaryCreator() + asyncio.run(data_dictionary_creator.create_data_dictionary()) diff --git a/text_2_sql/data_dictionary/requirements.txt b/text_2_sql/data_dictionary/requirements.txt index 903dd0c..c8cc551 100644 --- a/text_2_sql/data_dictionary/requirements.txt +++ b/text_2_sql/data_dictionary/requirements.txt @@ -5,3 +5,4 @@ pydantic openai snowflake-connector-python networkx +databricks From 5e7110a410d57997a749d308f7c3348ac43768a3 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Fri, 15 Nov 2024 16:34:31 +0000 Subject: [PATCH 3/5] Update databricks connector --- .../databricks_data_dictionary_creator.py | 60 ++++++++++++------- .../snowflake_data_dictionary_creator.py | 2 +- .../sql_sever_data_dictionary_creator.py | 39 +++++++----- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/text_2_sql/data_dictionary/databricks_data_dictionary_creator.py b/text_2_sql/data_dictionary/databricks_data_dictionary_creator.py index 5a5ab4e..c6fc17b 100644 --- a/text_2_sql/data_dictionary/databricks_data_dictionary_creator.py +++ b/text_2_sql/data_dictionary/databricks_data_dictionary_creator.py @@ -7,7 +7,7 @@ import os -class SnowflakeDataDictionaryCreator(DataDictionaryCreator): +class DatabricksDataDictionaryCreator(DataDictionaryCreator): def __init__( self, entities: list[str] = None, @@ -30,11 +30,11 @@ def __init__( self.catalog = os.environ["Text2Sql__Databricks__Catalog"] self.database_engine = DatabaseEngine.DATABRICKS - """A class to extract data dictionary information from a Snowflake database.""" + """A class to extract data dictionary information from Databricks Unity Catalog.""" @property def extract_table_entities_sql_query(self) -> str: - """A property to extract table entities from a Snowflake database.""" + """A property to extract table entities from Databricks Unity Catalog.""" return f"""SELECT t.TABLE_NAME AS Entity, t.TABLE_SCHEMA AS EntitySchema, @@ -47,7 +47,7 @@ def extract_table_entities_sql_query(self) -> str: @property def extract_view_entities_sql_query(self) -> str: - """A property to extract view entities from a Snowflake database.""" + """A property to extract view entities from Databricks Unity Catalog.""" return """SELECT v.TABLE_NAME AS Entity, v.TABLE_SCHEMA AS EntitySchema @@ -58,7 +58,7 @@ def extract_view_entities_sql_query(self) -> str: v.TABLE_CATALOG = '{self.catalog}'""" def extract_columns_sql_query(self, entity: EntityItem) -> str: - """A property to extract column information from a Snowflake database.""" + """A 
property to extract column information from Databricks Unity Catalog.""" return f"""SELECT COLUMN_NAME AS Name, DATA_TYPE AS Type, @@ -72,23 +72,43 @@ def extract_columns_sql_query(self, entity: EntityItem) -> str: @property def extract_entity_relationships_sql_query(self) -> str: - """A property to extract entity relationships from a SQL Server database.""" - return """SELECT - tc.table_schema AS EntitySchema, - tc.table_name AS Entity, - rc.unique_constraint_schema AS ForeignEntitySchema, - rc.unique_constraint_name AS ForeignEntityConstraint, - rc.constraint_name AS ForeignKeyConstraint + """A property to extract entity relationships from Databricks Unity Catalog.""" + return f"""SELECT + fk_schema.TABLE_SCHEMA AS EntitySchema, + fk_tab.TABLE_NAME AS Entity, + pk_schema.TABLE_SCHEMA AS ForeignEntitySchema, + pk_tab.TABLE_NAME AS ForeignEntity, + fk_col.COLUMN_NAME AS [Column], + pk_col.COLUMN_NAME AS ForeignColumn FROM - information_schema.referential_constraints rc - JOIN - information_schema.table_constraints tc - ON rc.constraint_schema = tc.constraint_schema - AND rc.constraint_name = tc.constraint_name + INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS fk + INNER JOIN + INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS fkc + ON fk.constraint_name = fkc.constraint_name + INNER JOIN + INFORMATION_SCHEMA.TABLES AS fk_tab + ON fk_tab.TABLE_NAME = fkc.TABLE_NAME AND fk_tab.TABLE_SCHEMA = fkc.TABLE_SCHEMA + INNER JOIN + INFORMATION_SCHEMA.SCHEMATA AS fk_schema + ON fk_tab.TABLE_SCHEMA = fk_schema.TABLE_SCHEMA + INNER JOIN + INFORMATION_SCHEMA.TABLES AS pk_tab + ON pk_tab.TABLE_NAME = fkc.referenced_TABLE_NAME AND pk_tab.TABLE_SCHEMA = fkc.referenced_TABLE_SCHEMA + INNER JOIN + INFORMATION_SCHEMA.SCHEMATA AS pk_schema + ON pk_tab.TABLE_SCHEMA = pk_schema.TABLE_SCHEMA + INNER JOIN + INFORMATION_SCHEMA.COLUMNS AS fk_col + ON fkc.COLUMN_NAME = fk_col.COLUMN_NAME AND fkc.TABLE_NAME = fk_col.TABLE_NAME AND fkc.TABLE_SCHEMA = fk_col.TABLE_SCHEMA + INNER JOIN + INFORMATION_SCHEMA.COLUMNS AS pk_col + ON fkc.referenced_COLUMN_NAME = pk_col.COLUMN_NAME AND fkc.referenced_TABLE_NAME = pk_col.TABLE_NAME AND fkc.referenced_TABLE_SCHEMA = pk_col.TABLE_SCHEMA WHERE - tc.constraint_type = 'FOREIGN KEY' + fk.constraint_type = 'FOREIGN KEY' + AND fk_tab.TABLE_CATALOG = '{self.catalog}' + AND pk_tab.TABLE_CATALOG = '{self.catalog}' ORDER BY - EntitySchema, Entity, ForeignEntitySchema, ForeignEntityConstraint; + EntitySchema, Entity, ForeignEntitySchema, ForeignEntity; """ async def query_entities(self, sql_query: str, cast_to: any = None) -> list[dict]: @@ -143,5 +163,5 @@ async def query_entities(self, sql_query: str, cast_to: any = None) -> list[dict if __name__ == "__main__": - data_dictionary_creator = SnowflakeDataDictionaryCreator() + data_dictionary_creator = DatabricksDataDictionaryCreator() asyncio.run(data_dictionary_creator.create_data_dictionary()) diff --git a/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py b/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py index 18ff545..0232a50 100644 --- a/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py +++ b/text_2_sql/data_dictionary/snowflake_data_dictionary_creator.py @@ -67,7 +67,7 @@ def extract_columns_sql_query(self, entity: EntityItem) -> str: @property def extract_entity_relationships_sql_query(self) -> str: - """A property to extract entity relationships from a SQL Server database.""" + """A property to extract entity relationships from a Snowflake database.""" return """SELECT tc.table_schema AS EntitySchema, 
             tc.table_name AS Entity,
diff --git a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py b/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
index 2b421d2..4b8fc73 100644
--- a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
@@ -86,28 +86,39 @@ def extract_columns_sql_query(self, entity: EntityItem) -> str:
     def extract_entity_relationships_sql_query(self) -> str:
         """A property to extract entity relationships from a SQL Server database."""
         return """SELECT
-            fk_schema.name AS EntitySchema,
-            fk_tab.name AS Entity,
-            pk_schema.name AS ForeignEntitySchema,
-            pk_tab.name AS ForeignEntity,
-            fk_col.name AS [Column],
-            pk_col.name AS ForeignColumn
+            fk_schema.schema_name AS EntitySchema,
+            fk_tab.table_name AS Entity,
+            pk_schema.schema_name AS ForeignEntitySchema,
+            pk_tab.table_name AS ForeignEntity,
+            fk_col.column_name AS [Column],
+            pk_col.column_name AS ForeignColumn
         FROM
-            sys.foreign_keys AS fk
+            INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS fk
         INNER JOIN
-            sys.foreign_key_columns AS fkc ON fk.object_id = fkc.constraint_object_id
+            INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS fkc
+            ON fk.constraint_name = fkc.constraint_name
         INNER JOIN
-            sys.tables AS fk_tab ON fk_tab.object_id = fk.parent_object_id
+            INFORMATION_SCHEMA.TABLES AS fk_tab
+            ON fk_tab.table_name = fkc.table_name AND fk_tab.table_schema = fkc.table_schema
         INNER JOIN
-            sys.schemas AS fk_schema ON fk_tab.schema_id = fk_schema.schema_id
+            INFORMATION_SCHEMA.SCHEMATA AS fk_schema
+            ON fk_tab.table_schema = fk_schema.schema_name
         INNER JOIN
-            sys.tables AS pk_tab ON pk_tab.object_id = fk.referenced_object_id
+            INFORMATION_SCHEMA.TABLES AS pk_tab
+            ON pk_tab.table_name = fkc.referenced_table_name AND pk_tab.table_schema = fkc.referenced_table_schema
         INNER JOIN
-            sys.schemas AS pk_schema ON pk_tab.schema_id = pk_schema.schema_id
+            INFORMATION_SCHEMA.SCHEMATA AS pk_schema
+            ON pk_tab.table_schema = pk_schema.schema_name
         INNER JOIN
-            sys.columns AS fk_col ON fkc.parent_object_id = fk_col.object_id AND fkc.parent_column_id = fk_col.column_id
+            INFORMATION_SCHEMA.COLUMNS AS fk_col
+            ON fkc.column_name = fk_col.column_name AND fkc.table_name = fk_col.table_name AND fkc.table_schema = fk_col.table_schema
         INNER JOIN
-            sys.columns AS pk_col ON fkc.referenced_object_id = pk_col.object_id AND fkc.referenced_column_id = pk_col.column_id
+            INFORMATION_SCHEMA.COLUMNS AS pk_col
+            ON fkc.referenced_column_name = pk_col.column_name AND fkc.referenced_table_name = pk_col.table_name AND fkc.referenced_table_schema = pk_col.table_schema
+        WHERE
+            fk.constraint_type = 'FOREIGN KEY'
+            AND fk_tab.table_catalog = 'your_catalog_name'
+            AND pk_tab.table_catalog = 'your_catalog_name'
         ORDER BY
             EntitySchema, Entity, ForeignEntitySchema, ForeignEntity;
         """

From 61031463f1f4ea86b15741d642eae789b5f2c87d Mon Sep 17 00:00:00 2001
From: Ben Constable
Date: Fri, 15 Nov 2024 16:38:04 +0000
Subject: [PATCH 4/5] Update code

---
 .../sql_sever_data_dictionary_creator.py | 39 +++++++------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py b/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
index 4b8fc73..2b421d2 100644
--- a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
@@ -86,39 +86,28 @@ def extract_columns_sql_query(self, entity: EntityItem) -> str:
     def extract_entity_relationships_sql_query(self) -> str:
         """A property to extract entity relationships from a SQL Server database."""
         return """SELECT
-            fk_schema.schema_name AS EntitySchema,
-            fk_tab.table_name AS Entity,
-            pk_schema.schema_name AS ForeignEntitySchema,
-            pk_tab.table_name AS ForeignEntity,
-            fk_col.column_name AS [Column],
-            pk_col.column_name AS ForeignColumn
+            fk_schema.name AS EntitySchema,
+            fk_tab.name AS Entity,
+            pk_schema.name AS ForeignEntitySchema,
+            pk_tab.name AS ForeignEntity,
+            fk_col.name AS [Column],
+            pk_col.name AS ForeignColumn
         FROM
-            INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS fk
+            sys.foreign_keys AS fk
         INNER JOIN
-            INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS fkc
-            ON fk.constraint_name = fkc.constraint_name
+            sys.foreign_key_columns AS fkc ON fk.object_id = fkc.constraint_object_id
         INNER JOIN
-            INFORMATION_SCHEMA.TABLES AS fk_tab
-            ON fk_tab.table_name = fkc.table_name AND fk_tab.table_schema = fkc.table_schema
+            sys.tables AS fk_tab ON fk_tab.object_id = fk.parent_object_id
         INNER JOIN
-            INFORMATION_SCHEMA.SCHEMATA AS fk_schema
-            ON fk_tab.table_schema = fk_schema.schema_name
+            sys.schemas AS fk_schema ON fk_tab.schema_id = fk_schema.schema_id
         INNER JOIN
-            INFORMATION_SCHEMA.TABLES AS pk_tab
-            ON pk_tab.table_name = fkc.referenced_table_name AND pk_tab.table_schema = fkc.referenced_table_schema
+            sys.tables AS pk_tab ON pk_tab.object_id = fk.referenced_object_id
         INNER JOIN
-            INFORMATION_SCHEMA.SCHEMATA AS pk_schema
-            ON pk_tab.table_schema = pk_schema.schema_name
+            sys.schemas AS pk_schema ON pk_tab.schema_id = pk_schema.schema_id
         INNER JOIN
-            INFORMATION_SCHEMA.COLUMNS AS fk_col
-            ON fkc.column_name = fk_col.column_name AND fkc.table_name = fk_col.table_name AND fkc.table_schema = fk_col.table_schema
+            sys.columns AS fk_col ON fkc.parent_object_id = fk_col.object_id AND fkc.parent_column_id = fk_col.column_id
         INNER JOIN
-            INFORMATION_SCHEMA.COLUMNS AS pk_col
-            ON fkc.referenced_column_name = pk_col.column_name AND fkc.referenced_table_name = pk_col.table_name AND fkc.referenced_table_schema = pk_col.table_schema
-        WHERE
-            fk.constraint_type = 'FOREIGN KEY'
-            AND fk_tab.table_catalog = 'your_catalog_name'
-            AND pk_tab.table_catalog = 'your_catalog_name'
+            sys.columns AS pk_col ON fkc.referenced_object_id = pk_col.object_id AND fkc.referenced_column_id = pk_col.column_id
         ORDER BY
             EntitySchema, Entity, ForeignEntitySchema, ForeignEntity;
         """

From ab0806c03a51a8616529664013547d462f1be4f5 Mon Sep 17 00:00:00 2001
From: Ben Constable
Date: Fri, 15 Nov 2024 16:39:58 +0000
Subject: [PATCH 5/5] Fix single file mode

---
 text_2_sql/data_dictionary/data_dictionary_creator.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/text_2_sql/data_dictionary/data_dictionary_creator.py b/text_2_sql/data_dictionary/data_dictionary_creator.py
index 1ad11f8..0da3209 100644
--- a/text_2_sql/data_dictionary/data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/data_dictionary_creator.py
@@ -686,10 +686,14 @@ async def create_data_dictionary(self):
         if self.single_file:
             logging.info("Saving data dictionary to entities.json")
             with open("entities.json", "w", encoding="utf-8") as f:
-                json.dump(
-                    data_dictionary.model_dump(
+                data_dictionary_dump = [
+                    entity.model_dump(
                         by_alias=True, exclude=self.excluded_fields_for_database_engine
+                    )
+                    for entity in data_dictionary
+                ]
+                json.dump(
+                    data_dictionary_dump,
                     f,
                     indent=4,
                     default=str,
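
---

As a rough usage sketch (not part of the patch series): once these patches are applied, the new Databricks creator is driven the same way as the existing Snowflake and SQL Server scripts. The environment variable names below come from the .env template added in patch 2; every value shown is an illustrative placeholder, and the remaining settings in that template (the OpenAI__* and identity values) are assumed to be populated as well.

    import asyncio
    import os

    from databricks_data_dictionary_creator import DatabricksDataDictionaryCreator

    # All values are placeholders; the variable names match the .env template from patch 2.
    os.environ["Text2Sql__Databricks__Catalog"] = "my_catalog"
    os.environ["Text2Sql__Databricks__ServerHostname"] = "adb-1234567890123456.7.azuredatabricks.net"
    os.environ["Text2Sql__Databricks__HttpPath"] = "/sql/1.0/warehouses/0123456789abcdef"
    os.environ["Text2Sql__Databricks__AccessToken"] = "dapi-..."

    # single_file=True writes a single entities.json (the mode patch 5 fixes);
    # the default writes one <entity>.json file per extracted entity.
    creator = DatabricksDataDictionaryCreator(single_file=True)
    asyncio.run(creator.create_data_dictionary())

The Snowflake and SQL Server creators follow the same pattern and differ only in the connection-related environment variables they read.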